-
Notifications
You must be signed in to change notification settings - Fork 45
[Feature #21943] Add StringScanner#integer_at to extract capture group as Integer directly #192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
dca7b9e
4600b0a
46a2e1d
82b93ad
b12e653
0948c72
8726c03
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1852,6 +1852,67 @@ strscan_values_at(int argc, VALUE *argv, VALUE self) | |
| return new_ary; | ||
| } | ||
|
|
||
| /* | ||
| * call-seq: | ||
| * get_int(index) -> integer or nil | ||
| * | ||
| * Returns the captured substring at the given +index+ as an Integer, | ||
| * without creating an intermediate String object. | ||
| * | ||
| * Returns +nil+ if the most recent match failed, or if the capture | ||
| * at +index+ is out of range, or if the capture did not participate | ||
| * in the match. | ||
| * | ||
| * This is semantically equivalent to <tt>self[index].to_i</tt> but | ||
| * avoids the allocation of a temporary String. | ||
| * | ||
| * scanner = StringScanner.new("2024-06-15") | ||
| * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/) | ||
| * scanner.get_int(1) # => 2024 | ||
| * scanner.get_int(2) # => 6 | ||
| * scanner.get_int(3) # => 15 | ||
| * scanner.get_int(0) # => 20240615 (entire match as integer) | ||
| * | ||
| */ | ||
| static VALUE | ||
| strscan_get_int(VALUE self, VALUE idx) | ||
| { | ||
| struct strscanner *p; | ||
| long i; | ||
| long beg, end, len; | ||
| const char *ptr; | ||
| VALUE buffer_v, integer; | ||
|
|
||
| GET_SCANNER(self, p); | ||
| if (! MATCHED_P(p)) return Qnil; | ||
|
|
||
| i = NUM2LONG(idx); | ||
|
|
||
| if (i < 0) | ||
| i += p->regs.num_regs; | ||
| if (i < 0) return Qnil; | ||
| if (i >= p->regs.num_regs) return Qnil; | ||
| if (p->regs.beg[i] == -1) return Qnil; | ||
|
|
||
| beg = adjust_register_position(p, p->regs.beg[i]); | ||
| end = adjust_register_position(p, p->regs.end[i]); | ||
| len = end - beg; | ||
|
|
||
| if (len <= 0) return INT2FIX(0); | ||
|
|
||
| ptr = S_PBEG(p) + beg; | ||
|
|
||
| { | ||
| char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it OK that we allocate a C string?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you for your feedback! The original implementation did allocate a C string via `RB_ALLOCV_N`. Updated in 4600b0a. For values up to 18 digits on 64-bit (9 on 32-bit), digits are now parsed directly from the source string's byte buffer with no allocation at all — neither a Ruby String nor a C string. The […] In the primary use case of date component parsing (1-4 digit values), this means zero allocation of any kind.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I forgot that it is declared only in an internal header.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we need to use |
||
| MEMCPY(buffer, ptr, char, len); | ||
| buffer[len] = '\0'; | ||
| integer = rb_cstr2inum(buffer, 10); | ||
| RB_ALLOCV_END(buffer_v); | ||
| } | ||
|
|
||
| return integer; | ||
| } | ||
|
|
||
| /* | ||
| * :markup: markdown | ||
| * :include: strscan/link_refs.txt | ||
|
|
@@ -2290,6 +2351,7 @@ Init_strscan(void) | |
| rb_define_method(StringScanner, "size", strscan_size, 0); | ||
| rb_define_method(StringScanner, "captures", strscan_captures, 0); | ||
| rb_define_method(StringScanner, "values_at", strscan_values_at, -1); | ||
| rb_define_method(StringScanner, "get_int", strscan_get_int, 1); | ||
|
|
||
| rb_define_method(StringScanner, "rest", strscan_rest, 0); | ||
| rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like this `integer_at` parses more strictly than `Integer(string)`, which accepts `"1_234"`.
So I think the empty-string case should also raise an error, just as `Integer("")` raises `ArgumentError`.
In any case, I think it's worth adding a test case for an empty-string match.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tompng
Thank you for your feedback. 0948c72 fixed it.