Class: RE2::Scanner
- Includes:
- Enumerable
- Defined in:
- ext/re2/re2.cc,
lib/re2/scanner.rb
Instance Method Summary collapse
- #each ⇒ Object
-
#eof? ⇒ Boolean
Returns whether the Scanner has consumed all input or not.
- #regexp ⇒ RE2::Regexp
-
#rewind ⇒ Object
Rewind the Scanner to the start of the string.
-
#scan ⇒ Array<String>, ...
Scan the given text incrementally for matches using `FindAndConsume`, returning an array of submatches on each subsequent call.
-
#string ⇒ String
Returns the text supplied when incrementally matching with Regexp#scan.
Instance Method Details
#each ⇒ Object
16 17 18 19 20 21 22 23 24 |
# File 'lib/re2/scanner.rb', line 16
# Yields the result of each successive #scan call until #scan returns a
# falsy value. Without a block, returns an enumerator over #each instead.
def each
  return to_enum(:each) unless block_given?

  while (matches = scan)
    yield matches
  end
end
#eof? ⇒ Boolean
Returns whether the RE2::Scanner has consumed all input or not.
316 317 318 319 320 321 |
# File 'ext/re2/re2.cc', line 316
/*
 * Reports whether the scanner has consumed all of its input.
 *
 * Unwraps the re2_scanner struct behind self and returns its eof flag
 * passed through BOOL2RUBY.
 */
static VALUE re2_scanner_eof(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);

  return BOOL2RUBY(scanner->eof);
}
|
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the RE2::Scanner.
556 557 558 559 560 561 |
# File 'ext/re2/re2.cc', line 556
/*
 * Returns the regexp object stored on the scanner.
 *
 * Unwraps the re2_scanner struct behind self and hands back its regexp
 * VALUE unchanged.
 */
static VALUE re2_scanner_regexp(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);

  return scanner->regexp;
}
|
#rewind ⇒ Object
Rewind the RE2::Scanner to the start of the string.
334 335 336 337 338 339 340 341 342 343 344 |
# File 'ext/re2/re2.cc', line 334
/*
 * Rewind the scanner to the start of its text.
 *
 * Replaces the scanner's input StringPiece with a fresh one covering the
 * whole of the stored text and clears the eof flag, so the next scan
 * starts again from the beginning.
 *
 * Returns self.
 * Raises NoMemoryError if the replacement StringPiece cannot be allocated;
 * in that case the scanner's existing input and eof flag are left untouched.
 */
static VALUE re2_scanner_rewind(VALUE self) {
  re2_scanner *c;
  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);

  /*
   * Allocate the replacement before releasing the old input: the nothrow
   * form of new yields a null pointer on failure, and storing that in
   * c->input would make the next scan dereference null.
   */
  re2::StringPiece *fresh_input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(c->text), RSTRING_LEN(c->text));
  if (!fresh_input) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  delete c->input;
  c->input = fresh_input;
  c->eof = false;

  return self;
}
|
#scan ⇒ Array<String>, ...
Scan the given text incrementally for matches using `FindAndConsume`, returning an array of submatches on each subsequent call. Returns `nil` if no matches are found or an empty array for every match if the pattern has no capturing groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the Regexp is set to `false` (any other encoding's behaviour is undefined).
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 |
# File 'ext/re2/re2.cc', line 365
/*
 * Advance the scanner by one match and return that match's submatches.
 *
 * Runs RE2::FindAndConsumeN against the scanner's remaining input with one
 * StringPiece slot per capturing group.
 *
 * Returns nil when the scanner is already at eof or when no match is found.
 * Otherwise returns an array with one entry per capturing group: a string
 * built by encoded_str_new using the pattern's configured encoding, or nil
 * when the captured piece is empty.
 */
static VALUE re2_scanner_scan(VALUE self) {
  re2_pattern *compiled;
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  TypedData_Get_Struct(scanner->regexp, re2_pattern, &re2_regexp_data_type,
                       compiled);

  std::vector<RE2::Arg> argv(scanner->number_of_capturing_groups);
  std::vector<RE2::Arg*> args(scanner->number_of_capturing_groups);
  std::vector<re2::StringPiece> groups(scanner->number_of_capturing_groups);

  /* Nothing left to yield once the input has been fully consumed. */
  if (scanner->eof) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_before = scanner->input->size();

  /* Point every RE2::Arg at its StringPiece slot, and every Arg* at its Arg. */
  for (int n = 0; n < scanner->number_of_capturing_groups; ++n) {
    argv[n] = &groups[n];
    args[n] = &argv[n];
  }

  if (!RE2::FindAndConsumeN(scanner->input, *compiled->pattern, args.data(),
                            scanner->number_of_capturing_groups)) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_after = scanner->input->size();

  VALUE submatches = rb_ary_new2(scanner->number_of_capturing_groups);
  for (int n = 0; n < scanner->number_of_capturing_groups; ++n) {
    if (!groups[n].empty()) {
      rb_ary_push(submatches,
                  encoded_str_new(groups[n].data(), groups[n].size(),
                                  compiled->pattern->options().encoding()));
    } else {
      /* An empty captured piece is reported as nil. */
      rb_ary_push(submatches, Qnil);
    }
  }

  /* The scanner is exhausted once no input remains after the match. */
  scanner->eof = size_after == 0;

  /*
   * A match that consumed nothing would be found again at the same spot on
   * the next call, so step past one byte ourselves while input remains.
   */
  if (size_after > 0 && size_after >= size_before) {
    scanner->input->remove_prefix(1);
  }

  return submatches;
}
|
#string ⇒ String
Returns the text supplied when incrementally matching with Regexp#scan.
301 302 303 304 305 306 |
# File 'ext/re2/re2.cc', line 301
/*
 * Returns the text stored on the scanner.
 *
 * Unwraps the re2_scanner struct behind self and hands back its text
 * VALUE unchanged.
 */
static VALUE re2_scanner_string(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);

  return scanner->text;
}
|