Class: RE2::Scanner

Inherits:
Object show all
Includes:
Enumerable
Defined in:
ext/re2/re2.cc,
lib/re2/scanner.rb

Instance Method Summary collapse

Instance Method Details

#eachObject



16
17
18
19
20
21
22
23
24
# File 'lib/re2/scanner.rb', line 16

# Iterates over successive results of #scan.
#
# With a block, yields each truthy value returned by #scan and stops at the
# first nil/false result. Without a block, returns an Enumerator over #each.
def each
  return to_enum(:each) unless block_given?

  while (captures = scan)
    yield captures
  end
end

#eof?Boolean

Returns whether the RE2::Scanner has consumed all input or not.

Examples:

c = RE2::Regexp.new('(\d+)').scan("123")
c.scan #=> ["123"]
c.eof? #=> true

Returns:

  • (Boolean)

    whether the RE2::Scanner has consumed all input or not



316
317
318
319
320
321
# File 'ext/re2/re2.cc', line 316

/*
 * Implements RE2::Scanner#eof?: unwraps the scanner struct from self and
 * returns its eof flag converted to a Ruby boolean.
 */
static VALUE re2_scanner_eof(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  const bool at_end = scanner->eof;

  return BOOL2RUBY(at_end);
}

#regexpRE2::Regexp

Returns the Regexp used in the RE2::Scanner.

Examples:

c = RE2::Regexp.new('(\d+)').scan("bob 123")
c.regexp #=> #<RE2::Regexp /(\d+)/>

Returns:

  • (RE2::Regexp)

    the Regexp used in the RE2::Scanner



556
557
558
559
560
561
# File 'ext/re2/re2.cc', line 556

/*
 * Implements RE2::Scanner#regexp: unwraps the scanner struct from self and
 * returns the Ruby object stored in its regexp field.
 */
static VALUE re2_scanner_regexp(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  VALUE regexp = scanner->regexp;

  return regexp;
}

#rewindObject

Rewind the RE2::Scanner to the start of the string.

Examples:

s = RE2::Regexp.new('(\d+)').scan("1 2 3")
s.scan #=> ["1"]
s.scan #=> ["2"]
s.rewind
s.scan #=> ["1"]


334
335
336
337
338
339
340
341
342
343
344
# File 'ext/re2/re2.cc', line 334

/*
 * Implements RE2::Scanner#rewind: points the scanner's input back at the
 * start of its stored text and clears the eof flag.
 *
 * Returns self. Raises NoMemoryError if the replacement input view cannot be
 * allocated; in that case the scanner's existing input and eof flag are left
 * untouched.
 */
static VALUE re2_scanner_rewind(VALUE self) {
  re2_scanner *c;
  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);

  /* Allocate the replacement before releasing the old view: a nothrow new
   * yields a null pointer on failure, and storing that in c->input would make
   * the next scan dereference null. */
  re2::StringPiece *rewound = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(c->text), RSTRING_LEN(c->text));
  if (rewound == nullptr) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  delete c->input;
  c->input = rewound;
  c->eof = false;

  return self;
}

#scanArray<String>, ...

Scan the given text incrementally for matches using FindAndConsume, returning an array of submatches on each subsequent call. Returns nil if no matches are found or an empty array for every match if the pattern has no capturing groups.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
s.scan #=> ["Foo"]
s.scan #=> ["bar"]

Returns:

  • (Array<String>)

    if the pattern has capturing groups

  • ([])

    if the pattern does not have capturing groups

  • (nil)

    if no matches are found



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
# File 'ext/re2/re2.cc', line 365

/*
 * Implements RE2::Scanner#scan: attempts one FindAndConsumeN match against the
 * scanner's remaining input.
 *
 * Returns nil if the eof flag is already set or the pattern does not match.
 * Otherwise returns an array with one entry per capturing group: nil for a
 * group whose captured piece is empty, or a string built by encoded_str_new
 * using the pattern's encoding option.
 */
static VALUE re2_scanner_scan(VALUE self) {
  re2_scanner *scanner;
  re2_pattern *pattern;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  TypedData_Get_Struct(scanner->regexp, re2_pattern, &re2_regexp_data_type,
      pattern);

  /* Nothing left to scan. */
  if (scanner->eof) {
    return Qnil;
  }

  const auto group_count = scanner->number_of_capturing_groups;

  /* FindAndConsumeN wants an array of Arg pointers; each Arg in turn wraps
   * the StringPiece that receives the corresponding capture. */
  std::vector<re2::StringPiece> captures(group_count);
  std::vector<RE2::Arg> arg_values(group_count);
  std::vector<RE2::Arg*> arg_ptrs(group_count);

  for (int idx = 0; idx < group_count; ++idx) {
    arg_values[idx] = &captures[idx];
    arg_ptrs[idx] = &arg_values[idx];
  }

  const re2::StringPiece::size_type size_before = scanner->input->size();

  if (!RE2::FindAndConsumeN(scanner->input, *pattern->pattern,
        arg_ptrs.data(), group_count)) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_after = scanner->input->size();

  VALUE result = rb_ary_new2(group_count);

  for (const re2::StringPiece &capture : captures) {
    rb_ary_push(result,
        capture.empty()
          ? Qnil
          : encoded_str_new(capture.data(), capture.size(),
              pattern->pattern->options().encoding()));
  }

  /* The input is exhausted once nothing remains after this match. */
  scanner->eof = (size_after == 0);

  /* A match that consumed nothing would repeat forever, so step past one
   * byte manually when input remains. */
  if (size_after >= size_before && size_after > 0) {
    scanner->input->remove_prefix(1);
  }

  return result;
}

#stringString

Returns the text supplied when incrementally matching with Regexp#scan.

Examples:

c = RE2::Regexp.new('(\d+)').scan("foo")
c.string #=> "foo"

Returns:

  • (String)

    the text supplied when incrementally matching with Regexp#scan



301
302
303
304
305
306
# File 'ext/re2/re2.cc', line 301

/*
 * Implements RE2::Scanner#string: unwraps the scanner struct from self and
 * returns the Ruby object stored in its text field.
 */
static VALUE re2_scanner_string(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  VALUE text = scanner->text;

  return text;
}