Class: RE2::Scanner

Inherits:
Object show all
Includes:
Enumerable
Defined in:
ext/re2/re2.cc,
lib/re2/scanner.rb

Instance Method Summary collapse

Instance Method Details

#eachObject



16
17
18
19
20
21
22
23
24
# File 'lib/re2/scanner.rb', line 16

# Iterates over successive results of #scan.
#
# With a block, yields each value returned by #scan until #scan returns a
# falsy value, then returns nil. Without a block, returns an Enumerator
# over the same sequence.
#
# @yieldparam captures [Object] the value returned by a single #scan call
# @return [Enumerator, nil]
def each
  return to_enum(:each) unless block_given?

  while (captures = scan)
    yield captures
  end
end

#eof?Boolean

Returns whether the RE2::Scanner has consumed all input or not.

Examples:

c = RE2::Regexp.new('(\d+)').scan("foo")
c.eof? #=> true

Returns:

  • (Boolean)

    whether the RE2::Scanner has consumed all input or not



304
305
306
307
308
309
# File 'ext/re2/re2.cc', line 304

/* Reports the scanner's eof flag as a Ruby boolean. */
static VALUE re2_scanner_eof(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  return BOOL2RUBY(scanner->eof);
}

#regexpRE2::Regexp

Returns the Regexp used in the RE2::Scanner.

Examples:

c = RE2::Regexp.new('(\d+)').scan("bob 123")
c.regexp #=> #<RE2::Regexp /(\d+)/>

Returns:

  • (RE2::Regexp)


544
545
546
547
548
549
# File 'ext/re2/re2.cc', line 544

/* Returns the regexp VALUE stored on the scanner. */
static VALUE re2_scanner_regexp(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  return scanner->regexp;
}

#rewindObject

Rewind the RE2::Scanner to the start of the string.

Examples:

s = RE2::Regexp.new('(\d+)').scan("1 2 3")
s.scan #=> ["1"]
s.scan #=> ["2"]
s.rewind
s.scan #=> ["1"]


322
323
324
325
326
327
328
329
330
331
332
# File 'ext/re2/re2.cc', line 322

/*
 * Resets the scanner so the next scan starts from the beginning of its text.
 *
 * A fresh StringPiece over the scanner's stored text replaces the current
 * input and the eof flag is cleared. Raises NoMemoryError if the replacement
 * StringPiece cannot be allocated; in that case the scanner is left unchanged.
 *
 * Returns self.
 */
static VALUE re2_scanner_rewind(VALUE self) {
  re2_scanner *c;
  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);

  /* Allocate the replacement before releasing the current input: nothrow new
   * yields a null pointer on failure, and installing that would make the next
   * scan dereference null. */
  re2::StringPiece *rewound = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(c->text), RSTRING_LEN(c->text));
  if (!rewound) {
    rb_raise(rb_eNoMemError,
        "not enough memory to allocate StringPiece for input");
  }

  delete c->input;
  c->input = rewound;
  c->eof = false;

  return self;
}

#scanArray<String>, ...

Scan the given text incrementally for matches using FindAndConsume, returning an array of submatches on each subsequent call. Returns nil if no matches are found or an empty array for every match if the pattern has no capturing groups.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
s.scan #=> ["Foo"]
s.scan #=> ["bar"]

Returns:

  • (Array<String>)

    if the pattern has capturing groups

  • ([])

    if the pattern does not have capturing groups

  • (nil)

    if no matches are found



353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# File 'ext/re2/re2.cc', line 353

/*
 * Performs one incremental match against the scanner's remaining input.
 *
 * Returns Qnil when the eof flag is already set or when FindAndConsumeN finds
 * no match. Otherwise returns an array with one entry per capturing group:
 * nil for a capture whose StringPiece is empty, or an encoded string for the
 * captured text.
 */
static VALUE re2_scanner_scan(VALUE self) {
  re2_scanner *scanner;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  TypedData_Get_Struct(scanner->regexp, re2_pattern, &re2_regexp_data_type,
      regexp);

  /* Nothing left to consume. */
  if (scanner->eof) {
    return Qnil;
  }

  const auto group_count = scanner->number_of_capturing_groups;

  /* FindAndConsumeN wants an array of Arg pointers; each Arg wraps the
   * StringPiece that will receive the corresponding capture. */
  std::vector<re2::StringPiece> captures(group_count);
  std::vector<RE2::Arg> arg_values(group_count);
  std::vector<RE2::Arg*> arg_pointers(group_count);

  for (int i = 0; i < group_count; ++i) {
    arg_values[i] = &captures[i];
    arg_pointers[i] = &arg_values[i];
  }

  const re2::StringPiece::size_type size_before = scanner->input->size();

  if (!RE2::FindAndConsumeN(scanner->input, *regexp->pattern,
        arg_pointers.data(), group_count)) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_after = scanner->input->size();

  VALUE result = rb_ary_new2(group_count);

  for (const re2::StringPiece &capture : captures) {
    VALUE entry = Qnil;

    if (!capture.empty()) {
      entry = encoded_str_new(capture.data(), capture.size(),
          regexp->pattern->options().encoding());
    }

    rb_ary_push(result, entry);
  }

  /* The input is exhausted once nothing remains after this match. */
  scanner->eof = (size_after == 0);

  /* A match that consumed nothing would repeat forever, so step past one
   * byte manually when there is still input left. */
  if (size_after >= size_before && size_after > 0) {
    scanner->input->remove_prefix(1);
  }

  return result;
}

#stringString

Returns the text supplied when incrementally matching with Regexp#scan.

Examples:

c = RE2::Regexp.new('(\d+)').scan("foo")
c.string #=> "foo"

Returns:

  • (String)


289
290
291
292
293
294
# File 'ext/re2/re2.cc', line 289

/* Returns the text VALUE stored on the scanner. */
static VALUE re2_scanner_string(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  return scanner->text;
}