Class: RE2::Set

Inherits:
Object show all
Defined in:
ext/re2/re2.cc

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RE2::Set #initialize(anchor) ⇒ RE2::Set #initialize(anchor, options) ⇒ RE2::Set

Returns a new RE2::Set object, a collection of patterns that can be searched for simultaneously.

Overloads:

  • #initialize ⇒ RE2::Set

    Returns a new RE2::Set object for unanchored patterns with the default options.

    Raises:

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor) ⇒ RE2::Set

    Returns a new RE2::Set object for the specified anchor with the default options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    Raises:

    • (ArgumentError)

      if anchor is not :unanchored, :anchor_start or :anchor_both

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor, options) ⇒ RE2::Set

    Returns a new RE2::Set object with the specified options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    • options (Hash)

      the options with which to compile the pattern

    Options Hash (options):

    • :utf8 (Boolean) — default: true

      text and pattern are UTF-8; otherwise Latin-1

    • :posix_syntax (Boolean) — default: false

      restrict regexps to POSIX egrep syntax

    • :longest_match (Boolean) — default: false

      search for longest match, not first match

    • :log_errors (Boolean) — default: true

      log syntax and execution errors to ERROR

    • :max_mem (Integer)

      approx. max memory footprint of RE2

    • :literal (Boolean) — default: false

      interpret string as literal, not regexp

    • :never_nl (Boolean) — default: false

      never match \n, even if it is in regexp

    • :case_sensitive (Boolean) — default: true

      match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)

    • :perl_classes (Boolean) — default: false

      allow Perl's \d \s \w \D \S \W when in posix_syntax mode

    • :word_boundary (Boolean) — default: false

      allow \b \B (word boundary and not) when in posix_syntax mode

    • :one_line (Boolean) — default: false

      ^ and $ only match beginning and end of text when in posix_syntax mode

    Raises:

    • (ArgumentError)

      if anchor is not one of the accepted choices

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern



2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
# File 'ext/re2/re2.cc', line 2306

/*
 * Implements RE2::Set#initialize.
 *
 * Accepts up to two optional arguments: an anchor Symbol and an options
 * Hash. When no anchor is given the set is unanchored; when no options are
 * given a default-constructed RE2::Options is used.
 *
 * Raises ArgumentError when the anchor Symbol is not :unanchored,
 * :anchor_start or :anchor_both, and NoMemoryError when the RE2::Set could
 * not be allocated. Returns self.
 */
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE anchor, options;
  rb_scan_args(argc, argv, "02", &anchor, &options);

  re2_set *wrapper;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapper);

  // Start from the default and only override it for the anchored variants.
  RE2::Anchor set_anchor = RE2::UNANCHORED;
  if (!NIL_P(anchor)) {
    Check_Type(anchor, T_SYMBOL);

    const ID requested = SYM2ID(anchor);
    if (requested == id_anchor_start) {
      set_anchor = RE2::ANCHOR_START;
    } else if (requested == id_anchor_both) {
      set_anchor = RE2::ANCHOR_BOTH;
    } else if (requested != id_unanchored) {
      rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
    }
  }

  RE2::Options set_options;
  if (RTEST(options)) {
    parse_re2_options(&set_options, options);
  }

  // Release any set left over from an earlier #initialize call on this
  // object before building the replacement.
  if (wrapper->set != nullptr) {
    delete wrapper->set;
    wrapper->set = nullptr;
  }

  // Non-throwing new: an allocation failure is reported as a Ruby
  // NoMemoryError rather than a C++ exception.
  wrapper->set = new(std::nothrow) RE2::Set(set_options, set_anchor);
  if (wrapper->set == nullptr) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
  }

  return self;
}

Class Method Details

.match_raises_errors? ⇒ Boolean

Returns whether the underlying RE2 version outputs error information from RE2::Set::Match. If not, #match will raise an error if attempting to set its :exception option to true.

Returns:

  • (Boolean)

    whether the underlying RE2 outputs error information from RE2::Set matches



2427
2428
2429
2430
2431
2432
2433
# File 'ext/re2/re2.cc', line 2427

/*
 * Implements RE2::Set.match_raises_errors?.
 *
 * The answer is fixed at build time: Qtrue when HAVE_ERROR_INFO_ARGUMENT is
 * defined, Qfalse otherwise.
 */
static VALUE re2_set_match_raises_errors_p(VALUE) {
#ifdef HAVE_ERROR_INFO_ARGUMENT
  const bool has_error_info = true;
#else
  const bool has_error_info = false;
#endif

  return has_error_info ? Qtrue : Qfalse;
}

.size? ⇒ Boolean

Returns whether the underlying RE2 version has a Set::Size method.

Returns:

  • (Boolean)

    whether the underlying RE2 has a Set::Size method



2440
2441
2442
2443
2444
2445
2446
# File 'ext/re2/re2.cc', line 2440

/*
 * Implements RE2::Set.size?.
 *
 * The answer is fixed at build time: Qtrue when HAVE_SET_SIZE is defined,
 * Qfalse otherwise.
 */
static VALUE re2_set_size_p(VALUE) {
#ifdef HAVE_SET_SIZE
  const bool has_set_size = true;
#else
  const bool has_set_size = false;
#endif

  return has_set_size ? Qtrue : Qfalse;
}

Instance Method Details

#add(pattern) ⇒ Integer

Adds a pattern to the set. Returns the index that will identify the pattern in the output of #match. Cannot be called after #compile has been called.

Examples:

set = RE2::Set.new
set.add("abc") #=> 0
set.add("def") #=> 1

Parameters:

  • pattern (String)

    the regex pattern

Returns:

  • (Integer)

    the index of the pattern in the set

Raises:

  • (ArgumentError)

    if called after compile or the pattern is rejected



2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
# File 'ext/re2/re2.cc', line 2361

/*
 * Implements RE2::Set#add.
 *
 * Coerces `pattern` to a String, hands its bytes to RE2::Set::Add and returns
 * the resulting index as an Integer.
 *
 * Raises ArgumentError, including the error text reported by RE2, when Add
 * returns a negative index.
 */
static VALUE re2_set_add(VALUE self, VALUE pattern) {
  StringValue(pattern);

  re2_set *s = unwrap_re2_set(self);

  int index;
  VALUE msg = Qnil;

  {
    // Keep the std::string in its own scope so that its destructor has run
    // before rb_raise below, which does not return normally.
    std::string err;
    index = s->set->Add(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), &err);

    // Only the failure path needs the message, so do not allocate a Ruby
    // string for every successful add.
    if (index < 0) {
      msg = rb_str_new(err.data(), err.size());
    }
  }

  if (index < 0) {
    // Pass the Ruby string itself rather than RSTRING_PTR(msg): the VALUE
    // stays referenced while the exception message is formatted, and the text
    // is not cut short at an embedded NUL byte as it would be with "%s".
    rb_raise(rb_eArgError,
             "str rejected by RE2::Set->Add(): %" PRIsVALUE, msg);
  }

  return INT2FIX(index);
}

#compile ⇒ Boolean

Compiles a RE2::Set so it can be used to match against. Must be called after #add and before #match.

Examples:

set = RE2::Set.new
set.add("abc")
set.compile #=> true

Returns:

  • (Boolean)

    whether compilation was a success



2394
2395
2396
2397
2398
# File 'ext/re2/re2.cc', line 2394

/*
 * Implements RE2::Set#compile.
 *
 * Calls RE2::Set::Compile on the wrapped set and converts its result to a
 * Ruby boolean.
 */
static VALUE re2_set_compile(VALUE self) {
  re2_set *wrapper = unwrap_re2_set(self);
  const bool compiled = wrapper->set->Compile();

  return BOOL2RUBY(compiled);
}

#initialize_copy() ⇒ Object



2261
2262
2263
# File 'ext/re2/re2.cc', line 2261

/*
 * Implements RE2::Set#initialize_copy.
 *
 * Always raises TypeError; neither argument is inspected.
 */
static VALUE re2_set_initialize_copy(VALUE /* self */, VALUE /* orig */) {
  rb_raise(rb_eTypeError, "cannot copy RE2::Set");
}

#length ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



2409
2410
2411
2412
2413
2414
2415
2416
2417
# File 'ext/re2/re2.cc', line 2409

/*
 * Implements RE2::Set#size and #length.
 *
 * When HAVE_SET_SIZE is defined, returns the result of RE2::Set::Size as an
 * Integer; otherwise raises the set's unsupported-operation error.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *wrapper = unwrap_re2_set(self);
  const auto pattern_count = wrapper->set->Size();

  return INT2FIX(pattern_count);
#endif
}

#match(str) ⇒ Array<Integer> #match(str, options) ⇒ Array<Integer>

Matches the given text against patterns in the set, returning an array of integer indices of the matching patterns if matched or an empty array if there are no matches.

Overloads:

  • #match(str) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef") #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

  • #match(str, options) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching and the :exception option is set to true.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef", exception: true) #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    • options (Hash)

      the options with which to match

    Options Hash (options):

    • :exception (Boolean) — default: true

      whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

Returns:

  • (Array<Integer>)


2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
# File 'ext/re2/re2.cc', line 2488

/*
 * Implements RE2::Set#match.
 *
 * Takes the text to search and an optional options Hash. The :exception
 * option (default true) selects between two modes:
 *
 * - false: calls RE2::Set::Match without error information and returns the
 *   matching pattern indices, or an empty Array when nothing matched.
 * - true: requires HAVE_ERROR_INFO_ARGUMENT. Match failures that carry an
 *   error kind other than kNoError are raised as the set's match error;
 *   without HAVE_ERROR_INFO_ARGUMENT the unsupported-operation error is
 *   raised instead.
 */
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
  VALUE str, options;
  rb_scan_args(argc, argv, "11", &str, &options);

  StringValue(str);
  re2_set *wrapper = unwrap_re2_set(self);

  // Exceptions are on unless the caller passes a non-nil :exception value.
  bool raise_exception = true;
  if (RTEST(options)) {
    Check_Type(options, T_HASH);

    const VALUE requested = rb_hash_aref(options, ID2SYM(id_exception));
    if (!NIL_P(requested)) {
      raise_exception = RTEST(requested);
    }
  }

  std::vector<int> matches;

  if (!raise_exception) {
    const bool matched = wrapper->set->Match(
        re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &matches);
    VALUE indices = rb_ary_new2(matches.size());

    if (matched) {
      for (const int pattern_index : matches) {
        rb_ary_push(indices, INT2FIX(pattern_index));
      }
    }

    return indices;
  }

#ifdef HAVE_ERROR_INFO_ARGUMENT
  RE2::Set::ErrorInfo error_info;
  const bool matched = wrapper->set->Match(
      re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &matches,
      &error_info);
  VALUE indices = rb_ary_new2(matches.size());

  if (matched) {
    for (const int pattern_index : matches) {
      rb_ary_push(indices, INT2FIX(pattern_index));
    }

    return indices;
  }

  switch (error_info.kind) {
    case RE2::Set::kNoError:
      // A plain "no match": fall through to returning the empty Array.
      break;
    case RE2::Set::kNotCompiled:
      rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
    case RE2::Set::kOutOfMemory:
      rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
    case RE2::Set::kInconsistent:
      rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
    default:  // Just in case a future version of libre2 adds new ErrorKinds
      rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", error_info.kind);
  }

  return indices;
#else
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
#endif
}

#size ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



2409
2410
2411
2412
2413
2414
2415
2416
2417
# File 'ext/re2/re2.cc', line 2409

/*
 * Implements RE2::Set#size and #length.
 *
 * When HAVE_SET_SIZE is defined, returns the result of RE2::Set::Size as an
 * Integer; otherwise raises the set's unsupported-operation error.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *wrapper = unwrap_re2_set(self);
  const auto pattern_count = wrapper->set->Size();

  return INT2FIX(pattern_count);
#endif
}