Class: RE2::Set

Inherits:
Object show all
Defined in:
ext/re2/re2.cc

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RE2::Set #initialize(anchor) ⇒ RE2::Set #initialize(anchor, options) ⇒ RE2::Set

Returns a new RE2::Set object, a collection of patterns that can be searched for simultaneously.

Overloads:

  • #initialize ⇒ RE2::Set

    Returns a new RE2::Set object for unanchored patterns with the default options.

    Raises:

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor) ⇒ RE2::Set

    Returns a new RE2::Set object for the specified anchor with the default options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    Raises:

    • (ArgumentError)

      if anchor is not :unanchored, :anchor_start or :anchor_both

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor, options) ⇒ RE2::Set

    Returns a new RE2::Set object with the specified options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    • options (Hash)

      the options with which to compile the pattern

    Options Hash (options):

    • :utf8 (Boolean) — default: true

      text and pattern are UTF-8; otherwise Latin-1

    • :posix_syntax (Boolean) — default: false

      restrict regexps to POSIX egrep syntax

    • :longest_match (Boolean) — default: false

      search for longest match, not first match

    • :log_errors (Boolean) — default: true

      log syntax and execution errors to ERROR

    • :max_mem (Integer)

      approx. max memory footprint of RE2

    • :literal (Boolean) — default: false

      interpret string as literal, not regexp

    • :never_nl (Boolean) — default: false

      never match \n, even if it is in regexp

    • :case_sensitive (Boolean) — default: true

      match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)

    • :perl_classes (Boolean) — default: false

      allow Perl's \d \s \w \D \S \W when in posix_syntax mode

    • :word_boundary (Boolean) — default: false

      allow \b \B (word boundary and not) when in posix_syntax mode

    • :one_line (Boolean) — default: false

      ^ and $ only match beginning and end of text when in posix_syntax mode

    Raises:

    • (ArgumentError)

      if anchor is not one of the accepted choices

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern



2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
# File 'ext/re2/re2.cc', line 2479

/*
 * Initializer for RE2::Set.
 *
 * Takes up to two optional arguments: an anchor Symbol (:unanchored,
 * :anchor_start or :anchor_both; unanchored when omitted or nil) and an
 * options value which, when truthy, is passed to parse_re2_options.
 *
 * Raises ArgumentError for an unrecognised anchor Symbol, TypeError if the
 * receiver already holds a set, and NoMemoryError if the RE2::Set cannot be
 * allocated. Returns self.
 */
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE anchor, options;
  rb_scan_args(argc, argv, "02", &anchor, &options);

  re2_set *wrapper;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapper);

  /* Default to an unanchored set; an explicit Symbol may override it. */
  RE2::Anchor chosen_anchor = RE2::UNANCHORED;
  if (!NIL_P(anchor)) {
    Check_Type(anchor, T_SYMBOL);

    const ID requested = SYM2ID(anchor);
    if (requested == id_unanchored) {
      chosen_anchor = RE2::UNANCHORED;
    } else if (requested == id_anchor_start) {
      chosen_anchor = RE2::ANCHOR_START;
    } else if (requested == id_anchor_both) {
      chosen_anchor = RE2::ANCHOR_BOTH;
    } else {
      rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
    }
  }

  /* Default-constructed options are used unless the caller supplied some. */
  RE2::Options compile_options;
  if (RTEST(options)) {
    parse_re2_options(&compile_options, options);
  }

  rb_check_frozen(self);

  /* Refuse to initialise twice. #match drops the GVL while it still holds
   * a pointer to wrapper->set, so swapping the set from another thread
   * would free the object while a concurrent match is using it.
   */
  if (wrapper->set) {
    rb_raise(rb_eTypeError, "already initialized RE2::Set");
  }

  /* Non-throwing new: report allocation failure as a Ruby exception. */
  wrapper->set = new(std::nothrow) RE2::Set(compile_options, chosen_anchor);
  if (!wrapper->set) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
  }

  return self;
}

Class Method Details

.match_raises_errors? ⇒ Boolean

Returns whether the underlying RE2 version outputs error information from `RE2::Set::Match`. If not, #match will raise an error if attempting to set its :exception option to true.

Returns:

  • (Boolean)

    whether the underlying RE2 outputs error information from RE2::Set matches



2613
2614
2615
2616
2617
2618
2619
# File 'ext/re2/re2.cc', line 2613

/*
 * Reports whether this build was compiled with HAVE_ERROR_INFO_ARGUMENT
 * defined. Returns Qtrue if so, Qfalse otherwise.
 */
static VALUE re2_set_match_raises_errors_p(VALUE) {
#if defined(HAVE_ERROR_INFO_ARGUMENT)
  const bool has_error_info = true;
#else
  const bool has_error_info = false;
#endif

  return has_error_info ? Qtrue : Qfalse;
}

.size? ⇒ Boolean

Returns whether the underlying RE2 version has a Set::Size method.

Returns:

  • (Boolean)

    whether the underlying RE2 has a Set::Size method



2626
2627
2628
2629
2630
2631
2632
# File 'ext/re2/re2.cc', line 2626

/*
 * Reports whether this build was compiled with HAVE_SET_SIZE defined.
 * Returns Qtrue if so, Qfalse otherwise.
 */
static VALUE re2_set_size_p(VALUE) {
#if defined(HAVE_SET_SIZE)
  const bool has_size = true;
#else
  const bool has_size = false;
#endif

  return has_size ? Qtrue : Qfalse;
}

Instance Method Details

#add(pattern) ⇒ Integer

Adds a pattern to the set. Returns the index that will identify the pattern in the output of #match. Cannot be called after #compile has been called.

Examples:

set = RE2::Set.new
set.add("abc") #=> 0
set.add("def") #=> 1

Parameters:

  • pattern (String)

    the regex pattern

Returns:

  • (Integer)

    the index of the pattern in the set

Raises:

  • (ArgumentError)

    if called after compile or the pattern is rejected



2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
# File 'ext/re2/re2.cc', line 2539

/*
 * Adds a pattern to the set.
 *
 * pattern is coerced with StringValue and passed to RE2::Set::Add as a
 * pointer/length pair. Returns the index reported by Add as a Fixnum, or
 * raises ArgumentError carrying Add's error text when the index is
 * negative. rb_check_frozen is applied to the receiver before adding.
 */
static VALUE re2_set_add(VALUE self, VALUE pattern) {
  StringValue(pattern);

  re2_set *wrapper = unwrap_re2_set(self);
  rb_check_frozen(self);

  int pattern_index;
  VALUE error_text;

  /* The std::string lives in its own scope and its contents are copied
   * into a Ruby string before the scope closes -- presumably so that it
   * has already been destroyed by the time rb_raise is reached below.
   */
  {
    std::string add_error;
    const re2::StringPiece source(RSTRING_PTR(pattern), RSTRING_LEN(pattern));

    pattern_index = wrapper->set->Add(source, &add_error);
    error_text = rb_str_new(add_error.data(), add_error.size());
  }

  if (pattern_index >= 0) {
    return INT2FIX(pattern_index);
  }

  rb_raise(rb_eArgError,
           "str rejected by RE2::Set->Add(): %s", RSTRING_PTR(error_text));
}

#compile ⇒ Boolean

Compiles a RE2::Set so it can be used to match against. Must be called after #add and before #match.

Examples:

set = RE2::Set.new
set.add("abc")
set.compile #=> true

Returns:

  • (Boolean)

    whether compilation was a success



2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
# File 'ext/re2/re2.cc', line 2573

/*
 * Compiles the set by calling RE2::Set::Compile.
 *
 * rb_check_frozen is applied to the receiver first. When compilation
 * succeeds the receiver is frozen with rb_obj_freeze; when it fails the
 * receiver is left unfrozen. Returns the outcome converted with BOOL2RUBY.
 */
static VALUE re2_set_compile(VALUE self) {
  re2_set *wrapper = unwrap_re2_set(self);
  rb_check_frozen(self);

  if (!wrapper->set->Compile()) {
    return BOOL2RUBY(false);
  }

  /* Only a successfully compiled set gets frozen. */
  rb_obj_freeze(self);

  return BOOL2RUBY(true);
}

#initialize_copy() ⇒ Object



2434
2435
2436
# File 'ext/re2/re2.cc', line 2434

/*
 * Copy initializer for RE2::Set (listed as #initialize_copy). Both arguments
 * are ignored: the function unconditionally raises TypeError with the
 * message "cannot copy RE2::Set" and never returns normally.
 */
static VALUE re2_set_initialize_copy(VALUE, VALUE) {
  rb_raise(rb_eTypeError, "cannot copy RE2::Set");
}

#length ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



2595
2596
2597
2598
2599
2600
2601
2602
2603
# File 'ext/re2/re2.cc', line 2595

/*
 * Returns the value of RE2::Set::Size for the wrapped set as a Fixnum.
 *
 * When the build lacks HAVE_SET_SIZE the method cannot be supported and
 * always raises re2_eSetUnsupportedError instead.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *wrapper = unwrap_re2_set(self);
  const auto pattern_count = wrapper->set->Size();

  return INT2FIX(pattern_count);
#endif
}

#match(str) ⇒ Array<Integer> #match(str, options) ⇒ Array<Integer>

Matches the given text against patterns in the set, returning an array of integer indices of the matching patterns if matched or an empty array if there are no matches.

Overloads:

  • #match(str) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef") #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

  • #match(str, options) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching and the :exception option is set to true.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef", exception: true) #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    • options (Hash)

      the options with which to match

    Options Hash (options):

    • :exception (Boolean) — default: true

      whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

Returns:

  • (Array<Integer>)


2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
# File 'ext/re2/re2.cc', line 2674

/*
 * Matches a string against the set and returns a Ruby Array of the indices
 * (as Fixnums) of the patterns that matched; the Array is empty when
 * nothing matched.
 *
 * Arguments (via rb_scan_args "11"): one required str and one optional
 * options value. When options is truthy it must be a Hash; its :exception
 * entry, if not nil, selects whether errors are raised (default: true).
 *
 * With :exception true:
 *   - if HAVE_ERROR_INFO_ARGUMENT is defined, a failed match is inspected
 *     through RE2::Set::ErrorInfo and re2_eSetMatchError is raised for any
 *     kind other than kNoError;
 *   - otherwise re2_eSetUnsupportedError is raised unconditionally.
 * With :exception false, a failed match simply yields an empty Array.
 *
 * The match itself is performed by nogvl_set_match, called directly on
 * _WIN32 and through rb_thread_call_without_gvl elsewhere.
 */
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
  VALUE str, options;
  bool raise_exception = true;
  rb_scan_args(argc, argv, "11", &str, &options);

  /* Coerce to a String, then work on a frozen copy of it.
   * NOTE(review): presumably this keeps the bytes stable while the GVL is
   * released below -- confirm.
   */
  StringValue(str);
  str = rb_str_new_frozen(str);

  re2_set *s = unwrap_re2_set(self);

  /* A falsy options value is ignored entirely; a truthy one must be a Hash. */
  if (RTEST(options)) {
    Check_Type(options, T_HASH);

    VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
    if (!NIL_P(exception_option)) {
      raise_exception = RTEST(exception_option);
    }
  }

  /* Receives the indices of the matching patterns. */
  std::vector<int> v;

  if (raise_exception) {
#ifdef HAVE_ERROR_INFO_ARGUMENT
    RE2::Set::ErrorInfo e;
    nogvl_set_match_arg arg;
    arg.set = s->set;
    arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
    arg.v = &v;
    arg.error_info = &e;
    arg.matched = false;

#ifdef _WIN32
    nogvl_set_match(&arg);
#else
    rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
#endif
    /* arg.text points into str's buffer, so str must stay alive until the
     * match has finished.
     */
    RB_GC_GUARD(str);

    bool match_failed = !arg.matched;
    VALUE result = rb_ary_new2(v.size());

    if (match_failed) {
      /* kNoError means "no pattern matched": fall through and return the
       * empty result. Every other kind is reported as a MatchError.
       */
      switch (e.kind) {
        case RE2::Set::kNoError:
          break;
        case RE2::Set::kNotCompiled:
          rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
        case RE2::Set::kOutOfMemory:
          rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
        case RE2::Set::kInconsistent:
          rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
        default:  // Just in case a future version of libre2 adds new ErrorKinds
          rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
      }
    } else {
      for (int index : v) {
        rb_ary_push(result, INT2FIX(index));
      }
    }

    return result;
#else
    rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
#endif
  } else {
    /* Non-raising path: no error information is requested, so a failed
     * match and "no matches" both produce an empty Array.
     */
    nogvl_set_match_arg arg;
    arg.set = s->set;
    arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
    arg.v = &v;
#ifdef HAVE_ERROR_INFO_ARGUMENT
    arg.error_info = nullptr;
#endif
    arg.matched = false;

#ifdef _WIN32
    nogvl_set_match(&arg);
#else
    rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
#endif
    /* As above: keep str alive while arg.text refers to its buffer. */
    RB_GC_GUARD(str);

    VALUE result = rb_ary_new2(v.size());

    if (arg.matched) {
      for (int index : v) {
        rb_ary_push(result, INT2FIX(index));
      }
    }

    return result;
  }
}

#size ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



2595
2596
2597
2598
2599
2600
2601
2602
2603
# File 'ext/re2/re2.cc', line 2595

/*
 * Returns the value of RE2::Set::Size for the wrapped set as a Fixnum.
 *
 * When the build lacks HAVE_SET_SIZE the method cannot be supported and
 * always raises re2_eSetUnsupportedError instead.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *wrapper = unwrap_re2_set(self);
  const auto pattern_count = wrapper->set->Size();

  return INT2FIX(pattern_count);
#endif
}