Class: RE2::Set

Inherits:
Object show all
Defined in:
ext/re2/re2.cc

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RE2::Set #initialize(anchor) ⇒ RE2::Set #initialize(anchor, options) ⇒ RE2::Set

Returns a new RE2::Set object, a collection of patterns that can be searched for simultaneously.

Overloads:

  • #initialize ⇒ RE2::Set

    Returns a new RE2::Set object for unanchored patterns with the default options.

    Raises:

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor) ⇒ RE2::Set

    Returns a new RE2::Set object for the specified anchor with the default options.

    Parameters:

    • anchor (Symbol)

      One of :unanchored, :anchor_start, :anchor_both

    Raises:

    • (ArgumentError)

      if anchor is not :unanchored, :anchor_start or :anchor_both

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor, options) ⇒ RE2::Set

    Returns a new RE2::Set object with the specified options.

    Parameters:

    • anchor (Symbol)

      One of :unanchored, :anchor_start, :anchor_both

    • options (Hash)

      the options with which to compile the pattern

    Options Hash (options):

    • :utf8 (Boolean) — default: true

      text and pattern are UTF-8; otherwise Latin-1

    • :posix_syntax (Boolean) — default: false

      restrict regexps to POSIX egrep syntax

    • :longest_match (Boolean) — default: false

      search for longest match, not first match

    • :log_errors (Boolean) — default: true

      log syntax and execution errors to ERROR

    • :max_mem (Integer)

      approx. max memory footprint of RE2

    • :literal (Boolean) — default: false

      interpret string as literal, not regexp

    • :never_nl (Boolean) — default: false

      never match \n, even if it is in regexp

    • :case_sensitive (Boolean) — default: true

      match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)

    • :perl_classes (Boolean) — default: false

      allow Perl’s \d \s \w \D \S \W when in posix_syntax mode

    • :word_boundary (Boolean) — default: false

      allow \b \B (word boundary and not) when in posix_syntax mode

    • :one_line (Boolean) — default: false

      ^ and $ only match beginning and end of text when in posix_syntax mode

    Raises:

    • (ArgumentError)

      if anchor is not one of the accepted choices

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern



1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
# File 'ext/re2/re2.cc', line 1673

/*
 * Implements RE2::Set#initialize.
 *
 * Accepts up to two optional arguments: an anchor Symbol and an options
 * Hash. A nil (or omitted) anchor leaves the set unanchored; a falsy (or
 * omitted) options value leaves RE2's default options untouched.
 *
 * Raises ArgumentError when the anchor Symbol is not one of :unanchored,
 * :anchor_start or :anchor_both, and NoMemoryError when the RE2::Set could
 * not be allocated.
 *
 * NOTE(review): the freshly allocated RE2::Set is stored over whatever
 * s->set previously held without deleting it here -- confirm whether a
 * repeated #initialize call on the same object is handled elsewhere.
 */
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE anchor, options;
  rb_scan_args(argc, argv, "02", &anchor, &options);

  re2_set *set_data;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, set_data);

  /* Translate the Ruby Symbol (if any) into RE2's anchor enum. */
  RE2::Anchor anchor_mode = RE2::UNANCHORED;
  if (!NIL_P(anchor)) {
    Check_Type(anchor, T_SYMBOL);
    const ID requested = SYM2ID(anchor);

    if (requested == id_anchor_start) {
      anchor_mode = RE2::ANCHOR_START;
    } else if (requested == id_anchor_both) {
      anchor_mode = RE2::ANCHOR_BOTH;
    } else if (requested != id_unanchored) {
      /* :unanchored needs no work as it is already the default. */
      rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
    }
  }

  /* Start from RE2's defaults and overlay anything the caller supplied. */
  RE2::Options compile_options;
  if (RTEST(options)) {
    parse_re2_options(&compile_options, options);
  }

  /* nothrow so an allocation failure surfaces as a Ruby exception rather
   * than a C++ one.
   */
  set_data->set = new(std::nothrow) RE2::Set(compile_options, anchor_mode);
  if (!set_data->set) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
  }

  return self;
}

Class Method Details

.match_raises_errors? ⇒ Bool

Returns whether the underlying re2 version outputs error information from RE2::Set::Match. If not, #match will raise an error if attempting to set its :exception option to true.

Returns:

  • (Bool)

    whether the underlying re2 outputs error information from Set matches



1771
1772
1773
1774
1775
1776
1777
# File 'ext/re2/re2.cc', line 1771

/*
 * Implements RE2::Set.match_raises_errors?.
 *
 * Returns Qtrue when this extension was built with
 * HAVE_ERROR_INFO_ARGUMENT defined and Qfalse otherwise. The answer is
 * fixed at compile time; the receiver argument is ignored.
 */
static VALUE re2_set_match_raises_errors_p(VALUE) {
#if defined(HAVE_ERROR_INFO_ARGUMENT)
  return Qtrue;
#else
  return Qfalse;
#endif
}

Instance Method Details

#add(pattern) ⇒ Integer

Adds a pattern to the set. Returns the index that will identify the pattern in the output of #match. Cannot be called after #compile has been called.

Examples:

set = RE2::Set.new
set.add("abc")    #=> 0
set.add("def")    #=> 1

Parameters:

  • pattern (String)

    the regex pattern

Returns:

  • (Integer)

    the index of the pattern in the set

Raises:

  • (ArgumentError)

    if called after compile or the pattern is rejected



1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
# File 'ext/re2/re2.cc', line 1722

/*
 * Implements RE2::Set#add.
 *
 * Coerces +pattern+ with StringValue, hands it to RE2::Set::Add and returns
 * the index RE2 reports for it as a Ruby Integer.
 *
 * Raises ArgumentError, including up to 99 bytes of RE2's error text, when
 * RE2::Set::Add returns a negative index.
 */
static VALUE re2_set_add(VALUE self, VALUE pattern) {
  StringValue(pattern);

  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  /* To prevent the memory of the err string leaking when we call rb_raise,
   * take a copy of it and let it go out of scope.
   */
  char msg[100];
  int index;

  {
    std::string err;

    /* Pass the pattern with its explicit byte length. Handing RE2 a bare
     * char * makes it measure the pattern up to the first NUL byte, which
     * silently truncates patterns containing embedded NULs and relies on
     * the Ruby String's buffer being NUL-terminated.
     */
    index = s->set->Add(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), &err);
    strlcpy(msg, err.c_str(), sizeof(msg));
  }

  if (index < 0) {
    rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", msg);
  }

  return INT2FIX(index);
}

#compile ⇒ Bool

Compiles a Set so it can be used to match against. Must be called after #add and before #match.

Examples:

set = RE2::Set.new
set.add("abc")
set.compile    # => true

Returns:

  • (Bool)

    whether compilation was a success



1757
1758
1759
1760
1761
1762
# File 'ext/re2/re2.cc', line 1757

/*
 * Implements RE2::Set#compile.
 *
 * Calls RE2::Set::Compile on the wrapped set and converts its boolean
 * result to a Ruby boolean via BOOL2RUBY.
 */
static VALUE re2_set_compile(VALUE self) {
  re2_set *set_data;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, set_data);

  const bool compiled = set_data->set->Compile();

  return BOOL2RUBY(compiled);
}

#match(str) ⇒ Array<Integer> #match(str, options) ⇒ Array<Integer>

Matches the given text against patterns in the set, returning an array of integer indices of the matching patterns if matched or an empty array if there are no matches.

Overloads:

  • #match(str) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef")    # => [0, 1]

    Parameters:

    • str (String)

      the text to match against

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of re2 does not output error information

  • #match(str, options) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching and the :exception option is set to true.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef", :exception => true)    # => [0, 1]

    Parameters:

    • str (String)

      the text to match against

    • options (Hash)

      the options with which to match

    Options Hash (options):

    • :exception (Boolean) — default: true

      whether to raise exceptions with re2’s error information (not supported on ABI version 0 of re2)

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of re2 does not output error information

Returns:

  • (Array<Integer>)


1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
# File 'ext/re2/re2.cc', line 1819

/*
 * Implements RE2::Set#match.
 *
 * Takes the text to search (coerced with StringValue) and an optional
 * options Hash. The only option read here is :exception, which defaults to
 * true when the Hash is absent or the key maps to nil.
 *
 * Returns a Ruby Array holding the indices RE2 reported as matching; the
 * Array is empty when nothing matched.
 *
 * With :exception truthy:
 *   - if built with HAVE_ERROR_INFO_ARGUMENT, a failed match whose error
 *     kind is anything other than kNoError raises re2_eSetMatchError;
 *   - otherwise re2_eSetUnsupportedError is always raised.
 * With :exception falsy, a failed match simply yields an empty Array.
 *
 * Raises TypeError (via Check_Type) if a truthy options value is not a Hash.
 */
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
  VALUE str, options;
  bool raise_exception = true;
  rb_scan_args(argc, argv, "11", &str, &options);

  StringValue(str);
  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  if (RTEST(options)) {
    Check_Type(options, T_HASH);

    VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
    if (!NIL_P(exception_option)) {
      raise_exception = RTEST(exception_option);
    }
  }

  std::vector<int> v;

  if (raise_exception) {
#ifdef HAVE_ERROR_INFO_ARGUMENT
    RE2::Set::ErrorInfo e;

    /* Pass the text with its explicit byte length. A bare char * would be
     * measured up to the first NUL byte, so text containing embedded NULs
     * would only be searched up to that point.
     */
    bool match_failed = !s->set->Match(
        re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v, &e);
    VALUE result = rb_ary_new2(v.size());

    if (match_failed) {
      switch (e.kind) {
        case RE2::Set::kNoError:
          /* Nothing matched but nothing went wrong: return the empty array. */
          break;
        case RE2::Set::kNotCompiled:
          rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
        case RE2::Set::kOutOfMemory:
          rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
        case RE2::Set::kInconsistent:
          rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
        default:  // Just in case a future version of libre2 adds new ErrorKinds
          rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
      }
    } else {
      for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
        rb_ary_push(result, INT2FIX(v[i]));
      }
    }

    return result;
#else
    rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
#endif
  } else {
    /* Same explicit-length handling as the error-reporting path above. */
    bool matched = s->set->Match(
        re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v);
    VALUE result = rb_ary_new2(v.size());

    if (matched) {
      for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
        rb_ary_push(result, INT2FIX(v[i]));
      }
    }

    return result;
  }
}