Class: RE2::Set

Inherits:
Object show all
Defined in:
ext/re2/re2.cc

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RE2::Set #initialize(anchor) ⇒ RE2::Set #initialize(anchor, options) ⇒ RE2::Set

Returns a new RE2::Set object, a collection of patterns that can be searched for simultaneously.

Overloads:

  • #initialize ⇒ RE2::Set

    Returns a new RE2::Set object for unanchored patterns with the default options.

    Raises:

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor) ⇒ RE2::Set

    Returns a new RE2::Set object for the specified anchor with the default options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    Raises:

    • (ArgumentError)

      if anchor is not :unanchored, :anchor_start or :anchor_both

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern

  • #initialize(anchor, options) ⇒ RE2::Set

    Returns a new RE2::Set object with the specified options.

    Parameters:

    • anchor (Symbol)

      one of :unanchored, :anchor_start, :anchor_both

    • options (Hash)

      the options with which to compile the pattern

    Options Hash (options):

    • :utf8 (Boolean) — default: true

      text and pattern are UTF-8; otherwise Latin-1

    • :posix_syntax (Boolean) — default: false

      restrict regexps to POSIX egrep syntax

    • :longest_match (Boolean) — default: false

      search for longest match, not first match

    • :log_errors (Boolean) — default: true

      log syntax and execution errors to ERROR

    • :max_mem (Integer)

      approx. max memory footprint of RE2

    • :literal (Boolean) — default: false

      interpret string as literal, not regexp

    • :never_nl (Boolean) — default: false

      never match \n, even if it is in regexp

    • :case_sensitive (Boolean) — default: true

      match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)

    • :perl_classes (Boolean) — default: false

      allow Perl's \d \s \w \D \S \W when in posix_syntax mode

    • :word_boundary (Boolean) — default: false

      allow \b \B (word boundary and not) when in posix_syntax mode

    • :one_line (Boolean) — default: false

      ^ and $ only match beginning and end of text when in posix_syntax mode

    Raises:

    • (ArgumentError)

      if anchor is not one of the accepted choices

    • (NoMemoryError)

      if memory could not be allocated for the compiled pattern



1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
# File 'ext/re2/re2.cc', line 1869

/* Initialises an RE2::Set from up to two optional arguments: an anchor
 * symbol (:unanchored, :anchor_start or :anchor_both) and an options hash.
 *
 * Raises ArgumentError for any other anchor symbol and NoMemoryError when
 * the underlying RE2::Set cannot be allocated.
 */
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE anchor, options;
  rb_scan_args(argc, argv, "02", &anchor, &options);

  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  /* An omitted (nil) anchor leaves the set unanchored. */
  RE2::Anchor re2_anchor = RE2::UNANCHORED;

  if (!NIL_P(anchor)) {
    Check_Type(anchor, T_SYMBOL);

    const ID requested = SYM2ID(anchor);
    if (requested == id_anchor_start) {
      re2_anchor = RE2::ANCHOR_START;
    } else if (requested == id_anchor_both) {
      re2_anchor = RE2::ANCHOR_BOTH;
    } else if (requested != id_unanchored) {
      /* :unanchored is already the default, so only unknown symbols
       * reach this branch.
       */
      rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
    }
  }

  /* Start from default-constructed options; only a truthy options
   * argument is handed to parse_re2_options.
   */
  RE2::Options re2_options;
  if (RTEST(options)) {
    parse_re2_options(&re2_options, options);
  }

  /* nothrow new: an allocation failure shows up as a null pointer rather
   * than a C++ exception, and is reported as a Ruby NoMemoryError.
   */
  s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
  if (!s->set) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
  }

  return self;
}

Class Method Details

.match_raises_errors? ⇒ Boolean

Returns whether the underlying RE2 version outputs error information from RE2::Set::Match. If not, #match will raise an error if attempting to set its :exception option to true.

Returns:

  • (Boolean)

    whether the underlying RE2 outputs error information from RE2::Set matches



1988
1989
1990
1991
1992
1993
1994
# File 'ext/re2/re2.cc', line 1988

/* Returns true when the extension was built with HAVE_ERROR_INFO_ARGUMENT
 * defined (the build in which #match passes an ErrorInfo to
 * RE2::Set::Match), and false otherwise. Decided at compile time.
 */
static VALUE re2_set_match_raises_errors_p(VALUE) {
#ifndef HAVE_ERROR_INFO_ARGUMENT
  return Qfalse;
#else
  return Qtrue;
#endif
}

.size? ⇒ Boolean

Returns whether the underlying RE2 version has a Set::Size method.

Returns:

  • (Boolean)

    whether the underlying RE2 has a Set::Size method



2001
2002
2003
2004
2005
2006
2007
# File 'ext/re2/re2.cc', line 2001

/* Returns true when the extension was built with HAVE_SET_SIZE defined
 * (the build in which #size calls RE2::Set::Size), and false otherwise.
 * Decided at compile time.
 */
static VALUE re2_set_size_p(VALUE) {
#ifndef HAVE_SET_SIZE
  return Qfalse;
#else
  return Qtrue;
#endif
}

Instance Method Details

#add(pattern) ⇒ Integer

Adds a pattern to the set. Returns the index that will identify the pattern in the output of #match. Cannot be called after #compile has been called.

Examples:

set = RE2::Set.new
set.add("abc") #=> 0
set.add("def") #=> 1

Parameters:

  • pattern (String)

    the regex pattern

Returns:

  • (Integer)

    the index of the pattern in the set

Raises:

  • (ArgumentError)

    if called after compile or the pattern is rejected



1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
# File 'ext/re2/re2.cc', line 1919

/* Adds a pattern to the set and returns the index RE2::Set::Add assigned
 * to it, which identifies the pattern in the output of #match.
 *
 * Raises ArgumentError, carrying the error text reported by RE2, when
 * RE2::Set::Add returns a negative index.
 */
static VALUE re2_set_add(VALUE self, VALUE pattern) {
  StringValue(pattern);

  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  VALUE msg;

  {
    std::string err;
    const int index = s->set->Add(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), &err);

    /* Common case: an ordinary return destroys err, so no Ruby string is
     * allocated for an error message that would never be used.
     */
    if (index >= 0) {
      return INT2FIX(index);
    }

    /* rb_raise leaves this function without running C++ destructors, so it
     * must not be called while err is alive. Copy the message into a Ruby
     * string and let err go out of scope first.
     */
    msg = rb_str_new(err.data(), err.size());
  }

  rb_raise(rb_eArgError,
           "str rejected by RE2::Set->Add(): %s", RSTRING_PTR(msg));
}

#compile ⇒ Boolean

Compiles a RE2::Set so it can be used to match against. Must be called after #add and before #match.

Examples:

set = RE2::Set.new
set.add("abc")
set.compile #=> true

Returns:

  • (Boolean)

    whether compilation was a success



1953
1954
1955
1956
1957
1958
# File 'ext/re2/re2.cc', line 1953

/* Compiles the underlying RE2::Set and returns the outcome of
 * RE2::Set::Compile converted to a Ruby boolean.
 */
static VALUE re2_set_compile(VALUE self) {
  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  const bool compiled = s->set->Compile();

  return BOOL2RUBY(compiled);
}

#length ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
# File 'ext/re2/re2.cc', line 1969

/* Returns the value of RE2::Set::Size for this set as a Ruby Integer.
 *
 * When the extension was built without HAVE_SET_SIZE, raises the
 * re2_eSetUnsupportedError exception instead.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  const int pattern_count = s->set->Size();

  return INT2FIX(pattern_count);
#endif
}

#match(str) ⇒ Array<Integer> #match(str, options) ⇒ Array<Integer>

Matches the given text against patterns in the set, returning an array of integer indices of the matching patterns if matched or an empty array if there are no matches.

Overloads:

  • #match(str) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef") #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

  • #match(str, options) ⇒ Array<Integer>

    Returns an array of integer indices of patterns matching the given string (if any). Raises exceptions if there are any errors while matching and the :exception option is set to true.

    Examples:

    set = RE2::Set.new
    set.add("abc")
    set.add("def")
    set.compile
    set.match("abcdef", exception: true) #=> [0, 1]

    Parameters:

    • str (String)

      the text to match against

    • options (Hash)

      the options with which to match

    Options Hash (options):

    • :exception (Boolean) — default: true

      whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)

    Returns:

    • (Array<Integer>)

      the indices of matching regexps

    Raises:

    • (MatchError)

      if an error occurs while matching

    • (UnsupportedError)

      if the underlying version of RE2 does not output error information

Returns:

  • (Array<Integer>)


2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
# File 'ext/re2/re2.cc', line 2049

/* Matches str against the set and returns a Ruby array holding the indices
 * RE2::Set::Match reported; the array is empty when Match returns false
 * without an error.
 *
 * The optional second argument is a hash whose :exception entry (default
 * true) selects whether match errors are raised. With :exception enabled:
 *  - builds with HAVE_ERROR_INFO_ARGUMENT raise re2_eSetMatchError for any
 *    error kind other than kNoError;
 *  - builds without it raise re2_eSetUnsupportedError, since no error
 *    information is available.
 */
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
  VALUE str, options;
  rb_scan_args(argc, argv, "11", &str, &options);
  StringValue(str);

  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  /* Exceptions are on unless the caller supplies a non-nil :exception. */
  bool raise_exception = true;
  if (RTEST(options)) {
    Check_Type(options, T_HASH);

    const VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
    if (!NIL_P(exception_option)) {
      raise_exception = RTEST(exception_option);
    }
  }

  std::vector<int> v;

  if (!raise_exception) {
    /* Quiet mode: a failed match simply yields an empty array. */
    const bool matched = s->set->Match(
        re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v);
    VALUE result = rb_ary_new2(v.size());

    if (matched) {
      for (std::vector<int>::const_iterator it = v.begin(); it != v.end(); ++it) {
        rb_ary_push(result, INT2FIX(*it));
      }
    }

    return result;
  }

#ifdef HAVE_ERROR_INFO_ARGUMENT
  RE2::Set::ErrorInfo e;
  const bool matched = s->set->Match(
      re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v, &e);
  VALUE result = rb_ary_new2(v.size());

  if (matched) {
    for (std::vector<int>::const_iterator it = v.begin(); it != v.end(); ++it) {
      rb_ary_push(result, INT2FIX(*it));
    }

    return result;
  }

  /* Match returned false: turn the reported error kind into an exception.
   * kNoError is the only kind that falls through to the return below,
   * which hands back the array with nothing pushed onto it.
   */
  switch (e.kind) {
    case RE2::Set::kNoError:
      break;
    case RE2::Set::kNotCompiled:
      rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
    case RE2::Set::kOutOfMemory:
      rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
    case RE2::Set::kInconsistent:
      rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
    default:  // Just in case a future version of libre2 adds new ErrorKinds
      rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
  }

  return result;
#else
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
#endif
}

#size ⇒ Integer

Returns the size of the RE2::Set.

Examples:

set = RE2::Set.new
set.add("abc")
set.size #=> 1

Returns:

  • (Integer)

    the number of patterns in the set



1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
# File 'ext/re2/re2.cc', line 1969

/* Returns the value of RE2::Set::Size for this set as a Ruby Integer.
 *
 * When the extension was built without HAVE_SET_SIZE, raises the
 * re2_eSetUnsupportedError exception instead.
 */
static VALUE re2_set_size(VALUE self) {
#ifndef HAVE_SET_SIZE
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set does not have Size method");
#else
  re2_set *s;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  const int pattern_count = s->set->Size();

  return INT2FIX(pattern_count);
#endif
}