Module: RE2

Defined in:
ext/re2/re2.cc,
lib/re2/regexp.rb,
lib/re2/string.rb,
lib/re2/scanner.rb,
lib/re2/version.rb

Defined Under Namespace

Modules: String Classes: MatchData, Regexp, Scanner, Set

Constant Summary collapse

VERSION =
"2.25.0"

Class Method Summary collapse

Class Method Details

.escape(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using QuoteMeta. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2160
2161
2162
2163
2164
2165
2166
2167
# File 'ext/re2/re2.cc', line 2160

static VALUE re2_escape(VALUE, VALUE unquoted) {
  /* Coerce the argument to a String before touching its bytes. */
  StringValue(unquoted);

  /* View the Ruby string's bytes without copying, then let RE2 escape
   * every character that could be meaningful in a regexp.
   */
  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  /* Hand the escaped bytes back as a new Ruby String. */
  return rb_str_new(escaped.data(), escaped.size());
}

.extract(text, pattern, rewrite) ⇒ String?

If pattern matches text, returns a copy of rewrite with substitutions using Extract. Non-matching portions of text are ignored.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
#=> "example-alice"
RE2.extract("no match", '(\d+)', '\1') #=> nil

Parameters:

  • text (String)

    the string from which to extract

  • pattern (String, RE2::Regexp)

    a regexp matching the text

  • rewrite (String)

    the rewrite string with \1-style substitutions

Returns:

  • (String, nil)

    the extracted string on a successful match or nil if there is no match

Raises:

  • (TypeError)

    if the given text, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
# File 'ext/re2/re2.cc', line 2101

static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
    VALUE rewrite) {
  /* Coerce rewrite, then text, to Strings. */
  StringValue(rewrite);
  StringValue(text);

  /* A precompiled RE2::Regexp is used as-is and the result is built with
   * the encoding recorded in that pattern's options.
   */
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    re2_pattern *compiled = unwrap_re2_regexp(pattern);
    std::string result;

    const bool matched = RE2::Extract(
        re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
        *compiled->pattern,
        re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
        &result);

    if (!matched) {
      return Qnil;
    }

    return encoded_str_new(result.data(), result.size(),
        compiled->pattern->options().encoding());
  }

  /* Otherwise the pattern must be coercible to a String; it is compiled
   * into a temporary RE2 for this call and the result is built as UTF-8.
   */
  StringValue(pattern);

  std::string result;
  const bool matched = RE2::Extract(
      re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
      RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
      re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
      &result);

  if (!matched) {
    return Qnil;
  }

  return encoded_str_new(result.data(), result.size(),
      RE2::Options::EncodingUTF8);
}

.global_replace(str, pattern, rewrite) ⇒ String

Return a copy of str with pattern replaced by rewrite using GlobalReplace.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

re2 = RE2::Regexp.new("oo?")
RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
RE2.global_replace("hello there", "e", "i")  #=> "hillo thiri"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
# File 'ext/re2/re2.cc', line 2047

static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
                               VALUE rewrite) {
  /* Ensure rewrite and str are strings. */
  StringValue(rewrite);
  StringValue(str);

  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    re2_pattern *p = unwrap_re2_regexp(pattern);

    /* Take a copy of str so it can be modified in-place by
     * RE2::GlobalReplace.
     */
    std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

    /* Do the replacement with the precompiled pattern. */
    RE2::GlobalReplace(&str_as_string, *p->pattern,
        re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* Ensure pattern is a string. This is done before the std::string copy
   * of str exists: a failed coercion raises TypeError by jumping out of
   * this frame without running C++ destructors, which would otherwise
   * leak the copy.
   */
  StringValue(pattern);

  /* Take a copy of str so it can be modified in-place by
   * RE2::GlobalReplace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

  /* Do the replacement; the pattern string is compiled for this call. */
  RE2::GlobalReplace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}

.GlobalReplace(str, pattern, rewrite) ⇒ String

Return a copy of str with pattern replaced by rewrite using GlobalReplace.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

re2 = RE2::Regexp.new("oo?")
RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
RE2.global_replace("hello there", "e", "i")  #=> "hillo thiri"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
# File 'ext/re2/re2.cc', line 2047

static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
                               VALUE rewrite) {
  /* Ensure rewrite and str are strings. */
  StringValue(rewrite);
  StringValue(str);

  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    re2_pattern *p = unwrap_re2_regexp(pattern);

    /* Take a copy of str so it can be modified in-place by
     * RE2::GlobalReplace.
     */
    std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

    /* Do the replacement with the precompiled pattern. */
    RE2::GlobalReplace(&str_as_string, *p->pattern,
        re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* Ensure pattern is a string. This is done before the std::string copy
   * of str exists: a failed coercion raises TypeError by jumping out of
   * this frame without running C++ destructors, which would otherwise
   * leak the copy.
   */
  StringValue(pattern);

  /* Take a copy of str so it can be modified in-place by
   * RE2::GlobalReplace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

  /* Do the replacement; the pattern string is compiled for this call. */
  RE2::GlobalReplace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}

.quote(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using QuoteMeta. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2160
2161
2162
2163
2164
2165
2166
2167
# File 'ext/re2/re2.cc', line 2160

static VALUE re2_escape(VALUE, VALUE unquoted) {
  /* Coerce the argument to a String before touching its bytes. */
  StringValue(unquoted);

  /* View the Ruby string's bytes without copying, then let RE2 escape
   * every character that could be meaningful in a regexp.
   */
  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  /* Hand the escaped bytes back as a new Ruby String. */
  return rb_str_new(escaped.data(), escaped.size());
}

.QuoteMeta(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using QuoteMeta. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2160
2161
2162
2163
2164
2165
2166
2167
# File 'ext/re2/re2.cc', line 2160

static VALUE re2_escape(VALUE, VALUE unquoted) {
  /* Coerce the argument to a String before touching its bytes. */
  StringValue(unquoted);

  /* View the Ruby string's bytes without copying, then let RE2 escape
   * every character that could be meaningful in a regexp.
   */
  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  /* Hand the escaped bytes back as a new Ruby String. */
  return rb_str_new(escaped.data(), escaped.size());
}

.replace(str, pattern, rewrite) ⇒ String

Returns a copy of str with the first occurrence of pattern replaced with rewrite using Replace.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
re2 = RE2::Regexp.new("hel+o")
RE2.replace("hello there", re2, "yo")        #=> "yo there"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
# File 'ext/re2/re2.cc', line 1994

static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* Ensure rewrite and str are strings. */
  StringValue(rewrite);
  StringValue(str);

  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    re2_pattern *p = unwrap_re2_regexp(pattern);

    /* Take a copy of str so it can be modified in-place by
     * RE2::Replace.
     */
    std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

    /* Do the replacement with the precompiled pattern. */
    RE2::Replace(&str_as_string, *p->pattern,
        re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* Ensure pattern is a string. This is done before the std::string copy
   * of str exists: a failed coercion raises TypeError by jumping out of
   * this frame without running C++ destructors, which would otherwise
   * leak the copy.
   */
  StringValue(pattern);

  /* Take a copy of str so it can be modified in-place by
   * RE2::Replace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

  /* Do the replacement; the pattern string is compiled for this call. */
  RE2::Replace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}

.Replace(str, pattern, rewrite) ⇒ String

Returns a copy of str with the first occurrence of pattern replaced with rewrite using Replace.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
re2 = RE2::Regexp.new("hel+o")
RE2.replace("hello there", re2, "yo")        #=> "yo there"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
# File 'ext/re2/re2.cc', line 1994

static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* Ensure rewrite and str are strings. */
  StringValue(rewrite);
  StringValue(str);

  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    re2_pattern *p = unwrap_re2_regexp(pattern);

    /* Take a copy of str so it can be modified in-place by
     * RE2::Replace.
     */
    std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

    /* Do the replacement with the precompiled pattern. */
    RE2::Replace(&str_as_string, *p->pattern,
        re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* Ensure pattern is a string. This is done before the std::string copy
   * of str exists: a failed coercion raises TypeError by jumping out of
   * this frame without running C++ destructors, which would otherwise
   * leak the copy.
   */
  StringValue(pattern);

  /* Take a copy of str so it can be modified in-place by
   * RE2::Replace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));

  /* Do the replacement; the pattern string is compiled for this call. */
  RE2::Replace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}