Module: RE2

Defined in:
ext/re2/re2.cc,
lib/re2/regexp.rb,
lib/re2/string.rb,
lib/re2/scanner.rb,
lib/re2/version.rb

Defined Under Namespace

Modules: String Classes: MatchData, Regexp, Scanner, Set

Constant Summary collapse

VERSION =
"2.27.0"

Class Method Summary collapse

Class Method Details

.escape(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using `QuoteMeta`. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2377
2378
2379
2380
2381
2382
2383
2384
# File 'ext/re2/re2.cc', line 2377

/* Escapes every potentially meaningful regexp character in the given string
 * using RE2::QuoteMeta and returns the result as a new Ruby String.
 *
 * The argument is coerced with StringValue before the C++ std::string below
 * exists, so a failed coercion cannot skip its destructor.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece source(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(source);

  return rb_str_new(escaped.data(), escaped.size());
}

.extract(text, pattern, rewrite) ⇒ String?

If pattern matches text, returns a copy of rewrite with substitutions using `Extract`. Non-matching portions of text are ignored.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
#=> "example-alice"
RE2.extract("no match", '(\d+)', '\1') #=> nil

Parameters:

  • text (String)

    the string from which to extract

  • pattern (String, RE2::Regexp)

    a regexp matching the text

  • rewrite (String)

    the rewrite string with \1-style substitutions

Returns:

  • (String, nil)

    the extracted string on a successful match or nil if there is no match

Raises:

  • (TypeError)

    if the given text, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
# File 'ext/re2/re2.cc', line 2307

/* Applies the pattern to text and, on a match, builds a copy of rewrite with
 * its substitutions filled in. Returns the extracted String, or nil when
 * nothing was extracted.
 *
 * The pattern may be an RE2::Regexp or anything coercible to a String.
 */
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
    VALUE rewrite) {
  /* All Ruby-side coercion happens first: a Ruby exception unwinds with
   * longjmp, which would skip the destructors of any C++ object already
   * constructed. Frozen copies also stop a later coercion from mutating a
   * string that was coerced earlier.
   */
  StringValue(text);
  text = rb_str_new_frozen(text);

  re2_pattern *compiled = nullptr;
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    compiled = unwrap_re2_regexp(pattern);
  } else {
    StringValue(pattern);
    pattern = rb_str_new_frozen(pattern);
  }

  StringValue(rewrite);
  rewrite = rb_str_new_frozen(rewrite);

  std::string result;

  nogvl_extract_arg arg;
  arg.out = &result;
  arg.extracted = false;
  arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
  arg.rewrite = re2::StringPiece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
  if (compiled) {
    arg.pattern = compiled->pattern;
  } else {
    /* No compiled regexp was given: hand over the raw pattern string. */
    arg.pattern = nullptr;
    arg.string_pattern = re2::StringPiece(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));
  }

  /* On Windows the worker is called directly; elsewhere it is dispatched
   * through rb_thread_call_without_gvl.
   */
#ifdef _WIN32
  nogvl_extract(&arg);
#else
  rb_thread_call_without_gvl(nogvl_extract, &arg, NULL, NULL);
#endif

  /* The StringPieces above point into these Ruby strings, so keep them
   * alive until the worker has finished.
   */
  RB_GC_GUARD(text);
  RB_GC_GUARD(rewrite);
  RB_GC_GUARD(pattern);

  if (!arg.extracted) {
    return Qnil;
  }

  /* A String pattern always yields UTF-8; a Regexp supplies its own
   * configured encoding.
   */
  const auto encoding = compiled
      ? compiled->pattern->options().encoding()
      : RE2::Options::EncodingUTF8;

  return encoded_str_new(result.data(), result.size(), encoding);
}

.global_replace(str, pattern, rewrite) ⇒ String

Return a copy of str with pattern replaced by rewrite using `GlobalReplace`.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

re2 = RE2::Regexp.new("oo?")
RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
RE2.global_replace("hello there", "e", "i")  #=> "hillo thiri"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
# File 'ext/re2/re2.cc', line 2231

/* Returns a copy of str in which the pattern has been replaced by rewrite
 * using RE2's GlobalReplace.
 *
 * The pattern may be an RE2::Regexp or anything coercible to a String.
 */
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
                               VALUE rewrite) {
  /* All Ruby-side coercion happens first: a Ruby exception unwinds with
   * longjmp, which would skip the destructors of any C++ object already
   * constructed. Frozen copies also stop a later coercion from mutating a
   * string that was coerced earlier.
   */
  StringValue(str);
  str = rb_str_new_frozen(str);

  re2_pattern *compiled = nullptr;
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    compiled = unwrap_re2_regexp(pattern);
  } else {
    StringValue(pattern);
    pattern = rb_str_new_frozen(pattern);
  }

  StringValue(rewrite);
  rewrite = rb_str_new_frozen(rewrite);

  /* RE2::GlobalReplace edits its target in place, so work on a private
   * C++ copy of the input.
   */
  std::string buffer(RSTRING_PTR(str), RSTRING_LEN(str));

  nogvl_replace_arg arg;
  arg.str = &buffer;
  arg.rewrite = re2::StringPiece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
  if (compiled) {
    arg.pattern = compiled->pattern;
  } else {
    /* No compiled regexp was given: hand over the raw pattern string. */
    arg.pattern = nullptr;
    arg.string_pattern = re2::StringPiece(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));
  }

  /* On Windows the worker is called directly; elsewhere it is dispatched
   * through rb_thread_call_without_gvl.
   */
#ifdef _WIN32
  nogvl_global_replace(&arg);
#else
  rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
#endif

  /* The StringPieces above point into these Ruby strings, so keep them
   * alive until the worker has finished.
   */
  RB_GC_GUARD(rewrite);
  RB_GC_GUARD(pattern);

  /* A String pattern always yields UTF-8; a Regexp supplies its own
   * configured encoding.
   */
  const auto encoding = compiled
      ? compiled->pattern->options().encoding()
      : RE2::Options::EncodingUTF8;

  return encoded_str_new(buffer.data(), buffer.size(), encoding);
}

.GlobalReplace(str, pattern, rewrite) ⇒ String

Return a copy of str with pattern replaced by rewrite using `GlobalReplace`.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

re2 = RE2::Regexp.new("oo?")
RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
RE2.global_replace("hello there", "e", "i")  #=> "hillo thiri"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
# File 'ext/re2/re2.cc', line 2231

/* Returns a copy of str in which the pattern has been replaced by rewrite
 * using RE2's GlobalReplace.
 *
 * The pattern may be an RE2::Regexp or anything coercible to a String.
 */
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
                               VALUE rewrite) {
  /* All Ruby-side coercion happens first: a Ruby exception unwinds with
   * longjmp, which would skip the destructors of any C++ object already
   * constructed. Frozen copies also stop a later coercion from mutating a
   * string that was coerced earlier.
   */
  StringValue(str);
  str = rb_str_new_frozen(str);

  re2_pattern *compiled = nullptr;
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    compiled = unwrap_re2_regexp(pattern);
  } else {
    StringValue(pattern);
    pattern = rb_str_new_frozen(pattern);
  }

  StringValue(rewrite);
  rewrite = rb_str_new_frozen(rewrite);

  /* RE2::GlobalReplace edits its target in place, so work on a private
   * C++ copy of the input.
   */
  std::string buffer(RSTRING_PTR(str), RSTRING_LEN(str));

  nogvl_replace_arg arg;
  arg.str = &buffer;
  arg.rewrite = re2::StringPiece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
  if (compiled) {
    arg.pattern = compiled->pattern;
  } else {
    /* No compiled regexp was given: hand over the raw pattern string. */
    arg.pattern = nullptr;
    arg.string_pattern = re2::StringPiece(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));
  }

  /* On Windows the worker is called directly; elsewhere it is dispatched
   * through rb_thread_call_without_gvl.
   */
#ifdef _WIN32
  nogvl_global_replace(&arg);
#else
  rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
#endif

  /* The StringPieces above point into these Ruby strings, so keep them
   * alive until the worker has finished.
   */
  RB_GC_GUARD(rewrite);
  RB_GC_GUARD(pattern);

  /* A String pattern always yields UTF-8; a Regexp supplies its own
   * configured encoding.
   */
  const auto encoding = compiled
      ? compiled->pattern->options().encoding()
      : RE2::Options::EncodingUTF8;

  return encoded_str_new(buffer.data(), buffer.size(), encoding);
}

.quote(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using `QuoteMeta`. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2377
2378
2379
2380
2381
2382
2383
2384
# File 'ext/re2/re2.cc', line 2377

/* Escapes every potentially meaningful regexp character in the given string
 * using RE2::QuoteMeta and returns the result as a new Ruby String.
 *
 * The argument is coerced with StringValue before the C++ std::string below
 * exists, so a failed coercion cannot skip its destructor.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece source(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(source);

  return rb_str_new(escaped.data(), escaped.size());
}

.QuoteMeta(unquoted) ⇒ String

Returns a version of unquoted with all potentially meaningful regexp characters escaped using `QuoteMeta`. The returned string, used as a regular expression, will exactly match the original string.

Examples:

RE2.escape("1.5-2.0?")         #=> "1\\.5\\-2\\.0\\?"
RE2.quote("1.5-2.0?")          #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
RE2::Regexp.quote("1.5-2.0?")  #=> "1\\.5\\-2\\.0\\?"

Parameters:

  • unquoted (String)

    the unquoted string

Returns:

  • (String)

    the escaped string

Raises:

  • (TypeError)

    if the given unquoted string cannot be coerced to a String



2377
2378
2379
2380
2381
2382
2383
2384
# File 'ext/re2/re2.cc', line 2377

/* Escapes every potentially meaningful regexp character in the given string
 * using RE2::QuoteMeta and returns the result as a new Ruby String.
 *
 * The argument is coerced with StringValue before the C++ std::string below
 * exists, so a failed coercion cannot skip its destructor.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece source(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(source);

  return rb_str_new(escaped.data(), escaped.size());
}

.replace(str, pattern, rewrite) ⇒ String

Returns a copy of str with the first occurrence of pattern replaced with rewrite using `Replace`.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
re2 = RE2::Regexp.new("hel+o")
RE2.replace("hello there", re2, "yo")        #=> "yo there"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
# File 'ext/re2/re2.cc', line 2159

/* Returns a copy of str in which the first occurrence of the pattern has
 * been replaced by rewrite using RE2's Replace.
 *
 * The pattern may be an RE2::Regexp or anything coercible to a String.
 */
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* All Ruby-side coercion happens first: a Ruby exception unwinds with
   * longjmp, which would skip the destructors of any C++ object already
   * constructed. Frozen copies also stop a later coercion from mutating a
   * string that was coerced earlier.
   */
  StringValue(str);
  str = rb_str_new_frozen(str);

  re2_pattern *compiled = nullptr;
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    compiled = unwrap_re2_regexp(pattern);
  } else {
    StringValue(pattern);
    pattern = rb_str_new_frozen(pattern);
  }

  StringValue(rewrite);
  rewrite = rb_str_new_frozen(rewrite);

  /* RE2::Replace edits its target in place, so work on a private C++ copy
   * of the input.
   */
  std::string buffer(RSTRING_PTR(str), RSTRING_LEN(str));

  nogvl_replace_arg arg;
  arg.str = &buffer;
  arg.rewrite = re2::StringPiece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
  if (compiled) {
    arg.pattern = compiled->pattern;
  } else {
    /* No compiled regexp was given: hand over the raw pattern string. */
    arg.pattern = nullptr;
    arg.string_pattern = re2::StringPiece(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));
  }

  /* On Windows the worker is called directly; elsewhere it is dispatched
   * through rb_thread_call_without_gvl.
   */
#ifdef _WIN32
  nogvl_replace(&arg);
#else
  rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
#endif

  /* The StringPieces above point into these Ruby strings, so keep them
   * alive until the worker has finished.
   */
  RB_GC_GUARD(rewrite);
  RB_GC_GUARD(pattern);

  /* A String pattern always yields UTF-8; a Regexp supplies its own
   * configured encoding.
   */
  const auto encoding = compiled
      ? compiled->pattern->options().encoding()
      : RE2::Options::EncodingUTF8;

  return encoded_str_new(buffer.data(), buffer.size(), encoding);
}

.Replace(str, pattern, rewrite) ⇒ String

Returns a copy of str with the first occurrence of pattern replaced with rewrite using `Replace`.

Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).

Examples:

RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
re2 = RE2::Regexp.new("hel+o")
RE2.replace("hello there", re2, "yo")        #=> "yo there"

Parameters:

  • str (String)

    the string to modify

  • pattern (String, RE2::Regexp)

    a regexp matching text to be replaced

  • rewrite (String)

    the string to replace with

Returns:

  • (String)

    the resulting string

Raises:

  • (TypeError)

    if the given str, rewrite or pattern (if not provided as a Regexp) cannot be coerced to Strings



2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
# File 'ext/re2/re2.cc', line 2159

/* Returns a copy of str in which the first occurrence of the pattern has
 * been replaced by rewrite using RE2's Replace.
 *
 * The pattern may be an RE2::Regexp or anything coercible to a String.
 */
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* All Ruby-side coercion happens first: a Ruby exception unwinds with
   * longjmp, which would skip the destructors of any C++ object already
   * constructed. Frozen copies also stop a later coercion from mutating a
   * string that was coerced earlier.
   */
  StringValue(str);
  str = rb_str_new_frozen(str);

  re2_pattern *compiled = nullptr;
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
    compiled = unwrap_re2_regexp(pattern);
  } else {
    StringValue(pattern);
    pattern = rb_str_new_frozen(pattern);
  }

  StringValue(rewrite);
  rewrite = rb_str_new_frozen(rewrite);

  /* RE2::Replace edits its target in place, so work on a private C++ copy
   * of the input.
   */
  std::string buffer(RSTRING_PTR(str), RSTRING_LEN(str));

  nogvl_replace_arg arg;
  arg.str = &buffer;
  arg.rewrite = re2::StringPiece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
  if (compiled) {
    arg.pattern = compiled->pattern;
  } else {
    /* No compiled regexp was given: hand over the raw pattern string. */
    arg.pattern = nullptr;
    arg.string_pattern = re2::StringPiece(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));
  }

  /* On Windows the worker is called directly; elsewhere it is dispatched
   * through rb_thread_call_without_gvl.
   */
#ifdef _WIN32
  nogvl_replace(&arg);
#else
  rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
#endif

  /* The StringPieces above point into these Ruby strings, so keep them
   * alive until the worker has finished.
   */
  RB_GC_GUARD(rewrite);
  RB_GC_GUARD(pattern);

  /* A String pattern always yields UTF-8; a Regexp supplies its own
   * configured encoding.
   */
  const auto encoding = compiled
      ? compiled->pattern->options().encoding()
      : RE2::Options::EncodingUTF8;

  return encoded_str_new(buffer.data(), buffer.size(), encoding);
}