Class: RE2::Regexp
Class Method Summary collapse
-
.compile ⇒ Object
Returns a new Regexp object with a compiled version of
pattern stored inside. -
.escape(unquoted) ⇒ String
Returns a version of
str with all potentially meaningful regexp characters escaped using `QuoteMeta`. -
.match_has_endpos_argument? ⇒ Boolean
Returns whether the underlying RE2 version supports passing an
endpos argument to Match. -
.quote(unquoted) ⇒ String
Returns a version of
str with all potentially meaningful regexp characters escaped using `QuoteMeta`.
Instance Method Summary collapse
-
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
-
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
-
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false. -
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to true. -
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false. -
#error ⇒ String?
If the Regexp could not be created properly, returns an error string otherwise returns
nil. -
#error_arg ⇒ String?
If the Regexp could not be created properly, returns the offending portion of the regexp otherwise returns
nil. -
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given
text exactly and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using `FullMatch`.
-
#initialize(*args) ⇒ Object
constructor
Returns a new Regexp object with a compiled version of
pattern stored inside. - #initialize_copy(other) ⇒ Object
-
#inspect ⇒ String
Returns a printable version of the regular expression.
-
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal option set to true. -
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors option set to true. -
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match option set to true. - #match(*args) ⇒ Object
-
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
-
#max_mem ⇒ Integer
Returns the
max_mem setting for the regular expression. -
#named_captures ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#names ⇒ Array<String>
Returns an array of names of all named capturing groups.
-
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl option set to true. -
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp wasn't valid on construction.
-
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
-
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line option set to true. -
#options ⇒ Hash
Returns a hash of the options currently set for the Regexp.
-
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given
text and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
-
#pattern ⇒ String
Returns a string version of the regular expression.
-
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the perl_classes option set to
true. -
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax option set to true. -
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost".
-
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with `FindAndConsume`.
-
#source ⇒ String
Returns a string version of the regular expression.
-
#to_s ⇒ String
Returns a string version of the regular expression.
-
#to_str ⇒ String
Returns a string version of the regular expression.
-
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
utf8 option set to true. -
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary option set to true.
Constructor Details
#initialize(pattern) ⇒ RE2::Regexp #initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern stored inside.
1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 |
# File 'ext/re2/re2.cc', line 1372
/*
 * Compiles `pattern` (optionally with `options`) into a new RE2 object and
 * stores it in the receiver, discarding any pattern it previously held. The
 * receiver is frozen once the pattern has been stored.
 *
 * Accepts one required argument (the pattern) and one optional argument (the
 * options). Raises rb_eNoMemError if the RE2 object cannot be allocated.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options;
  rb_scan_args(argc, argv, "11", &pattern, &options);

  /* Ensure pattern is a string. */
  StringValue(pattern);

  re2_pattern *p;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
  rb_check_frozen(self);

  /* Release any previously compiled pattern; deleting nullptr is a no-op. */
  delete p->pattern;
  p->pattern = nullptr;

  if (!RTEST(options)) {
    p->pattern = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
  } else {
    RE2::Options re2_options;
    parse_re2_options(&re2_options, options);

    p->pattern = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
        re2_options);
  }

  /* new(std::nothrow) signals allocation failure with a null pointer. */
  if (!p->pattern) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  rb_obj_freeze(self);

  return self;
}
|
Class Method Details
.compile(pattern) ⇒ RE2::Regexp .compile(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern stored inside.
.escape(unquoted) ⇒ String
Returns a version of str with all potentially meaningful regexp characters
escaped using
`QuoteMeta`. The returned string, used as a regular expression, will
exactly match the original string.
2377 2378 2379 2380 2381 2382 2383 2384 |
# File 'ext/re2/re2.cc', line 2377
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby String. The argument is first coerced to a String.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
.match_has_endpos_argument? ⇒ Boolean
2130 2131 2132 2133 2134 2135 2136 |
# File 'ext/re2/re2.cc', line 2130 static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) { #ifdef HAVE_ENDPOS_ARGUMENT return Qtrue; #else return Qfalse; #endif } |
.quote(unquoted) ⇒ String
Returns a version of str with all potentially meaningful regexp characters
escaped using
`QuoteMeta`. The returned string, used as a regular expression, will
exactly match the original string.
2377 2378 2379 2380 2381 2382 2383 2384 |
# File 'ext/re2/re2.cc', line 2377
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby String. The argument is first coerced to a String.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
Instance Method Details
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 |
# File 'ext/re2/re2.cc', line 2045
/*
 * Returns a Ruby boolean saying whether the pattern matches anywhere in
 * `text`. The search is unanchored, spans the whole string and requests no
 * submatches.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  StringValue(text);

  /* Work on a frozen string so the bytes being matched cannot be mutated. */
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);
  const bool found = re2_match_without_gvl(
      p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);

  /* Keep `text` reachable until the match has finished. */
  RB_GC_GUARD(text);

  return BOOL2RUBY(found);
}
|
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 |
# File 'ext/re2/re2.cc', line 2045
/*
 * Returns a Ruby boolean saying whether the pattern matches anywhere in
 * `text`. The search is unanchored, spans the whole string and requests no
 * submatches.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  StringValue(text);

  /* Work on a frozen string so the bytes being matched cannot be mutated. */
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);
  const bool found = re2_match_without_gvl(
      p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);

  /* Keep `text` reachable until the match has finished. */
  RB_GC_GUARD(text);

  return BOOL2RUBY(found);
}
|
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false.
1621 1622 1623 |
# File 'ext/re2/re2.cc', line 1621 static VALUE re2_regexp_case_insensitive(const VALUE self) { return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue); } |
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to true.
1605 1606 1607 1608 1609 |
# File 'ext/re2/re2.cc', line 1605
/* Returns the compiled pattern's case_sensitive option as a Ruby boolean. */
static VALUE re2_regexp_case_sensitive(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.case_sensitive());
}
|
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false.
1621 1622 1623 |
# File 'ext/re2/re2.cc', line 1621 static VALUE re2_regexp_case_insensitive(const VALUE self) { return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue); } |
#error ⇒ String?
If the RE2::Regexp could not be created properly, returns an error string
otherwise returns nil.
1676 1677 1678 1679 1680 1681 1682 1683 1684 |
# File 'ext/re2/re2.cc', line 1676
/*
 * Returns Qnil when the pattern compiled successfully; otherwise returns a
 * new Ruby String holding RE2's error message.
 */
static VALUE re2_regexp_error(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);

  if (p->pattern->ok()) {
    return Qnil;
  }

  const std::string &message = p->pattern->error();

  return rb_str_new(message.data(), message.size());
}
|
#error_arg ⇒ String?
If the RE2::Regexp could not be created properly, returns
the offending portion of the regexp otherwise returns nil.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 |
# File 'ext/re2/re2.cc', line 1696
/*
 * Returns Qnil when the pattern compiled successfully; otherwise returns
 * RE2's error_arg() as a Ruby String built by encoded_str_new with the
 * pattern's configured encoding.
 */
static VALUE re2_regexp_error_arg(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);

  if (p->pattern->ok()) {
    return Qnil;
  }

  const std::string &offending = p->pattern->error_arg();

  return encoded_str_new(offending.data(), offending.size(),
      p->pattern->options().encoding());
}
|
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given text exactly and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
68 69 70 |
# File 'lib/re2/regexp.rb', line 68 def full_match(text, = {}) match(text, Hash().merge(anchor: :anchor_both)) end |
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using `FullMatch`.
2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 |
# File 'ext/re2/re2.cc', line 2066
/*
 * Returns a Ruby boolean saying whether the pattern matches the whole of
 * `text`. The match is anchored at both ends and requests no submatches.
 */
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
  StringValue(text);

  /* Work on a frozen string so the bytes being matched cannot be mutated. */
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);
  const bool found = re2_match_without_gvl(
      p->pattern, text, 0, RSTRING_LEN(text), RE2::ANCHOR_BOTH, 0, 0);

  /* Keep `text` reachable until the match has finished. */
  RB_GC_GUARD(text);

  return BOOL2RUBY(found);
}
|
#initialize_copy(other) ⇒ Object
1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 |
# File 'ext/re2/re2.cc', line 1410
/*
 * Makes the receiver a copy of `other` by compiling a new RE2 object from
 * other's pattern string and options. Any pattern the receiver already held
 * is released first, and the receiver is frozen afterwards.
 *
 * Raises rb_eNoMemError if the RE2 object cannot be allocated.
 */
static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
  re2_pattern *source = unwrap_re2_regexp(other);

  re2_pattern *target;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, target);
  rb_check_frozen(self);

  /* Release any previously compiled pattern; deleting nullptr is a no-op. */
  delete target->pattern;
  target->pattern = nullptr;

  target->pattern = new(std::nothrow) RE2(source->pattern->pattern(),
                                          source->pattern->options());

  /* new(std::nothrow) signals allocation failure with a null pointer. */
  if (!target->pattern) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  rb_obj_freeze(self);

  return self;
}
|
#inspect ⇒ String
Returns a printable version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is
undefined).
1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 |
# File 'ext/re2/re2.cc', line 1447
/*
 * Returns "#<RE2::Regexp /PATTERN/>" as a Ruby String built by
 * encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_inspect(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);

  std::ostringstream output;
  output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";

  /* str() returns a fresh copy of the buffer on every call, so take it once
   * rather than copying separately for the data pointer and the length. */
  const std::string inspected = output.str();

  return encoded_str_new(inspected.data(), inspected.length(),
      p->pattern->options().encoding());
}
|
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal option set to true.
1575 1576 1577 1578 1579 |
# File 'ext/re2/re2.cc', line 1575
/* Returns the compiled pattern's literal option as a Ruby boolean. */
static VALUE re2_regexp_literal(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.literal());
}
|
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors option set to true.
1546 1547 1548 1549 1550 |
# File 'ext/re2/re2.cc', line 1546
/* Returns the compiled pattern's log_errors option as a Ruby boolean. */
static VALUE re2_regexp_log_errors(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.log_errors());
}
|
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match option set to true.
1531 1532 1533 1534 1535 |
# File 'ext/re2/re2.cc', line 1531
/* Returns the compiled pattern's longest_match option as a Ruby boolean. */
static VALUE re2_regexp_longest_match(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.longest_match());
}
|
#match(text) ⇒ RE2::MatchData, ... #match(text, options) ⇒ RE2::MatchData, ... #match(text, submatches) ⇒ RE2::MatchData, ...
General matching: match the pattern against the given text using
`Match` and return a MatchData instance with the specified number of
submatches (defaults to the total number of capturing groups) or a boolean
(if no submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 |
# File 'ext/re2/re2.cc', line 1888
/*
 * General matching entry point.
 *
 * Takes the text to search plus one optional argument, which is either an
 * Integer (the number of submatches to extract) or something convertible to
 * a Hash that may carry the :endpos, :anchor, :submatches and :startpos
 * options.
 *
 * Returns:
 *   - a Ruby boolean when zero submatches are requested;
 *   - a MatchData instance when submatches are requested and the match
 *     succeeds;
 *   - Qnil when the match fails, or when the number of submatches was not
 *     given explicitly and the pattern did not compile.
 *
 * Raises rb_eArgError for negative counts/positions, for an unknown anchor
 * and when startpos > endpos; rb_eRangeError when a value would overflow an
 * int; rb_eNoMemError when the submatch array cannot be allocated; and
 * re2_eRegexpUnsupportedError when :endpos is given but the extension was
 * built without HAVE_ENDPOS_ARGUMENT.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
re2_pattern *p;
re2_matchdata *m;
VALUE text, options;
/* One required argument (text) and one optional argument (options). */
rb_scan_args(argc, argv, "11", &text, &options);
/* Coerce and freeze text to prevent mutation. */
StringValue(text);
text = rb_str_new_frozen(text);
p = unwrap_re2_regexp(self);
/* n is the number of submatches; every branch below assigns it or returns. */
int n;
/* Defaults: search the whole text, unanchored. */
size_t startpos = 0;
size_t endpos = RSTRING_LEN(text);
RE2::Anchor anchor = RE2::UNANCHORED;
if (RTEST(options)) {
if (RB_INTEGER_TYPE_P(options)) {
/* A bare Integer is the number of submatches. */
n = NUM2INT(options);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* Anything else is treated as an options Hash (converted if necessary). */
if (TYPE(options) != T_HASH) {
options = rb_Hash(options);
}
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
if (!NIL_P(endpos_option)) {
#ifdef HAVE_ENDPOS_ARGUMENT
ssize_t endpos_value = NUM2SSIZET(endpos_option);
if (endpos_value < 0) {
rb_raise(rb_eArgError, "endpos should be >= 0");
}
endpos = static_cast<size_t>(endpos_value);
#else
/* Builds without HAVE_ENDPOS_ARGUMENT reject any explicit :endpos. */
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
#endif
}
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
if (!NIL_P(anchor_option)) {
/* :anchor must be a Symbol naming one of the three RE2::Anchor values. */
Check_Type(anchor_option, T_SYMBOL);
ID id_anchor_option = SYM2ID(anchor_option);
if (id_anchor_option == id_unanchored) {
anchor = RE2::UNANCHORED;
} else if (id_anchor_option == id_anchor_start) {
anchor = RE2::ANCHOR_START;
} else if (id_anchor_option == id_anchor_both) {
anchor = RE2::ANCHOR_BOTH;
} else {
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
}
}
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
if (!NIL_P(submatches_option)) {
n = NUM2INT(submatches_option);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* No explicit count: default to every capturing group, which requires a
 * successfully compiled pattern. */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
if (!NIL_P(startpos_option)) {
ssize_t startpos_value = NUM2SSIZET(startpos_option);
if (startpos_value < 0) {
rb_raise(rb_eArgError, "startpos should be >= 0");
}
startpos = static_cast<size_t>(startpos_value);
}
}
} else {
/* No options at all: default to every capturing group, which requires a
 * successfully compiled pattern. */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
if (startpos > endpos) {
rb_raise(rb_eArgError, "startpos should be <= endpos");
}
#ifndef HAVE_ENDPOS_ARGUMENT
/* Old RE2's Match() takes int startpos. Reject values that would overflow. */
if (startpos > INT_MAX) {
rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
}
#endif
if (n == 0) {
/* No submatches requested: only report whether the pattern matched. */
bool matched = re2_match_without_gvl(
p->pattern, text, startpos, endpos, anchor, 0, 0);
RB_GC_GUARD(text);
return BOOL2RUBY(matched);
} else {
/* Guard the increment below against signed overflow. */
if (n == INT_MAX) {
rb_raise(rb_eRangeError, "number of matches should be < %d", INT_MAX);
}
/* Because match returns the whole match as well. */
n += 1;
/* nothrow new yields nullptr on failure, so it can be reported through
 * Ruby's NoMemoryError instead of a C++ exception. */
re2::StringPiece *matches = new(std::nothrow) re2::StringPiece[n];
if (matches == nullptr) {
rb_raise(rb_eNoMemError,
"not enough memory to allocate StringPieces for matches");
}
bool matched = re2_match_without_gvl(
p->pattern, text, startpos, endpos, anchor, matches, n);
RB_GC_GUARD(text);
if (matched) {
/* Hand the matches array to a new MatchData, which also records the
 * regexp and the frozen text. */
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
RB_OBJ_WRITE(matchdata, &m->regexp, self);
RB_OBJ_WRITE(matchdata, &m->text, text);
m->matches = matches;
m->number_of_matches = n;
return matchdata;
} else {
/* No MatchData takes the array on failure, so free it here. */
delete[] matches;
return Qnil;
}
}
}
|
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 |
# File 'ext/re2/re2.cc', line 2045
/*
 * Returns a Ruby boolean saying whether the pattern matches anywhere in
 * `text`. The search is unanchored, spans the whole string and requests no
 * submatches.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  StringValue(text);

  /* Work on a frozen string so the bytes being matched cannot be mutated. */
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);
  const bool found = re2_match_without_gvl(
      p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);

  /* Keep `text` reachable until the match has finished. */
  RB_GC_GUARD(text);

  return BOOL2RUBY(found);
}
|
#max_mem ⇒ Integer
Returns the max_mem setting for the regular expression.
1560 1561 1562 1563 1564 |
# File 'ext/re2/re2.cc', line 1560
/*
 * Returns the max_mem setting of the compiled pattern as a Ruby Integer.
 *
 * RE2 declares max_mem() as a 64-bit integer. INT2FIX is only valid for
 * values inside the Fixnum range and takes a `long`, so it can silently
 * truncate on platforms where `long` is 32 bits. LL2NUM converts any
 * long long value correctly.
 */
static VALUE re2_regexp_max_mem(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);

  return LL2NUM(p->pattern->options().max_mem());
}
|
#named_captures ⇒ Hash
Returns a hash of names to capturing indices of groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 |
# File 'ext/re2/re2.cc', line 1791
/*
 * Builds a new Ruby Hash mapping each named capturing group's name to its
 * capture index. Names are created by encoded_str_new with the pattern's
 * configured encoding.
 */
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const auto &named_groups = p->pattern->NamedCapturingGroups();
  const auto encoding = p->pattern->options().encoding();

  VALUE result = rb_hash_new();

  for (auto it = named_groups.begin(); it != named_groups.end(); ++it) {
    VALUE name = encoded_str_new(it->first.data(), it->first.size(), encoding);

    rb_hash_aset(result, name, INT2FIX(it->second));
  }

  return result;
}
|
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 |
# File 'ext/re2/re2.cc', line 1791
/*
 * Builds a new Ruby Hash mapping each named capturing group's name to its
 * capture index. Names are created by encoded_str_new with the pattern's
 * configured encoding.
 */
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const auto &named_groups = p->pattern->NamedCapturingGroups();
  const auto encoding = p->pattern->options().encoding();

  VALUE result = rb_hash_new();

  for (auto it = named_groups.begin(); it != named_groups.end(); ++it) {
    VALUE name = encoded_str_new(it->first.data(), it->first.size(), encoding);

    rb_hash_aset(result, name, INT2FIX(it->second));
  }

  return result;
}
|
#names ⇒ Array<String>
Returns an array of names of all named capturing groups. Names are returned in alphabetical order rather than definition order, as RE2 stores named groups internally in a sorted map.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
423 424 425 426 427 428 429 430 431 432 433 434 435 436 |
# File 'ext/re2/re2.cc', line 423
/*
 * Builds a new Ruby Array holding the name of every named capturing group,
 * in the iteration order of RE2's NamedCapturingGroups() container. Names
 * are created by encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_names(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const auto &named_groups = p->pattern->NamedCapturingGroups();
  const auto encoding = p->pattern->options().encoding();

  /* Pre-size the array to the number of named groups. */
  VALUE result = rb_ary_new2(named_groups.size());

  for (auto it = named_groups.begin(); it != named_groups.end(); ++it) {
    rb_ary_push(result,
        encoded_str_new(it->first.data(), it->first.size(), encoding));
  }

  return result;
}
|
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl option set to true.
1590 1591 1592 1593 1594 |
# File 'ext/re2/re2.cc', line 1590
/* Returns the compiled pattern's never_nl option as a Ruby boolean. */
static VALUE re2_regexp_never_nl(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.never_nl());
}
|
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp
wasn't valid on construction. The overall match ($0) does not
count: if the regexp is "(a)(b)", returns 2.
1776 1777 1778 1779 1780 |
# File 'ext/re2/re2.cc', line 1776
/*
 * Returns RE2's NumberOfCapturingGroups() for the compiled pattern as a Ruby
 * Integer.
 */
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
  const int group_count =
      unwrap_re2_regexp(self)->pattern->NumberOfCapturingGroups();

  return INT2FIX(group_count);
}
|
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
1486 1487 1488 1489 1490 |
# File 'ext/re2/re2.cc', line 1486
/* Returns RE2's ok() status for the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_ok(const VALUE self) {
  const bool compiled = unwrap_re2_regexp(self)->pattern->ok();

  return BOOL2RUBY(compiled);
}
|
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line option set to true.
1664 1665 1666 1667 1668 |
# File 'ext/re2/re2.cc', line 1664
/* Returns the compiled pattern's one_line option as a Ruby boolean. */
static VALUE re2_regexp_one_line(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.one_line());
}
|
#options ⇒ Hash
Returns a hash of the options currently set for the RE2::Regexp.
1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 |
# File 'ext/re2/re2.cc', line 1726
/*
 * Returns a frozen Ruby Hash describing the options the pattern was compiled
 * with, keyed by Symbol: :utf8, :posix_syntax, :longest_match, :log_errors,
 * :max_mem, :literal, :never_nl, :case_sensitive, :perl_classes,
 * :word_boundary and :one_line. Every value is a boolean except :max_mem,
 * which is an Integer.
 */
static VALUE re2_regexp_options(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const RE2::Options &compiled_options = p->pattern->options();

  VALUE options = rb_hash_new();

  rb_hash_aset(options, ID2SYM(id_utf8),
      BOOL2RUBY(compiled_options.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(options, ID2SYM(id_posix_syntax),
      BOOL2RUBY(compiled_options.posix_syntax()));
  rb_hash_aset(options, ID2SYM(id_longest_match),
      BOOL2RUBY(compiled_options.longest_match()));
  rb_hash_aset(options, ID2SYM(id_log_errors),
      BOOL2RUBY(compiled_options.log_errors()));

  /* RE2 declares max_mem() as a 64-bit integer. INT2FIX is only valid
   * inside the Fixnum range and takes a `long`, so it can truncate where
   * `long` is 32 bits. LL2NUM converts any long long value correctly. */
  rb_hash_aset(options, ID2SYM(id_max_mem),
      LL2NUM(compiled_options.max_mem()));

  rb_hash_aset(options, ID2SYM(id_literal),
      BOOL2RUBY(compiled_options.literal()));
  rb_hash_aset(options, ID2SYM(id_never_nl),
      BOOL2RUBY(compiled_options.never_nl()));
  rb_hash_aset(options, ID2SYM(id_case_sensitive),
      BOOL2RUBY(compiled_options.case_sensitive()));
  rb_hash_aset(options, ID2SYM(id_perl_classes),
      BOOL2RUBY(compiled_options.perl_classes()));
  rb_hash_aset(options, ID2SYM(id_word_boundary),
      BOOL2RUBY(compiled_options.word_boundary()));
  rb_hash_aset(options, ID2SYM(id_one_line),
      BOOL2RUBY(compiled_options.one_line()));

  /* This is a read-only hash after all... */
  rb_obj_freeze(options);

  return options;
}
|
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given text and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
39 40 41 |
# File 'lib/re2/regexp.rb', line 39 def partial_match(text, = {}) match(text, Hash().merge(anchor: :unanchored)) end |
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using `PartialMatch`.
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 |
# File 'ext/re2/re2.cc', line 2045
/*
 * Returns a Ruby boolean saying whether the pattern matches anywhere in
 * `text`. The search is unanchored, spans the whole string and requests no
 * submatches.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  StringValue(text);

  /* Work on a frozen string so the bytes being matched cannot be mutated. */
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);
  const bool found = re2_match_without_gvl(
      p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);

  /* Keep `text` reachable until the match has finished. */
  RB_GC_GUARD(text);

  return BOOL2RUBY(found);
}
|
#pattern ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1470 1471 1472 1473 1474 1475 1476 |
# File 'ext/re2/re2.cc', line 1470
/*
 * Returns the pattern's source string as a Ruby String built by
 * encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const std::string &source = p->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
      p->pattern->options().encoding());
}
|
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
perl_classes option set to true.
1634 1635 1636 1637 1638 |
# File 'ext/re2/re2.cc', line 1634
/* Returns the compiled pattern's perl_classes option as a Ruby boolean. */
static VALUE re2_regexp_perl_classes(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.perl_classes());
}
|
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax option set to true.
1516 1517 1518 1519 1520 |
# File 'ext/re2/re2.cc', line 1516
/* Returns the compiled pattern's posix_syntax option as a Ruby boolean. */
static VALUE re2_regexp_posix_syntax(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.posix_syntax());
}
|
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost". Larger numbers are more expensive than smaller numbers.
1715 1716 1717 1718 1719 |
# File 'ext/re2/re2.cc', line 1715
/* Returns RE2's ProgramSize() for the compiled pattern as a Ruby Integer. */
static VALUE re2_regexp_program_size(const VALUE self) {
  const int size = unwrap_re2_regexp(self)->pattern->ProgramSize();

  return INT2FIX(size);
}
|
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with `FindAndConsume`.
2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 |
# File 'ext/re2/re2.cc', line 2090
/*
 * Creates a new Scanner instance over `text` for this regexp.
 *
 * The scanner records the regexp, a frozen string for the text, a
 * heap-allocated StringPiece viewing that text, the number of capturing
 * groups (0 when the pattern failed to compile) and a cleared eof flag.
 *
 * Raises rb_eNoMemError if the StringPiece cannot be allocated.
 */
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
  StringValue(text);
  text = rb_str_new_frozen(text);

  re2_pattern *p = unwrap_re2_regexp(self);

  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
  re2_scanner *c;
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);

  RB_OBJ_WRITE(scanner, &c->regexp, self);
  RB_OBJ_WRITE(scanner, &c->text, text);

  /* The input view points into the text stored on the scanner. */
  c->input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(c->text), RSTRING_LEN(c->text));
  if (!c->input) {
    rb_raise(rb_eNoMemError,
        "not enough memory to allocate StringPiece for input");
  }

  c->number_of_capturing_groups =
      p->pattern->ok() ? p->pattern->NumberOfCapturingGroups() : 0;
  c->eof = false;

  return scanner;
}
|
#source ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1470 1471 1472 1473 1474 1475 1476 |
# File 'ext/re2/re2.cc', line 1470
/*
 * Returns the pattern's source string as a Ruby String built by
 * encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const std::string &source = p->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
      p->pattern->options().encoding());
}
|
#to_s ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1470 1471 1472 1473 1474 1475 1476 |
# File 'ext/re2/re2.cc', line 1470
/*
 * Returns the pattern's source string as a Ruby String built by
 * encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const std::string &source = p->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
      p->pattern->options().encoding());
}
|
#to_str ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1470 1471 1472 1473 1474 1475 1476 |
# File 'ext/re2/re2.cc', line 1470
/*
 * Returns the pattern's source string as a Ruby String built by
 * encoded_str_new with the pattern's configured encoding.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  const std::string &source = p->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
      p->pattern->options().encoding());
}
|
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the utf8
option set to true.
1501 1502 1503 1504 1505 |
# File 'ext/re2/re2.cc', line 1501
/*
 * Returns a Ruby boolean saying whether the compiled pattern's encoding
 * option is RE2::Options::EncodingUTF8.
 */
static VALUE re2_regexp_utf8(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();
  const bool is_utf8 =
      compiled_options.encoding() == RE2::Options::EncodingUTF8;

  return BOOL2RUBY(is_utf8);
}
|
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary option set to true.
1649 1650 1651 1652 1653 |
# File 'ext/re2/re2.cc', line 1649
/* Returns the compiled pattern's word_boundary option as a Ruby boolean. */
static VALUE re2_regexp_word_boundary(const VALUE self) {
  const RE2::Options &compiled_options =
      unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(compiled_options.word_boundary());
}
|