Class: RE2::Regexp
Class Method Summary collapse
-
.compile ⇒ Object
Returns a new Regexp object with a compiled version of
pattern stored inside. -
.escape(unquoted) ⇒ String
Returns a version of
str with all potentially meaningful regexp characters escaped using QuoteMeta. -
.match_has_endpos_argument? ⇒ Boolean
Returns whether the underlying RE2 version supports passing an
endpos argument to Match. -
.quote(unquoted) ⇒ String
Returns a version of
str with all potentially meaningful regexp characters escaped using QuoteMeta.
Instance Method Summary collapse
-
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch. -
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch. -
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false. -
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to true. -
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false. -
#error ⇒ String?
If the Regexp could not be created properly, returns an error string otherwise returns
nil. -
#error_arg ⇒ String?
If the Regexp could not be created properly, returns the offending portion of the regexp otherwise returns
nil. -
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given
text exactly and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using
FullMatch. -
#initialize(*args) ⇒ Object
constructor
Returns a new Regexp object with a compiled version of
pattern stored inside. - #initialize_copy(other) ⇒ Object
-
#inspect ⇒ String
Returns a printable version of the regular expression.
-
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal option set to true. -
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors option set to true. -
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match option set to true. - #match(*args) ⇒ Object
-
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch. -
#max_mem ⇒ Integer
Returns the
max_mem setting for the regular expression. -
#named_captures ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#names ⇒ Array<String>
Returns an array of names of all named capturing groups.
-
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl option set to true. -
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp wasn't valid on construction.
-
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
-
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line option set to true. -
#options ⇒ Hash
Returns a hash of the options currently set for the Regexp.
-
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given
text and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch. -
#pattern ⇒ String
Returns a string version of the regular expression.
-
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the perl_classes option set to
true. -
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax option set to true. -
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost".
-
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with
FindAndConsume. -
#source ⇒ String
Returns a string version of the regular expression.
-
#to_s ⇒ String
Returns a string version of the regular expression.
-
#to_str ⇒ String
Returns a string version of the regular expression.
-
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
utf8 option set to true. -
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary option set to true.
Constructor Details
#initialize(pattern) ⇒ RE2::Regexp #initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern stored inside.
1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 |
# File 'ext/re2/re2.cc', line 1221
/*
 * Compiles `pattern` (optionally with `options`) into a new RE2 object and
 * stores it in the re2_pattern wrapper behind `self`.
 *
 * The replacement RE2 object is built completely before any previously held
 * object is released. This way the wrapper never keeps a pointer to freed
 * memory should parse_re2_options (body not shown here) or the allocation
 * check raise part-way through.
 *
 * Raises NoMemoryError if the RE2 object cannot be allocated; in that case
 * the wrapper's existing pattern (if any) is left untouched.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options;
  re2_pattern *p;
  RE2 *compiled;

  /* One required argument (pattern) and one optional argument (options). */
  rb_scan_args(argc, argv, "11", &pattern, &options);

  /* Ensure pattern is a string. */
  StringValue(pattern);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);

  if (RTEST(options)) {
    RE2::Options re2_options;
    parse_re2_options(&re2_options, options);

    compiled = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), re2_options);
  } else {
    compiled = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
  }

  if (compiled == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  /* Only now drop the old object; deleting a null pointer is a no-op. */
  delete p->pattern;
  p->pattern = compiled;

  return self;
}
|
Class Method Details
.initialize(pattern) ⇒ RE2::Regexp .initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern stored inside.
.escape(unquoted) ⇒ String
Returns a version of str with all potentially meaningful regexp characters
escaped using
QuoteMeta. The returned string, used as a regular expression, will
exactly match the original string.
2160 2161 2162 2163 2164 2165 2166 2167 |
# File 'ext/re2/re2.cc', line 2160
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby string. The receiver argument is unused.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
.match_has_endpos_argument? ⇒ Boolean
1965 1966 1967 1968 1969 1970 1971 |
# File 'ext/re2/re2.cc', line 1965
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
#ifdef HAVE_ENDPOS_ARGUMENT
return Qtrue;
#else
return Qfalse;
#endif
}
|
.quote(unquoted) ⇒ String
Returns a version of str with all potentially meaningful regexp characters
escaped using
QuoteMeta. The returned string, used as a regular expression, will
exactly match the original string.
2160 2161 2162 2163 2164 2165 2166 2167 |
# File 'ext/re2/re2.cc', line 2160
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby string. The receiver argument is unused.
 */
static VALUE re2_escape(VALUE, VALUE unquoted) {
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
Instance Method Details
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch.
1884 1885 1886 1887 1888 1889 1890 1891 1892 |
# File 'ext/re2/re2.cc', line 1884
/*
 * Returns a Ruby boolean telling whether RE2::PartialMatch finds the
 * receiver's pattern in `text`.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const re2::StringPiece input(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(input, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch.
1884 1885 1886 1887 1888 1889 1890 1891 1892 |
# File 'ext/re2/re2.cc', line 1884
/*
 * Returns a Ruby boolean telling whether RE2::PartialMatch finds the
 * receiver's pattern in `text`.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const re2::StringPiece input(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(input, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false.
1460 1461 1462 |
# File 'ext/re2/re2.cc', line 1460
static VALUE re2_regexp_case_insensitive(const VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to true.
1444 1445 1446 1447 1448 |
# File 'ext/re2/re2.cc', line 1444
/* Reports the `case_sensitive` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_case_sensitive(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.case_sensitive());
}
|
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive option set to false.
1460 1461 1462 |
# File 'ext/re2/re2.cc', line 1460
static VALUE re2_regexp_case_insensitive(const VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#error ⇒ String?
If the RE2::Regexp could not be created properly, returns an error string
otherwise returns nil.
1515 1516 1517 1518 1519 1520 1521 1522 1523 |
# File 'ext/re2/re2.cc', line 1515
/*
 * Returns nil when the pattern compiled successfully, otherwise a new Ruby
 * string holding RE2's error message.
 */
static VALUE re2_regexp_error(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);

  if (wrapper->pattern->ok()) {
    return Qnil;
  }

  const std::string &message = wrapper->pattern->error();
  return rb_str_new(message.data(), message.size());
}
|
#error_arg ⇒ String?
If the RE2::Regexp could not be created properly, returns
the offending portion of the regexp otherwise returns nil.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 |
# File 'ext/re2/re2.cc', line 1535
/*
 * Returns nil when the pattern compiled successfully, otherwise the
 * offending portion of the pattern, built with encoded_str_new using the
 * pattern's encoding option.
 */
static VALUE re2_regexp_error_arg(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);

  if (wrapper->pattern->ok()) {
    return Qnil;
  }

  const std::string &fragment = wrapper->pattern->error_arg();
  return encoded_str_new(fragment.data(), fragment.size(),
                         wrapper->pattern->options().encoding());
}
|
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given text exactly and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
68 69 70 |
# File 'lib/re2/regexp.rb', line 68
def full_match(text, options = {})
match(text, Hash(options).merge(anchor: :anchor_both))
end
|
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using
FullMatch.
1903 1904 1905 1906 1907 1908 1909 1910 1911 |
# File 'ext/re2/re2.cc', line 1903
/*
 * Returns a Ruby boolean telling whether RE2::FullMatch matches the
 * receiver's pattern against `text`.
 */
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const re2::StringPiece input(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool matched = RE2::FullMatch(input, *wrapper->pattern);

  return BOOL2RUBY(matched);
}
|
#initialize_copy(other) ⇒ Object
1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 |
# File 'ext/re2/re2.cc', line 1254
/*
 * Makes `self` hold its own RE2 object, compiled from the pattern string and
 * options of `other`.
 *
 * The copy is constructed before the object currently held by `self` is
 * released. This keeps the source pattern alive while it is being read even
 * if `self` and `other` share the same wrapper, and leaves `self` with a
 * valid pointer if allocation fails.
 *
 * Raises NoMemoryError if the RE2 object cannot be allocated.
 */
static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
  re2_pattern *self_p;
  re2_pattern *other_p = unwrap_re2_regexp(other);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);

  RE2 *copy = new(std::nothrow) RE2(other_p->pattern->pattern(),
                                    other_p->pattern->options());
  if (copy == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  /* Deleting a null pointer is a no-op, so no explicit check is needed. */
  delete self_p->pattern;
  self_p->pattern = copy;

  return self;
}
|
#inspect ⇒ String
Returns a printable version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is
undefined).
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 |
# File 'ext/re2/re2.cc', line 1286
/*
 * Builds the printable form "#<RE2::Regexp /PATTERN/>" and returns it via
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_inspect(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);

  std::string printable("#<RE2::Regexp /");
  printable += wrapper->pattern->pattern();
  printable += "/>";

  return encoded_str_new(printable.data(), printable.length(),
                         wrapper->pattern->options().encoding());
}
|
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal option set to true.
1414 1415 1416 1417 1418 |
# File 'ext/re2/re2.cc', line 1414
/* Reports the `literal` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_literal(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.literal());
}
|
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors option set to true.
1385 1386 1387 1388 1389 |
# File 'ext/re2/re2.cc', line 1385
/* Reports the `log_errors` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_log_errors(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.log_errors());
}
|
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match option set to true.
1370 1371 1372 1373 1374 |
# File 'ext/re2/re2.cc', line 1370
/* Reports the `longest_match` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_longest_match(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.longest_match());
}
|
#match(text) ⇒ RE2::MatchData, ... #match(text, options) ⇒ RE2::MatchData, ... #match(text, submatches) ⇒ RE2::MatchData, ...
General matching: match the pattern against the given text using
Match and return a MatchData instance with the specified number of
submatches (defaults to the total number of capturing groups) or a boolean
(if no submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 |
# File 'ext/re2/re2.cc', line 1727
/*
 * General matching entry point: matches the pattern against `text` using
 * RE2::Match.
 *
 * Arguments (via argc/argv): a required text and an optional second value
 * that is either an Integer (number of submatches) or something convertible
 * to a Hash with the keys looked up below (endpos, anchor, submatches,
 * startpos).
 *
 * Returns a boolean when zero submatches are requested, otherwise a
 * MatchData instance on a match or nil when there is no match. Also returns
 * nil when the submatch count has to be derived from the pattern and the
 * pattern is not ok().
 *
 * Raises ArgumentError for negative counts/positions, an unknown anchor or
 * startpos > endpos; RangeError when the submatch count equals INT_MAX;
 * NoMemoryError when the StringPiece array cannot be allocated.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
re2_pattern *p;
re2_matchdata *m;
VALUE text, options;
/* "11": one mandatory argument (text) and one optional (options). */
rb_scan_args(argc, argv, "11", &text, &options);
/* Ensure text is a string. */
StringValue(text);
p = unwrap_re2_regexp(self);
/* n is the number of submatches; it is assigned on every path below. */
int n;
/* Defaults: search the whole text, unanchored. */
int startpos = 0;
/* NOTE(review): the string length is stored in an int here — confirm the
 * intended behaviour for very long strings. */
int endpos = RSTRING_LEN(text);
RE2::Anchor anchor = RE2::UNANCHORED;
if (RTEST(options)) {
if (RB_INTEGER_TYPE_P(options)) {
/* Integer second argument: it is the submatch count itself. */
n = NUM2INT(options);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* Anything else is treated as an options hash (converted if needed). */
if (TYPE(options) != T_HASH) {
options = rb_Hash(options);
}
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
if (!NIL_P(endpos_option)) {
/* endpos is only accepted when the build defines HAVE_ENDPOS_ARGUMENT. */
#ifdef HAVE_ENDPOS_ARGUMENT
endpos = NUM2INT(endpos_option);
if (endpos < 0) {
rb_raise(rb_eArgError, "endpos should be >= 0");
}
#else
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
#endif
}
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
if (!NIL_P(anchor_option)) {
/* The anchor must be a Symbol naming one of the three RE2 anchors. */
Check_Type(anchor_option, T_SYMBOL);
ID id_anchor_option = SYM2ID(anchor_option);
if (id_anchor_option == id_unanchored) {
anchor = RE2::UNANCHORED;
} else if (id_anchor_option == id_anchor_start) {
anchor = RE2::ANCHOR_START;
} else if (id_anchor_option == id_anchor_both) {
anchor = RE2::ANCHOR_BOTH;
} else {
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
}
}
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
if (!NIL_P(submatches_option)) {
n = NUM2INT(submatches_option);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* No explicit count: fall back to the pattern's capturing groups,
 * which requires a successfully compiled pattern. */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
if (!NIL_P(startpos_option)) {
startpos = NUM2INT(startpos_option);
if (startpos < 0) {
rb_raise(rb_eArgError, "startpos should be >= 0");
}
}
}
} else {
/* No second argument: default the count to the pattern's capturing groups. */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
if (startpos > endpos) {
rb_raise(rb_eArgError, "startpos should be <= endpos");
}
if (n == 0) {
/* No submatches requested: only a boolean result is produced. */
#ifdef HAVE_ENDPOS_ARGUMENT
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, endpos, anchor, 0, 0);
#else
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, anchor, 0, 0);
#endif
return BOOL2RUBY(matched);
} else {
/* Guard the increment below against overflowing int. */
if (n == INT_MAX) {
rb_raise(rb_eRangeError, "number of matches should be < %d", INT_MAX);
}
/* Because match returns the whole match as well. */
n += 1;
re2::StringPiece *matches = new(std::nothrow) re2::StringPiece[n];
if (matches == 0) {
rb_raise(rb_eNoMemError,
"not enough memory to allocate StringPieces for matches");
}
/* Match against a frozen string; this same string is stored on the
 * MatchData below. */
text = rb_str_new_frozen(text);
#ifdef HAVE_ENDPOS_ARGUMENT
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, endpos, anchor, matches, n);
#else
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, anchor, matches, n);
#endif
if (matched) {
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
RB_OBJ_WRITE(matchdata, &m->regexp, self);
RB_OBJ_WRITE(matchdata, &m->text, text);
/* The matches array is handed over to the MatchData struct. */
m->matches = matches;
m->number_of_matches = n;
return matchdata;
} else {
/* No match: the array is not handed over, so release it here. */
delete[] matches;
return Qnil;
}
}
}
|
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch.
1884 1885 1886 1887 1888 1889 1890 1891 1892 |
# File 'ext/re2/re2.cc', line 1884
/*
 * Returns a Ruby boolean telling whether RE2::PartialMatch finds the
 * receiver's pattern in `text`.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const re2::StringPiece input(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(input, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#max_mem ⇒ Integer
Returns the max_mem setting for the regular expression.
1399 1400 1401 1402 1403 |
# File 'ext/re2/re2.cc', line 1399
/*
 * Returns the pattern's max_mem option as a Ruby Integer.
 *
 * LL2NUM is used rather than INT2FIX because INT2FIX performs no range
 * check; LL2NUM converts the full value and falls back to a Bignum when it
 * does not fit in a Fixnum.
 */
static VALUE re2_regexp_max_mem(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);

  return LL2NUM(p->pattern->options().max_mem());
}
|
#named_captures ⇒ Hash
Returns a hash of names to capturing indices of groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 |
# File 'ext/re2/re2.cc', line 1630
/*
 * Builds a Ruby Hash mapping each named capturing group to its index.
 * Keys are created with encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  typedef std::map<std::string, int> group_map;

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const group_map &named = wrapper->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  group_map::const_iterator entry = named.begin();
  while (entry != named.end()) {
    VALUE name = encoded_str_new(entry->first.data(), entry->first.size(),
                                 wrapper->pattern->options().encoding());
    rb_hash_aset(result, name, INT2FIX(entry->second));
    ++entry;
  }

  return result;
}
|
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 |
# File 'ext/re2/re2.cc', line 1630
/*
 * Builds a Ruby Hash mapping each named capturing group to its index.
 * Keys are created with encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  typedef std::map<std::string, int> group_map;

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const group_map &named = wrapper->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  group_map::const_iterator entry = named.begin();
  while (entry != named.end()) {
    VALUE name = encoded_str_new(entry->first.data(), entry->first.size(),
                                 wrapper->pattern->options().encoding());
    rb_hash_aset(result, name, INT2FIX(entry->second));
    ++entry;
  }

  return result;
}
|
#names ⇒ Array<String>
Returns an array of names of all named capturing groups. Names are returned in alphabetical order rather than definition order, as RE2 stores named groups internally in a sorted map.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
296 297 298 299 300 301 302 303 304 305 306 307 308 309 |
# File 'ext/re2/re2.cc', line 296
/*
 * Builds a Ruby Array with the name of every named capturing group, in the
 * iteration order of the std::map returned by NamedCapturingGroups().
 * Names are created with encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_names(const VALUE self) {
  typedef std::map<std::string, int> group_map;

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const group_map &named = wrapper->pattern->NamedCapturingGroups();
  VALUE list = rb_ary_new2(named.size());

  group_map::const_iterator entry = named.begin();
  while (entry != named.end()) {
    VALUE name = encoded_str_new(entry->first.data(), entry->first.size(),
                                 wrapper->pattern->options().encoding());
    rb_ary_push(list, name);
    ++entry;
  }

  return list;
}
|
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl option set to true.
1429 1430 1431 1432 1433 |
# File 'ext/re2/re2.cc', line 1429
/* Reports the `never_nl` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_never_nl(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.never_nl());
}
|
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp
wasn't valid on construction. The overall match ($0) does not
count: if the regexp is "(a)(b)", returns 2.
1615 1616 1617 1618 1619 |
# File 'ext/re2/re2.cc', line 1615
/* Returns RE2's NumberOfCapturingGroups() for the pattern as a Ruby Integer. */
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const int group_count = wrapper->pattern->NumberOfCapturingGroups();

  return INT2FIX(group_count);
}
|
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
1325 1326 1327 1328 1329 |
# File 'ext/re2/re2.cc', line 1325
/* Returns RE2's ok() status for the pattern as a Ruby boolean. */
static VALUE re2_regexp_ok(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const bool compiled_ok = wrapper->pattern->ok();

  return BOOL2RUBY(compiled_ok);
}
|
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line option set to true.
1503 1504 1505 1506 1507 |
# File 'ext/re2/re2.cc', line 1503
/* Reports the `one_line` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_one_line(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.one_line());
}
|
#options ⇒ Hash
Returns a hash of the options currently set for the RE2::Regexp.
1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 |
# File 'ext/re2/re2.cc', line 1565
/*
 * Returns a frozen Ruby Hash describing the options the pattern was
 * compiled with. Keys are the option Symbols; every value is a boolean
 * except max_mem, which is an Integer.
 *
 * max_mem is converted with LL2NUM rather than INT2FIX because INT2FIX
 * performs no range check on the value it is given.
 */
static VALUE re2_regexp_options(const VALUE self) {
  re2_pattern *p = unwrap_re2_regexp(self);
  /* Fetch the options once instead of once per key. */
  const RE2::Options &opts = p->pattern->options();
  VALUE options = rb_hash_new();

  rb_hash_aset(options, ID2SYM(id_utf8),
      BOOL2RUBY(opts.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(options, ID2SYM(id_posix_syntax),
      BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(options, ID2SYM(id_longest_match),
      BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(options, ID2SYM(id_log_errors),
      BOOL2RUBY(opts.log_errors()));
  rb_hash_aset(options, ID2SYM(id_max_mem),
      LL2NUM(opts.max_mem()));
  rb_hash_aset(options, ID2SYM(id_literal),
      BOOL2RUBY(opts.literal()));
  rb_hash_aset(options, ID2SYM(id_never_nl),
      BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(options, ID2SYM(id_case_sensitive),
      BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(options, ID2SYM(id_perl_classes),
      BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(options, ID2SYM(id_word_boundary),
      BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(options, ID2SYM(id_one_line),
      BOOL2RUBY(opts.one_line()));

  /* This is a read-only hash after all... */
  rb_obj_freeze(options);

  return options;
}
|
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given text and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
39 40 41 |
# File 'lib/re2/regexp.rb', line 39
def partial_match(text, options = {})
match(text, Hash(options).merge(anchor: :unanchored))
end
|
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch.
1884 1885 1886 1887 1888 1889 1890 1891 1892 |
# File 'ext/re2/re2.cc', line 1884
/*
 * Returns a Ruby boolean telling whether RE2::PartialMatch finds the
 * receiver's pattern in `text`.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const re2::StringPiece input(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(input, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#pattern ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1309 1310 1311 1312 1313 1314 1315 |
# File 'ext/re2/re2.cc', line 1309
/*
 * Returns the pattern's source text as a Ruby string, created with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const std::string &source = wrapper->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
perl_classes option set to true.
1473 1474 1475 1476 1477 |
# File 'ext/re2/re2.cc', line 1473
/* Reports the `perl_classes` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_perl_classes(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.perl_classes());
}
|
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax option set to true.
1355 1356 1357 1358 1359 |
# File 'ext/re2/re2.cc', line 1355
/* Reports the `posix_syntax` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_posix_syntax(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.posix_syntax());
}
|
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost". Larger numbers are more expensive than smaller numbers.
1554 1555 1556 1557 1558 |
# File 'ext/re2/re2.cc', line 1554
/* Returns RE2's ProgramSize() for the pattern as a Ruby Integer. */
static VALUE re2_regexp_program_size(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const int size = wrapper->pattern->ProgramSize();

  return INT2FIX(size);
}
|
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with
FindAndConsume.
1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 |
# File 'ext/re2/re2.cc', line 1925
/*
 * Creates a new RE2::Scanner instance and fills in its state: the regexp it
 * belongs to, a frozen copy of `text`, a StringPiece over that copy, the
 * number of capturing groups (0 when the pattern is not ok()) and a cleared
 * eof flag.
 *
 * Raises NoMemoryError if the StringPiece cannot be allocated.
 */
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *regexp = unwrap_re2_regexp(self);
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
  re2_scanner *state;
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, state);

  RB_OBJ_WRITE(scanner, &state->regexp, self);
  RB_OBJ_WRITE(scanner, &state->text, rb_str_new_frozen(text));

  /* The input view is built over the stored frozen string, not the argument. */
  state->input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(state->text), RSTRING_LEN(state->text));
  if (state->input == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  state->number_of_capturing_groups =
      regexp->pattern->ok() ? regexp->pattern->NumberOfCapturingGroups() : 0;
  state->eof = false;

  return scanner;
}
|
#source ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1309 1310 1311 1312 1313 1314 1315 |
# File 'ext/re2/re2.cc', line 1309
/*
 * Returns the pattern's source text as a Ruby string, created with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const std::string &source = wrapper->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#to_s ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1309 1310 1311 1312 1313 1314 1315 |
# File 'ext/re2/re2.cc', line 1309
/*
 * Returns the pattern's source text as a Ruby string, created with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const std::string &source = wrapper->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#to_str ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1309 1310 1311 1312 1313 1314 1315 |
# File 'ext/re2/re2.cc', line 1309
/*
 * Returns the pattern's source text as a Ruby string, created with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper = unwrap_re2_regexp(self);
  const std::string &source = wrapper->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the utf8
option set to true.
1340 1341 1342 1343 1344 |
# File 'ext/re2/re2.cc', line 1340
/* Returns true when the pattern's encoding option is RE2::Options::EncodingUTF8. */
static VALUE re2_regexp_utf8(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();
  const bool is_utf8 = (opts.encoding() == RE2::Options::EncodingUTF8);

  return BOOL2RUBY(is_utf8);
}
|
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary option set to true.
1488 1489 1490 1491 1492 |
# File 'ext/re2/re2.cc', line 1488
/* Reports the `word_boundary` option of the compiled pattern as a Ruby boolean. */
static VALUE re2_regexp_word_boundary(const VALUE self) {
  const RE2::Options &opts = unwrap_re2_regexp(self)->pattern->options();

  return BOOL2RUBY(opts.word_boundary());
}
|