Class: RE2::Regexp
Class Method Summary collapse
-
.compile ⇒ Object
Returns a new Regexp object with a compiled version of
pattern
stored inside. -
.escape(unquoted) ⇒ String
Returns a version of
str
with all potentially meaningful regexp characters escaped using QuoteMeta
. -
.match_has_endpos_argument? ⇒ Boolean
Returns whether the underlying RE2 version supports passing an
endpos
argument to Match. -
.quote(unquoted) ⇒ String
Returns a version of
str
with all potentially meaningful regexp characters escaped using QuoteMeta
.
Instance Method Summary collapse
-
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
. -
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
. -
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to false
. -
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to true
. -
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to false
. -
#error ⇒ String?
If the Regexp could not be created properly, returns an error string, otherwise returns
nil
. -
#error_arg ⇒ String?
If the Regexp could not be created properly, returns the offending portion of the regexp, otherwise returns
nil
. -
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given
text
exactly and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using
FullMatch
. -
#initialize(*args) ⇒ Object
constructor
Returns a new Regexp object with a compiled version of
pattern
stored inside. -
#inspect ⇒ String
Returns a printable version of the regular expression.
-
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal
option set to true
. -
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors
option set to true
. -
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match
option set to true
. - #match(*args) ⇒ Object
-
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
. -
#max_mem ⇒ Integer
Returns the
max_mem
setting for the regular expression. -
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl
option set to true
. -
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp wasn't valid on construction.
-
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
-
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line
option set to true
. -
#options ⇒ Hash
Returns a hash of the options currently set for the Regexp.
-
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given
text
and return a MatchData instance with the specified number of submatches (defaults to the total number of capturing groups) or a boolean (if no submatches are required). -
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
. -
#pattern ⇒ String
Returns a string version of the regular expression.
-
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the perl_classes option set to
true
. -
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax
option set to true
. -
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost".
-
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with
FindAndConsume
. -
#source ⇒ String
Returns a string version of the regular expression.
-
#to_s ⇒ String
Returns a string version of the regular expression.
-
#to_str ⇒ String
Returns a string version of the regular expression.
-
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
utf8
option set to true
. -
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary
option set to true
.
Constructor Details
#initialize(pattern) ⇒ RE2::Regexp #initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern
stored inside.
912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 |
# File 'ext/re2/re2.cc', line 912
/*
 * Constructor body for RE2::Regexp: compiles `pattern` into a new RE2 object
 * and stores it on the wrapped re2_pattern struct.
 *
 * Accepts one required argument (the pattern) and one optional argument
 * (options). When options is truthy it is translated into an RE2::Options
 * via parse_re2_options before compilation; otherwise RE2's default options
 * are used.
 *
 * Raises NoMemError if the RE2 object cannot be allocated. Returns self.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options;
  re2_pattern *wrapper;

  rb_scan_args(argc, argv, "11", &pattern, &options);

  /* The pattern must be a string before its bytes are handed to RE2. */
  StringValue(pattern);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  if (!RTEST(options)) {
    /* No options supplied: compile with RE2's defaults. */
    wrapper->pattern = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
  } else {
    RE2::Options re2_options;
    parse_re2_options(&re2_options, options);

    wrapper->pattern = new(std::nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
        re2_options);
  }

  /* nothrow new signals allocation failure with a null pointer. */
  if (wrapper->pattern == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  return self;
}
|
Class Method Details
.initialize(pattern) ⇒ RE2::Regexp .initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of
pattern
stored inside.
.escape(unquoted) ⇒ String
Returns a version of str
with all potentially meaningful regexp characters
escaped using
QuoteMeta
. The returned string, used as a regular expression, will
exactly match the original string.
1783 1784 1785 1786 1787 1788 1789 1790 |
# File 'ext/re2/re2.cc', line 1783
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby string. The receiver argument is unused.
 */
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
  /* Ensure the argument is a string before reading its bytes. */
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
.match_has_endpos_argument? ⇒ Boolean
1656 1657 1658 1659 1660 1661 1662 |
# File 'ext/re2/re2.cc', line 1656
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
#ifdef HAVE_ENDPOS_ARGUMENT
return Qtrue;
#else
return Qfalse;
#endif
}
|
.quote(unquoted) ⇒ String
Returns a version of str
with all potentially meaningful regexp characters
escaped using
QuoteMeta
. The returned string, used as a regular expression, will
exactly match the original string.
1783 1784 1785 1786 1787 1788 1789 1790 |
# File 'ext/re2/re2.cc', line 1783
/*
 * Escapes `unquoted` with RE2::QuoteMeta and returns the result as a new
 * Ruby string. The receiver argument is unused.
 */
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
  /* Ensure the argument is a string before reading its bytes. */
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  return rb_str_new(escaped.data(), escaped.size());
}
|
Instance Method Details
#===(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
.
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 |
# File 'ext/re2/re2.cc', line 1574
/*
 * Returns a Ruby boolean telling whether the pattern matches any substring
 * of `text`, using RE2::PartialMatch with no capture arguments.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  re2_pattern *wrapper;

  /* Ensure text is a string. */
  StringValue(text);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(haystack, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#=~(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
.
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 |
# File 'ext/re2/re2.cc', line 1574
/*
 * Returns a Ruby boolean telling whether the pattern matches any substring
 * of `text`, using RE2::PartialMatch with no capture arguments.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  re2_pattern *wrapper;

  /* Ensure text is a string. */
  StringValue(text);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(haystack, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to false
.
1140 1141 1142 |
# File 'ext/re2/re2.cc', line 1140
static VALUE re2_regexp_case_insensitive(const VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to true
.
1123 1124 1125 1126 1127 1128 |
# File 'ext/re2/re2.cc', line 1123
/*
 * Returns a Ruby boolean reflecting the `case_sensitive` option the pattern
 * was compiled with.
 */
static VALUE re2_regexp_case_sensitive(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.case_sensitive());
}
|
#casefold? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
case_sensitive
option set to false
.
1140 1141 1142 |
# File 'ext/re2/re2.cc', line 1140
static VALUE re2_regexp_case_insensitive(const VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#error ⇒ String?
If the RE2::Regexp could not be created properly, returns an error string
otherwise returns nil
.
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 |
# File 'ext/re2/re2.cc', line 1198
/*
 * Returns nil when the pattern compiled successfully; otherwise returns the
 * RE2 error message as a new Ruby string.
 */
static VALUE re2_regexp_error(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  if (!wrapper->pattern->ok()) {
    const std::string &message = wrapper->pattern->error();
    return rb_str_new(message.data(), message.size());
  }

  return Qnil;
}
|
#error_arg ⇒ String?
If the RE2::Regexp could not be created properly, returns
the offending portion of the regexp otherwise returns nil
.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 |
# File 'ext/re2/re2.cc', line 1219
/*
 * Returns nil when the pattern compiled successfully; otherwise returns the
 * offending portion of the pattern, built with encoded_str_new using the
 * pattern's encoding option.
 */
static VALUE re2_regexp_error_arg(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  if (!wrapper->pattern->ok()) {
    const std::string &offending = wrapper->pattern->error_arg();
    return encoded_str_new(offending.data(), offending.size(),
                           wrapper->pattern->options().encoding());
  }

  return Qnil;
}
|
#full_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against the given text
exactly and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
68 69 70 |
# File 'lib/re2/regexp.rb', line 68
def full_match(text, options = {})
match(text, Hash(options).merge(anchor: :anchor_both))
end
|
#full_match?(text) ⇒ Boolean
Returns true if the pattern matches the given text using
FullMatch
.
1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 |
# File 'ext/re2/re2.cc', line 1594
/*
 * Returns a Ruby boolean telling whether the pattern matches `text` using
 * RE2::FullMatch with no capture arguments.
 */
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
  re2_pattern *wrapper;

  /* Ensure text is a string. */
  StringValue(text);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const re2::StringPiece subject(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool matched = RE2::FullMatch(subject, *wrapper->pattern);

  return BOOL2RUBY(matched);
}
|
#inspect ⇒ String
Returns a printable version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is
undefined).
954 955 956 957 958 959 960 961 962 963 964 965 |
# File 'ext/re2/re2.cc', line 954
/*
 * Returns a printable representation of the regular expression in the form
 * "#<RE2::Regexp /pattern/>", built with encoded_str_new using the pattern's
 * encoding option.
 */
static VALUE re2_regexp_inspect(const VALUE self) {
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);

  std::ostringstream output;
  output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";

  /*
   * ostringstream::str() returns a fresh copy of the buffer on every call,
   * so take the copy once and read both the data pointer and the length
   * from that single string.
   */
  const std::string inspected = output.str();

  return encoded_str_new(inspected.data(), inspected.length(),
                         p->pattern->options().encoding());
}
|
#literal? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
literal
option set to true
.
1091 1092 1093 1094 1095 1096 |
# File 'ext/re2/re2.cc', line 1091
/*
 * Returns a Ruby boolean reflecting the `literal` option the pattern was
 * compiled with.
 */
static VALUE re2_regexp_literal(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.literal());
}
|
#log_errors? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
log_errors
option set to true
.
1060 1061 1062 1063 1064 1065 |
# File 'ext/re2/re2.cc', line 1060
/*
 * Returns a Ruby boolean reflecting the `log_errors` option the pattern was
 * compiled with.
 */
static VALUE re2_regexp_log_errors(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.log_errors());
}
|
#longest_match? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
longest_match
option set to true
.
1044 1045 1046 1047 1048 1049 |
# File 'ext/re2/re2.cc', line 1044
/*
 * Returns a Ruby boolean reflecting the `longest_match` option the pattern
 * was compiled with.
 */
static VALUE re2_regexp_longest_match(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.longest_match());
}
|
#match(text) ⇒ RE2::MatchData, ... #match(text, options) ⇒ RE2::MatchData, ... #match(text, submatches) ⇒ RE2::MatchData, ...
General matching: match the pattern against the given text
using
Match
and return a MatchData instance with the specified number of
submatches (defaults to the total number of capturing groups) or a boolean
(if no submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 |
# File 'ext/re2/re2.cc', line 1418
/*
 * General matching: runs the pattern against `text` with RE2's Match.
 *
 * Arguments (via argc/argv): a required text and an optional second
 * argument that is either an integer number of submatches or something
 * convertible to a Hash with the keys :endpos, :anchor, :submatches and
 * :startpos.
 *
 * Returns:
 *  - a Ruby boolean when zero submatches are requested;
 *  - a new RE2::MatchData when submatches are requested and the match
 *    succeeds;
 *  - nil when the match fails, or when the number of submatches has to be
 *    derived from the pattern and the pattern did not compile.
 *
 * Raises ArgumentError for negative counts/positions, an unknown anchor or
 * startpos > endpos; re2_eRegexpUnsupportedError when :endpos is given but
 * HAVE_ENDPOS_ARGUMENT is not defined; NoMemError if the match storage
 * cannot be allocated.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
re2_pattern *p;
re2_matchdata *m;
VALUE text, options;
/* One required argument (text) and one optional argument (options). */
rb_scan_args(argc, argv, "11", &text, &options);
/* Ensure text is a string. */
StringValue(text);
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
/*
 * n is the number of submatches to extract. The remaining locals are the
 * defaults used when no options override them: search the whole text,
 * unanchored.
 */
int n;
int startpos = 0;
int endpos = RSTRING_LEN(text);
RE2::Anchor anchor = RE2::UNANCHORED;
if (RTEST(options)) {
/* A bare integer second argument is the number of submatches. */
if (FIXNUM_P(options)) {
n = NUM2INT(options);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* Anything else is treated as (or converted to) an options Hash. */
if (TYPE(options) != T_HASH) {
options = rb_Hash(options);
}
/* :endpos is only accepted when the build defines HAVE_ENDPOS_ARGUMENT. */
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
if (!NIL_P(endpos_option)) {
#ifdef HAVE_ENDPOS_ARGUMENT
Check_Type(endpos_option, T_FIXNUM);
endpos = NUM2INT(endpos_option);
if (endpos < 0) {
rb_raise(rb_eArgError, "endpos should be >= 0");
}
#else
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
#endif
}
/* :anchor must be one of the three known symbols. */
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
if (!NIL_P(anchor_option)) {
Check_Type(anchor_option, T_SYMBOL);
ID id_anchor_option = SYM2ID(anchor_option);
if (id_anchor_option == id_unanchored) {
anchor = RE2::UNANCHORED;
} else if (id_anchor_option == id_anchor_start) {
anchor = RE2::ANCHOR_START;
} else if (id_anchor_option == id_anchor_both) {
anchor = RE2::ANCHOR_BOTH;
} else {
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
}
}
/*
 * :submatches overrides the count; without it the count comes from the
 * pattern, which requires the pattern to have compiled successfully.
 */
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
if (!NIL_P(submatches_option)) {
Check_Type(submatches_option, T_FIXNUM);
n = NUM2INT(submatches_option);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
/* Note: :startpos is only validated after the early nil return above. */
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
if (!NIL_P(startpos_option)) {
Check_Type(startpos_option, T_FIXNUM);
startpos = NUM2INT(startpos_option);
if (startpos < 0) {
rb_raise(rb_eArgError, "startpos should be >= 0");
}
}
}
} else {
/* No options at all: default to every capturing group of a valid pattern. */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
if (startpos > endpos) {
rb_raise(rb_eArgError, "startpos should be <= endpos");
}
if (n == 0) {
/* No submatches requested: pass no match storage and return a boolean. */
#ifdef HAVE_ENDPOS_ARGUMENT
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, endpos, anchor, 0, 0);
#else
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
startpos, anchor, 0, 0);
#endif
return BOOL2RUBY(matched);
} else {
/* Because match returns the whole match as well. */
n += 1;
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
m->matches = new(std::nothrow) re2::StringPiece[n];
RB_OBJ_WRITE(matchdata, &m->regexp, self);
/*
 * Unfrozen text is duplicated and the copy frozen before being stored on
 * the MatchData; the match below runs against that stored string.
 */
if (!RTEST(rb_obj_frozen_p(text))) {
text = rb_str_freeze(rb_str_dup(text));
}
RB_OBJ_WRITE(matchdata, &m->text, text);
/* nothrow new signals allocation failure with a null pointer. */
if (m->matches == 0) {
rb_raise(rb_eNoMemError,
"not enough memory to allocate StringPieces for matches");
}
m->number_of_matches = n;
#ifdef HAVE_ENDPOS_ARGUMENT
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(m->text), RSTRING_LEN(m->text)),
startpos, endpos, anchor, m->matches, n);
#else
bool matched = p->pattern->Match(
re2::StringPiece(RSTRING_PTR(m->text), RSTRING_LEN(m->text)),
startpos, anchor, m->matches, n);
#endif
if (matched) {
return matchdata;
} else {
return Qnil;
}
}
}
|
#match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
.
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 |
# File 'ext/re2/re2.cc', line 1574
/*
 * Returns a Ruby boolean telling whether the pattern matches any substring
 * of `text`, using RE2::PartialMatch with no capture arguments.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  re2_pattern *wrapper;

  /* Ensure text is a string. */
  StringValue(text);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(haystack, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#max_mem ⇒ Integer
Returns the max_mem
setting for the regular expression.
1075 1076 1077 1078 1079 1080 |
# File 'ext/re2/re2.cc', line 1075
/*
 * Returns the `max_mem` option the pattern was compiled with, converted to
 * a Ruby integer with INT2FIX.
 */
static VALUE re2_regexp_max_mem(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return INT2FIX(opts.max_mem());
}
|
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 |
# File 'ext/re2/re2.cc', line 1319
/*
 * Returns a new Ruby Hash mapping each named capturing group to its group
 * index. Names are built with encoded_str_new using the pattern's encoding
 * option.
 */
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  typedef std::map<std::string, int> group_map;

  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const group_map &named = wrapper->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  group_map::const_iterator entry = named.begin();
  while (entry != named.end()) {
    const std::string &name = entry->first;

    rb_hash_aset(result,
                 encoded_str_new(name.data(), name.size(),
                                 wrapper->pattern->options().encoding()),
                 INT2FIX(entry->second));
    ++entry;
  }

  return result;
}
|
#never_nl? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
never_nl
option set to true
.
1107 1108 1109 1110 1111 1112 |
# File 'ext/re2/re2.cc', line 1107
/*
 * Returns a Ruby boolean reflecting the `never_nl` option the pattern was
 * compiled with.
 */
static VALUE re2_regexp_never_nl(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.never_nl());
}
|
#number_of_capturing_groups ⇒ Integer
Returns the number of capturing subpatterns, or -1 if the regexp
wasn't valid on construction. The overall match ($0
) does not
count: if the regexp is "(a)(b)"
, returns 2.
1303 1304 1305 1306 1307 1308 |
# File 'ext/re2/re2.cc', line 1303
/*
 * Returns the value of RE2's NumberOfCapturingGroups() for the wrapped
 * pattern as a Ruby integer.
 */
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const int groups = wrapper->pattern->NumberOfCapturingGroups();
  return INT2FIX(groups);
}
|
#ok? ⇒ Boolean
Returns whether or not the regular expression was compiled successfully.
996 997 998 999 1000 1001 |
# File 'ext/re2/re2.cc', line 996
/*
 * Returns a Ruby boolean reflecting RE2's ok() status for the wrapped
 * pattern.
 */
static VALUE re2_regexp_ok(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const bool compiled = wrapper->pattern->ok();
  return BOOL2RUBY(compiled);
}
|
#one_line? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
one_line
option set to true
.
1185 1186 1187 1188 1189 1190 |
# File 'ext/re2/re2.cc', line 1185
/*
 * Returns a Ruby boolean reflecting the `one_line` option the pattern was
 * compiled with.
 */
static VALUE re2_regexp_one_line(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.one_line());
}
|
#options ⇒ Hash
Returns a hash of the options currently set for the RE2::Regexp.
1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 |
# File 'ext/re2/re2.cc', line 1251
/*
 * Builds and returns a frozen Ruby Hash describing the options the pattern
 * was compiled with. Keys are inserted in this order: utf8, posix_syntax,
 * longest_match, log_errors, max_mem, literal, never_nl, case_sensitive,
 * perl_classes, word_boundary, one_line.
 */
static VALUE re2_regexp_options(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  VALUE result = rb_hash_new();

  /* utf8 is derived from the encoding rather than stored as a flag. */
  rb_hash_aset(result, ID2SYM(id_utf8),
               BOOL2RUBY(opts.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(result, ID2SYM(id_posix_syntax),
               BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(result, ID2SYM(id_longest_match),
               BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(result, ID2SYM(id_log_errors),
               BOOL2RUBY(opts.log_errors()));
  rb_hash_aset(result, ID2SYM(id_max_mem),
               INT2FIX(opts.max_mem()));
  rb_hash_aset(result, ID2SYM(id_literal),
               BOOL2RUBY(opts.literal()));
  rb_hash_aset(result, ID2SYM(id_never_nl),
               BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(result, ID2SYM(id_case_sensitive),
               BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(result, ID2SYM(id_perl_classes),
               BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(result, ID2SYM(id_word_boundary),
               BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(result, ID2SYM(id_one_line),
               BOOL2RUBY(opts.one_line()));

  /* The hash is only a snapshot, so hand it back frozen. */
  rb_obj_freeze(result);

  return result;
}
|
#partial_match(text, options = {}) ⇒ RE2::MatchData, ...
Match the pattern against any substring of the given text
and return a
MatchData instance with the specified number of submatches
(defaults to the total number of capturing groups) or a boolean (if no
submatches are required).
The number of submatches has a significant impact on performance: requesting one submatch is much faster than requesting more than one and requesting zero submatches is faster still.
39 40 41 |
# File 'lib/re2/regexp.rb', line 39
def partial_match(text, options = {})
match(text, Hash(options).merge(anchor: :unanchored))
end
|
#partial_match?(text) ⇒ Boolean
Returns true if the pattern matches any substring of the given text using
PartialMatch
.
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 |
# File 'ext/re2/re2.cc', line 1574
/*
 * Returns a Ruby boolean telling whether the pattern matches any substring
 * of `text`, using RE2::PartialMatch with no capture arguments.
 */
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  re2_pattern *wrapper;

  /* Ensure text is a string. */
  StringValue(text);
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(haystack, *wrapper->pattern);

  return BOOL2RUBY(found);
}
|
#pattern ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
979 980 981 982 983 984 985 986 |
# File 'ext/re2/re2.cc', line 979
/*
 * Returns the pattern's source text as a Ruby string, built with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const std::string &source = wrapper->pattern->pattern();
  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
perl_classes option set to true
.
1153 1154 1155 1156 1157 1158 |
# File 'ext/re2/re2.cc', line 1153
/*
 * Returns a Ruby boolean reflecting the `perl_classes` option the pattern
 * was compiled with.
 */
static VALUE re2_regexp_perl_classes(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.perl_classes());
}
|
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
posix_syntax
option set to true
.
1028 1029 1030 1031 1032 1033 |
# File 'ext/re2/re2.cc', line 1028
/*
 * Returns a Ruby boolean reflecting the `posix_syntax` option the pattern
 * was compiled with.
 */
static VALUE re2_regexp_posix_syntax(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.posix_syntax());
}
|
#program_size ⇒ Integer
Returns the program size, a very approximate measure of a regexp's "cost". Larger numbers are more expensive than smaller numbers.
1239 1240 1241 1242 1243 1244 |
# File 'ext/re2/re2.cc', line 1239
/*
 * Returns the value of RE2's ProgramSize() for the wrapped pattern as a
 * Ruby integer.
 */
static VALUE re2_regexp_program_size(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const int size = wrapper->pattern->ProgramSize();
  return INT2FIX(size);
}
|
#scan(text) ⇒ RE2::Scanner
Returns a Scanner for scanning the given text incrementally with
FindAndConsume
.
1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 |
# File 'ext/re2/re2.cc', line 1618
/*
 * Creates and returns a new RE2::Scanner over `text` for this pattern.
 *
 * The scanner records the regexp (self), the text, a StringPiece viewing
 * the text's bytes, and the number of capturing groups (0 when the pattern
 * did not compile). Its eof flag starts out false.
 *
 * Raises NoMemError if the input StringPiece cannot be allocated.
 */
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *p;
  re2_scanner *c;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);

  /*
   * NOTE(review): the StringPiece views the bytes of `text` itself, and
   * `text` is stored without being duplicated or frozen -- confirm that
   * callers mutating the string afterwards is acceptable.
   */
  c->input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(text), RSTRING_LEN(text));
  RB_OBJ_WRITE(scanner, &c->regexp, self);
  RB_OBJ_WRITE(scanner, &c->text, text);

  /*
   * nothrow new signals allocation failure with a null pointer; raise here
   * rather than hand back a scanner whose input is null.
   */
  if (c->input == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  if (p->pattern->ok()) {
    c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
  } else {
    c->number_of_capturing_groups = 0;
  }

  c->eof = false;

  return scanner;
}
|
#source ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
979 980 981 982 983 984 985 986 |
# File 'ext/re2/re2.cc', line 979
/*
 * Returns the pattern's source text as a Ruby string, built with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const std::string &source = wrapper->pattern->pattern();
  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#to_s ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
979 980 981 982 983 984 985 986 |
# File 'ext/re2/re2.cc', line 979
/*
 * Returns the pattern's source text as a Ruby string, built with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const std::string &source = wrapper->pattern->pattern();
  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#to_str ⇒ String
Returns a string version of the regular expression.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8
option for the
RE2::Regexp is set to false
(any other encoding's behaviour is undefined).
979 980 981 982 983 984 985 986 |
# File 'ext/re2/re2.cc', line 979
/*
 * Returns the pattern's source text as a Ruby string, built with
 * encoded_str_new using the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const std::string &source = wrapper->pattern->pattern();
  return encoded_str_new(source.data(), source.size(),
                         wrapper->pattern->options().encoding());
}
|
#utf8? ⇒ Boolean
Returns whether or not the regular expression was compiled with the utf8
option set to true
.
1012 1013 1014 1015 1016 1017 |
# File 'ext/re2/re2.cc', line 1012
/*
 * Returns a Ruby boolean telling whether the pattern's encoding option is
 * RE2::Options::EncodingUTF8.
 */
static VALUE re2_regexp_utf8(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.encoding() == RE2::Options::EncodingUTF8);
}
|
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression was compiled with the
word_boundary
option set to true
.
1169 1170 1171 1172 1173 1174 |
# File 'ext/re2/re2.cc', line 1169
/*
 * Returns a Ruby boolean reflecting the `word_boundary` option the pattern
 * was compiled with.
 */
static VALUE re2_regexp_word_boundary(const VALUE self) {
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  return BOOL2RUBY(opts.word_boundary());
}
|