Class: RE2::Regexp
- Inherits:
-
Object
- Object
- RE2::Regexp
- Defined in:
- ext/re2/re2.cc
Class Method Summary collapse
- .compile ⇒ Object
-
.escape(unquoted) ⇒ String
Returns a version of unquoted with all potentially meaningful regexp characters escaped.
-
.quote(unquoted) ⇒ String
Returns a version of unquoted with all potentially meaningful regexp characters escaped.
Instance Method Summary collapse
-
#===(text) ⇒ Boolean
Returns true or false to indicate a successful match.
-
#=~(text) ⇒ Boolean
Returns true or false to indicate a successful match.
-
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the case_sensitive option set to false. -
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the case_sensitive option set to true. -
#casefold? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the case_sensitive option set to false. -
#error ⇒ String?
If the RE2 could not be created properly, returns an error string otherwise returns nil.
-
#error_arg ⇒ String?
If the RE2 could not be created properly, returns the offending portion of the regexp otherwise returns nil.
-
#initialize(*args) ⇒ RE2::Regexp
constructor
Returns a new Regexp object with a compiled version of
pattern
stored inside. -
#inspect ⇒ String
Returns a printable version of the regular expression
re2
. -
#literal? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the literal option set to true. -
#log_errors? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the log_errors option set to true. -
#longest_match? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the longest_match option set to true. -
#match(*args) ⇒ Boolean, RE2::MatchData
Match the pattern against the given
text
and return either a boolean (if no submatches are required) or a MatchData instance. -
#match?(text) ⇒ Boolean
Returns true or false to indicate a successful match.
-
#max_mem ⇒ Fixnum
Returns the max_mem setting for the regular expression
re2
. -
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
-
#never_nl? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the never_nl option set to true. -
#number_of_capturing_groups ⇒ Fixnum
Returns the number of capturing subpatterns, or -1 if the regexp wasn’t valid on construction.
-
#ok? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled successfully. -
#one_line? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the one_line option set to true. -
#options ⇒ Hash
Returns a hash of the options currently set for
re2
. -
#pattern ⇒ String
Returns a string version of the regular expression
re2
. -
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the perl_classes option set to true. -
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the posix_syntax option set to true. -
#program_size ⇒ Fixnum
Returns the program size, a very approximate measure of a regexp’s “cost”.
-
#scan(text) ⇒ Object
Returns a Scanner for scanning the given text incrementally.
-
#source ⇒ String
Returns a string version of the regular expression
re2
. -
#to_s ⇒ String
Returns a string version of the regular expression
re2
. -
#to_str ⇒ String
Returns a string version of the regular expression
re2
. -
#utf8? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the utf8 option set to true. -
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression
re2
was compiled with the word_boundary option set to true.
Constructor Details
#initialize(pattern) ⇒ RE2::Regexp #initialize(pattern, options) ⇒ RE2::Regexp
Returns a new RE2::Regexp object with a compiled version of pattern
stored inside.
843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 |
# File 'ext/re2/re2.cc', line 843
/*
 * Constructor: compiles the given pattern (with an optional options value)
 * into a new RE2 object stored in this instance's re2_pattern struct.
 *
 * argv[0] - the pattern; coerced to a String (raises TypeError otherwise).
 * argv[1] - optional options, handed to parse_re2_options when truthy.
 *
 * Returns self. Raises NoMemoryError if the RE2 object cannot be allocated.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options;
  re2_pattern *p;

  rb_scan_args(argc, argv, "11", &pattern, &options);
  Data_Get_Struct(self, re2_pattern, p);

  if (RTEST(options)) {
    RE2::Options re2_options;
    parse_re2_options(re2_options, options);

    /* Pass pointer + length rather than a C string so that a pattern
     * containing an embedded NUL byte is not silently truncated. */
    pattern = StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
        re2_options);
  } else {
    pattern = StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
  }

  if (p->pattern == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  return self;
}
|
Class Method Details
.compile ⇒ Object
.escape(unquoted) ⇒ String
Returns a version of unquoted with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.
1473 1474 1475 1476 1477 |
# File 'ext/re2/re2.cc', line 1473
/*
 * Escapes every potentially meaningful regexp character in +unquoted+ via
 * RE2::QuoteMeta and returns the result as a new Ruby String.
 *
 * self     - unused receiver.
 * unquoted - the text to escape; coerced to a String.
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Hand RE2 the full byte range (pointer + length): going through a C
   * string would stop at the first embedded NUL and drop the remainder. */
  unquoted = StringValue(unquoted);
  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted)));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
|
.quote(unquoted) ⇒ String
Returns a version of unquoted with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.
1473 1474 1475 1476 1477 |
# File 'ext/re2/re2.cc', line 1473
/*
 * Escapes every potentially meaningful regexp character in +unquoted+ via
 * RE2::QuoteMeta and returns the result as a new Ruby String.
 *
 * self     - unused receiver.
 * unquoted - the text to escape; coerced to a String.
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Hand RE2 the full byte range (pointer + length): going through a C
   * string would stop at the first embedded NUL and drop the remainder. */
  unquoted = StringValue(unquoted);
  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted)));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
|
Instance Method Details
#===(text) ⇒ Boolean
Returns true or false to indicate a successful match. Equivalent to re2.match(text, 0).
1351 1352 1353 1354 1355 1356 1357 |
# File 'ext/re2/re2.cc', line 1351
/*
 * Boolean-style match: forwards to re2_regexp_match with the text and a
 * requested submatch count of 0.
 */
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
|
#=~(text) ⇒ Boolean
Returns true or false to indicate a successful match. Equivalent to re2.match(text, 0).
1351 1352 1353 1354 1355 1356 1357 |
# File 'ext/re2/re2.cc', line 1351
/*
 * Boolean-style match: forwards to re2_regexp_match with the text and a
 * requested submatch count of 0.
 */
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
|
#case_insensitive? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the case_sensitive option set to false.
1050 1051 1052 |
# File 'ext/re2/re2.cc', line 1050
static VALUE re2_regexp_case_insensitive(VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#case_sensitive? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the case_sensitive option set to true.
1034 1035 1036 1037 1038 |
# File 'ext/re2/re2.cc', line 1034
/* Reports the case_sensitive option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_case_sensitive(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool case_sensitive = wrapper->pattern->options().case_sensitive();
  return BOOL2RUBY(case_sensitive);
}
|
#casefold? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the case_sensitive option set to false.
1050 1051 1052 |
# File 'ext/re2/re2.cc', line 1050
static VALUE re2_regexp_case_insensitive(VALUE self) {
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
}
|
#error ⇒ String?
If the RE2 could not be created properly, returns an error string otherwise returns nil.
1105 1106 1107 1108 1109 1110 1111 1112 1113 |
# File 'ext/re2/re2.cc', line 1105
/*
 * Returns the RE2 error message as a new Ruby String when the wrapped
 * pattern reports it is not ok; returns nil otherwise.
 */
static VALUE re2_regexp_error(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  if (!wrapper->pattern->ok()) {
    return rb_str_new(wrapper->pattern->error().data(),
                      wrapper->pattern->error().size());
  }

  return Qnil;
}
|
#error_arg ⇒ String?
If the RE2 could not be created properly, returns the offending portion of the regexp otherwise returns nil.
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 |
# File 'ext/re2/re2.cc', line 1121
/*
 * Returns the offending portion of the pattern (RE2's error_arg) when the
 * wrapped pattern is not ok, tagged "UTF-8" or "ISO-8859-1" according to
 * the pattern's encoding option; returns nil when the pattern is ok.
 */
static VALUE re2_regexp_error_arg(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  if (!wrapper->pattern->ok()) {
    return ENCODED_STR_NEW(wrapper->pattern->error_arg().data(),
        wrapper->pattern->error_arg().size(),
        wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
  }

  return Qnil;
}
|
#inspect ⇒ String
Returns a printable version of the regular expression re2
.
874 875 876 877 878 879 880 881 882 883 884 885 886 887 |
# File 'ext/re2/re2.cc', line 874
/*
 * Builds the printable form "#<RE2::Regexp /PATTERN/>" and returns it as a
 * Ruby String tagged "UTF-8" or "ISO-8859-1" per the pattern's encoding
 * option.
 */
static VALUE re2_regexp_inspect(VALUE self) {
  re2_pattern *wrapper;
  ostringstream printable;

  Data_Get_Struct(self, re2_pattern, wrapper);

  printable << "#<RE2::Regexp /";
  printable << wrapper->pattern->pattern();
  printable << "/>";

  return ENCODED_STR_NEW(printable.str().data(), printable.str().length(),
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
}
|
#literal? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the literal option set to true.
1004 1005 1006 1007 1008 |
# File 'ext/re2/re2.cc', line 1004
/* Reports the literal option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_literal(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool literal = wrapper->pattern->options().literal();
  return BOOL2RUBY(literal);
}
|
#log_errors? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the log_errors option set to true.
974 975 976 977 978 |
# File 'ext/re2/re2.cc', line 974
/* Reports the log_errors option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_log_errors(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool log_errors = wrapper->pattern->options().log_errors();
  return BOOL2RUBY(log_errors);
}
|
#longest_match? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the longest_match option set to true.
959 960 961 962 963 |
# File 'ext/re2/re2.cc', line 959
/* Reports the longest_match option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_longest_match(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool longest_match = wrapper->pattern->options().longest_match();
  return BOOL2RUBY(longest_match);
}
|
#match(text) ⇒ RE2::MatchData #match(text, 0) ⇒ Boolean #match(text, number_of_matches) ⇒ RE2::MatchData
Match the pattern against the given text
and return either a boolean (if no submatches are required) or a MatchData instance.
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 |
# File 'ext/re2/re2.cc', line 1282
/*
 * Matches the pattern against text (unanchored, over the whole string).
 *
 * argv[0] - the text to search; coerced to a String.
 * argv[1] - optional number of submatches wanted.
 *
 * Return value, as implemented below:
 *   - number of matches is 0 (given explicitly, or the pattern has no
 *     capturing groups): true or false.
 *   - otherwise: a RE2::MatchData on a successful match, nil on failure.
 *   - no (or a falsy) number of matches and the pattern is not ok: nil.
 *
 * Raises ArgumentError for a negative number of matches and NoMemoryError
 * if the StringPiece array cannot be allocated.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
int n;
bool matched;
re2_pattern *p;
re2_matchdata *m;
VALUE text, number_of_matches, matchdata;
/* "11": one required argument (text), one optional (number_of_matches). */
rb_scan_args(argc, argv, "11", &text, &number_of_matches);
/* Ensure text is a string. */
text = StringValue(text);
Data_Get_Struct(self, re2_pattern, p);
/* RTEST: an omitted, nil or false count takes the else branch and
 * defaults to the pattern's own number of capturing groups. */
if (RTEST(number_of_matches)) {
n = NUM2INT(number_of_matches);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");
}
} else {
/* The ok() check is only made on this default path; an explicit count
 * proceeds to match() without checking ok(). */
if (!p->pattern->ok()) {
return Qnil;
}
n = p->pattern->NumberOfCapturingGroups();
}
if (n == 0) {
/* No submatches wanted: no MatchData is built, only a boolean result. */
/* NOTE(review): StringValuePtr yields a C string while the end position
 * is RSTRING_LEN(text); if the match helper measures the text by its NUL
 * terminator these disagree for text with embedded NULs - confirm. */
matched = match(p->pattern, StringValuePtr(text), 0,
static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
return BOOL2RUBY(matched);
} else {
/* Because match returns the whole match as well. */
n += 1;
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
Data_Get_Struct(matchdata, re2_matchdata, m);
m->matches = new(nothrow) re2::StringPiece[n];
m->regexp = self;
/* Store a frozen duplicate of the text on the MatchData and match
 * against that duplicate rather than the caller's string. */
m->text = rb_str_dup(text);
rb_str_freeze(m->text);
/* new(nothrow) yields a null pointer on failure instead of throwing. */
if (m->matches == 0) {
rb_raise(rb_eNoMemError,
"not enough memory to allocate StringPieces for matches");
}
m->number_of_matches = n;
matched = match(p->pattern, StringValuePtr(m->text), 0,
static_cast<int>(RSTRING_LEN(m->text)),
RE2::UNANCHORED, m->matches, n);
if (matched) {
return matchdata;
} else {
return Qnil;
}
}
}
|
#match?(text) ⇒ Boolean
Returns true or false to indicate a successful match. Equivalent to re2.match(text, 0).
1351 1352 1353 1354 1355 1356 1357 |
# File 'ext/re2/re2.cc', line 1351
/*
 * Boolean-style match: forwards to re2_regexp_match with the text and a
 * requested submatch count of 0.
 */
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
|
#max_mem ⇒ Fixnum
Returns the max_mem setting for the regular expression re2
.
989 990 991 992 993 |
# File 'ext/re2/re2.cc', line 989
/*
 * Returns the max_mem option of the wrapped RE2 as a Ruby Integer.
 *
 * LL2NUM is used instead of INT2FIX because INT2FIX performs no range
 * check: a value outside the Fixnum range (reachable on platforms with a
 * 32-bit long) would be silently converted to the wrong number.
 */
static VALUE re2_regexp_max_mem(VALUE self) {
  re2_pattern *p;
  Data_Get_Struct(self, re2_pattern, p);

  return LL2NUM(p->pattern->options().max_mem());
}
|
#named_capturing_groups ⇒ Hash
Returns a hash of names to capturing indices of groups.
1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 |
# File 'ext/re2/re2.cc', line 1217
/*
 * Returns a new Ruby Hash mapping each named capturing group's name to its
 * capturing index. Names are tagged "UTF-8" or "ISO-8859-1" according to
 * the pattern's encoding option.
 */
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
  VALUE capturing_groups;
  re2_pattern *p;

  Data_Get_Struct(self, re2_pattern, p);

  /* Bind by const reference instead of copying the whole map: this avoids
   * a deep copy on every call, and leaves no local object with a
   * destructor that would be skipped if a Ruby call below raises. */
  const map<string, int> &groups = p->pattern->NamedCapturingGroups();

  capturing_groups = rb_hash_new();

  for (map<string, int>::const_iterator group = groups.begin();
       group != groups.end(); ++group) {
    rb_hash_aset(capturing_groups,
        ENCODED_STR_NEW(group->first.data(), group->first.size(),
          p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
        INT2FIX(group->second));
  }

  return capturing_groups;
}
|
#never_nl? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the never_nl option set to true.
1019 1020 1021 1022 1023 |
# File 'ext/re2/re2.cc', line 1019
/* Reports the never_nl option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_never_nl(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool never_nl = wrapper->pattern->options().never_nl();
  return BOOL2RUBY(never_nl);
}
|
#number_of_capturing_groups ⇒ Fixnum
Returns the number of capturing subpatterns, or -1 if the regexp wasn’t valid on construction. The overall match ($0) does not count: if the regexp is “(a)(b)”, returns 2.
1205 1206 1207 1208 1209 1210 |
# File 'ext/re2/re2.cc', line 1205
/*
 * Returns RE2's NumberOfCapturingGroups() for the wrapped pattern as a
 * Ruby Fixnum.
 */
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const int group_count = wrapper->pattern->NumberOfCapturingGroups();
  return INT2FIX(group_count);
}
|
#ok? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled successfully.
914 915 916 917 918 |
# File 'ext/re2/re2.cc', line 914
/* Reports RE2's ok() status for the wrapped pattern as a Ruby boolean. */
static VALUE re2_regexp_ok(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool compiled_ok = wrapper->pattern->ok();
  return BOOL2RUBY(compiled_ok);
}
|
#one_line? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the one_line option set to true.
1093 1094 1095 1096 1097 |
# File 'ext/re2/re2.cc', line 1093
/* Reports the one_line option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_one_line(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool one_line = wrapper->pattern->options().one_line();
  return BOOL2RUBY(one_line);
}
|
#options ⇒ Hash
Returns a hash of the options currently set for re2
.
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 |
# File 'ext/re2/re2.cc', line 1152
/*
 * Returns a frozen Ruby Hash describing the options the wrapped RE2 was
 * compiled with. Keys are the symbols :utf8, :posix_syntax, :longest_match,
 * :log_errors, :max_mem, :literal, :never_nl, :case_sensitive,
 * :perl_classes, :word_boundary and :one_line (inserted in that order).
 */
static VALUE re2_regexp_options(VALUE self) {
  VALUE options;
  re2_pattern *p;

  Data_Get_Struct(self, re2_pattern, p);

  /* Look the options up once instead of once per key. */
  const RE2::Options &compiled = p->pattern->options();

  options = rb_hash_new();

  rb_hash_aset(options, ID2SYM(id_utf8),
      BOOL2RUBY(compiled.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(options, ID2SYM(id_posix_syntax),
      BOOL2RUBY(compiled.posix_syntax()));
  rb_hash_aset(options, ID2SYM(id_longest_match),
      BOOL2RUBY(compiled.longest_match()));
  rb_hash_aset(options, ID2SYM(id_log_errors),
      BOOL2RUBY(compiled.log_errors()));

  /* LL2NUM rather than INT2FIX: INT2FIX performs no range check, so a
   * max_mem outside the Fixnum range would be stored as a wrong number. */
  rb_hash_aset(options, ID2SYM(id_max_mem),
      LL2NUM(compiled.max_mem()));

  rb_hash_aset(options, ID2SYM(id_literal),
      BOOL2RUBY(compiled.literal()));
  rb_hash_aset(options, ID2SYM(id_never_nl),
      BOOL2RUBY(compiled.never_nl()));
  rb_hash_aset(options, ID2SYM(id_case_sensitive),
      BOOL2RUBY(compiled.case_sensitive()));
  rb_hash_aset(options, ID2SYM(id_perl_classes),
      BOOL2RUBY(compiled.perl_classes()));
  rb_hash_aset(options, ID2SYM(id_word_boundary),
      BOOL2RUBY(compiled.word_boundary()));
  rb_hash_aset(options, ID2SYM(id_one_line),
      BOOL2RUBY(compiled.one_line()));

  /* This is a read-only hash after all... */
  rb_obj_freeze(options);

  return options;
}
|
#pattern ⇒ String
Returns a string version of the regular expression re2
.
897 898 899 900 901 902 903 |
# File 'ext/re2/re2.cc', line 897
/*
 * Returns the pattern text of the wrapped RE2 as a Ruby String tagged
 * "UTF-8" or "ISO-8859-1" according to the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  return ENCODED_STR_NEW(
      wrapper->pattern->pattern().data(),
      wrapper->pattern->pattern().size(),
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
}
|
#perl_classes? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the perl_classes option set to true.
1063 1064 1065 1066 1067 |
# File 'ext/re2/re2.cc', line 1063
/* Reports the perl_classes option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_perl_classes(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool perl_classes = wrapper->pattern->options().perl_classes();
  return BOOL2RUBY(perl_classes);
}
|
#posix_syntax? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the posix_syntax option set to true.
944 945 946 947 948 |
# File 'ext/re2/re2.cc', line 944
/* Reports the posix_syntax option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_posix_syntax(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool posix_syntax = wrapper->pattern->options().posix_syntax();
  return BOOL2RUBY(posix_syntax);
}
|
#program_size ⇒ Fixnum
Returns the program size, a very approximate measure of a regexp’s “cost”. Larger numbers are more expensive than smaller numbers.
1140 1141 1142 1143 1144 |
# File 'ext/re2/re2.cc', line 1140
/* Returns RE2's ProgramSize() for the wrapped pattern as a Ruby Fixnum. */
static VALUE re2_regexp_program_size(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const int program_size = wrapper->pattern->ProgramSize();
  return INT2FIX(program_size);
}
|
#scan(text) ⇒ Object
Returns a Scanner for scanning the given text incrementally.
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 |
# File 'ext/re2/re2.cc', line 1365
/*
 * Creates a new RE2::Scanner for scanning +text+ incrementally with this
 * pattern.
 *
 * text - the text to scan; coerced to a String.
 *
 * The scanner records this regexp, a frozen copy of the text, a StringPiece
 * over that copy, the number of capturing groups (0 when the pattern is
 * not ok) and an eof flag initialised to false.
 *
 * Raises NoMemoryError if the input StringPiece cannot be allocated.
 */
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
  re2_pattern *p;
  re2_scanner *c;
  VALUE scanner, input_text;

  Data_Get_Struct(self, re2_pattern, p);

  /* Snapshot the text as a frozen duplicate (as re2_regexp_match does)
   * before wiring up the scanner: the StringPiece below points straight
   * into the string's buffer, so it must not be a string the caller can
   * later mutate or reallocate. */
  text = StringValue(text);
  input_text = rb_str_dup(text);
  rb_str_freeze(input_text);

  scanner = rb_class_new_instance(0, 0, re2_cScanner);
  Data_Get_Struct(scanner, re2_scanner, c);

  /* Pointer + length so that text containing NUL bytes is scanned whole. */
  c->input = new(nothrow) re2::StringPiece(RSTRING_PTR(input_text),
                                           RSTRING_LEN(input_text));
  c->regexp = self;
  c->text = input_text;

  /* new(nothrow) yields a null pointer on failure; report it the same way
   * the other allocations in this file do rather than crashing later. */
  if (c->input == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  if (p->pattern->ok()) {
    c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
  } else {
    c->number_of_capturing_groups = 0;
  }

  c->eof = false;

  return scanner;
}
|
#source ⇒ String
Returns a string version of the regular expression re2
.
897 898 899 900 901 902 903 |
# File 'ext/re2/re2.cc', line 897
/*
 * Returns the pattern text of the wrapped RE2 as a Ruby String tagged
 * "UTF-8" or "ISO-8859-1" according to the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  return ENCODED_STR_NEW(
      wrapper->pattern->pattern().data(),
      wrapper->pattern->pattern().size(),
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
}
|
#to_s ⇒ String
Returns a string version of the regular expression re2
.
897 898 899 900 901 902 903 |
# File 'ext/re2/re2.cc', line 897
/*
 * Returns the pattern text of the wrapped RE2 as a Ruby String tagged
 * "UTF-8" or "ISO-8859-1" according to the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  return ENCODED_STR_NEW(
      wrapper->pattern->pattern().data(),
      wrapper->pattern->pattern().size(),
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
}
|
#to_str ⇒ String
Returns a string version of the regular expression re2
.
897 898 899 900 901 902 903 |
# File 'ext/re2/re2.cc', line 897
/*
 * Returns the pattern text of the wrapped RE2 as a Ruby String tagged
 * "UTF-8" or "ISO-8859-1" according to the pattern's encoding option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  return ENCODED_STR_NEW(
      wrapper->pattern->pattern().data(),
      wrapper->pattern->pattern().size(),
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
}
|
#utf8? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the utf8 option set to true.
929 930 931 932 933 |
# File 'ext/re2/re2.cc', line 929
/*
 * Reports, as a Ruby boolean, whether the wrapped RE2's encoding option is
 * RE2::Options::EncodingUTF8.
 */
static VALUE re2_regexp_utf8(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool is_utf8 =
      wrapper->pattern->options().encoding() == RE2::Options::EncodingUTF8;
  return BOOL2RUBY(is_utf8);
}
|
#word_boundary? ⇒ Boolean
Returns whether or not the regular expression re2
was compiled with the word_boundary option set to true.
1078 1079 1080 1081 1082 |
# File 'ext/re2/re2.cc', line 1078
/* Reports the word_boundary option of the wrapped RE2 as a Ruby boolean. */
static VALUE re2_regexp_word_boundary(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  const bool word_boundary = wrapper->pattern->options().word_boundary();
  return BOOL2RUBY(word_boundary);
}
|