Class: RE2::MatchData
Instance Method Summary collapse
-
#[](*args) ⇒ Array<String, nil>, ...
Retrieve zero, one or more matches by index or name.
-
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the matchdata.
-
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
-
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
-
#end(n) ⇒ Integer
Returns the offset of the character following the end of the nth element of the matchdata.
-
#inspect ⇒ String
Returns a printable version of the match.
-
#length ⇒ Integer
Returns the number of elements in the match array (including nils).
-
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
-
#size ⇒ Integer
Returns the number of elements in the match array (including nils).
-
#string ⇒ String
Returns a frozen copy of the string passed into match.
-
#to_a ⇒ Array<String, nil>
Returns the array of matches.
-
#to_s ⇒ String
Returns the entire matched string.
Instance Method Details
#[](index) ⇒ String? #[](start, length) ⇒ Array<String, nil> #[](range) ⇒ Array<String, nil> #[](name) ⇒ String?
Retrieve zero, one or more matches by index or name.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the RE2::Regexp is set to false (any other encoding’s behaviour is undefined).
665 666 667 668 669 670 671 672 673 674 675 676 677 678 |
# File 'ext/re2/re2.cc', line 665
/*
 * Implements RE2::MatchData#[].
 *
 * Accepts one required and one optional argument and dispatches on the
 * first one:
 *   - a String or Symbol is looked up as a named match;
 *   - a lone non-negative Fixnum is looked up as a numbered match;
 *   - anything else (a second argument, a non-Fixnum, a negative index)
 *     is delegated to rb_ary_aref on the array built by
 *     re2_matchdata_to_a.
 */
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
  VALUE key, extra;
  rb_scan_args(argc, argv, "11", &key, &extra);

  /* Named lookups: the name is passed on as a C string. */
  if (TYPE(key) == T_STRING) {
    return re2_matchdata_named_match(RSTRING_PTR(key), self);
  }
  if (SYMBOL_P(key)) {
    return re2_matchdata_named_match(rb_id2name(SYM2ID(key)), self);
  }

  /* Fast path: exactly one argument that is a non-negative Fixnum. */
  if (NIL_P(extra) && FIXNUM_P(key) && FIX2INT(key) >= 0) {
    return re2_matchdata_nth_match(FIX2INT(key), self);
  }

  /* Everything else gets Array#[] semantics over the full match array. */
  return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
}
|
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the matchdata, or nil if no such element is found.
470 471 472 473 474 475 476 477 478 479 480 481 482 483 |
# File 'ext/re2/re2.cc', line 470
/*
 * Implements RE2::MatchData#begin.
 *
 * Looks up the match identified by n and returns the offset at which it
 * starts within the matched text, or nil when
 * re2_matchdata_find_match finds nothing for n.
 */
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found != NULL) {
    /* Pointer distance from the start of the text to the start of the
     * match; rb_str_sublen then maps that byte position onto the string. */
    long start = found->data() - RSTRING_PTR(data->text);

    return LONG2NUM(rb_str_sublen(data->text, start));
  }

  return Qnil;
}
|
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the RE2::Regexp is set to false (any other encoding’s behaviour is undefined).
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 |
# File 'ext/re2/re2.cc', line 753
/*
 * Implements RE2::MatchData#deconstruct.
 *
 * Builds an array of the submatches only: iteration starts at index 1,
 * so the element at index 0 is not included. A submatch whose
 * StringPiece is empty is reported as nil; any other submatch becomes a
 * string created with the encoding taken from the pattern's options.
 */
static VALUE re2_matchdata_deconstruct(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  VALUE submatches = rb_ary_new2(data->number_of_matches - 1);

  for (int group = 1; group < data->number_of_matches; ++group) {
    re2::StringPiece *piece = &data->matches[group];

    if (!piece->empty()) {
      rb_ary_push(submatches,
                  encoded_str_new(piece->data(), piece->size(),
                                  regexp->pattern->options().encoding()));
    } else {
      rb_ary_push(submatches, Qnil);
    }
  }

  return submatches;
}
|
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
As this is used by Ruby’s pattern matching, it will return an empty hash if given more keys than there are capturing groups. Given keys will populate the hash in order but an invalid name will cause the hash to be immediately returned.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the RE2::Regexp is set to false (any other encoding’s behaviour is undefined).
803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 |
# File 'ext/re2/re2.cc', line 803
/*
 * Implements RE2::MatchData#deconstruct_keys.
 *
 * keys == nil: returns a hash of every named capturing group (as a
 * Symbol) to its submatch.
 *
 * keys is an Array: each element must be a Symbol. When the array holds
 * more entries than the pattern has capturing groups, an empty hash is
 * returned. Otherwise keys are added in order until one does not name a
 * capturing group, at which point the hash built so far is returned.
 *
 * Check_Type rejects a non-nil keys that is not an Array and any key
 * that is not a Symbol.
 */
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
  typedef std::map<std::string, int> group_map;

  re2_matchdata *data;
  re2_pattern *regexp;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  const group_map &named = regexp->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  /* No explicit keys requested: expose every named group. */
  if (NIL_P(keys)) {
    group_map::const_iterator group = named.begin();
    while (group != named.end()) {
      rb_hash_aset(result,
                   ID2SYM(rb_intern(group->first.data())),
                   re2_matchdata_nth_match(group->second, self));
      ++group;
    }

    return result;
  }

  Check_Type(keys, T_ARRAY);

  /* More keys than capturing groups can never all be satisfied. */
  if (regexp->pattern->NumberOfCapturingGroups() < RARRAY_LEN(keys)) {
    return result;
  }

  for (int position = 0; position < RARRAY_LEN(keys); ++position) {
    VALUE key = rb_ary_entry(keys, position);
    Check_Type(key, T_SYMBOL);

    const char *wanted = rb_id2name(SYM2ID(key));
    group_map::const_iterator hit = named.find(wanted);
    if (hit == named.end()) {
      /* Unknown name: stop and hand back what has been collected. */
      return result;
    }

    rb_hash_aset(result, key, re2_matchdata_nth_match(hit->second, self));
  }

  return result;
}
|
#end(n) ⇒ Integer
Returns the offset of the character following the end of the nth element of the matchdata, or nil if no such element is found.
495 496 497 498 499 500 501 502 503 504 505 506 507 508 |
# File 'ext/re2/re2.cc', line 495
/*
 * Implements RE2::MatchData#end.
 *
 * Looks up the match identified by n and returns the offset just past
 * its last character within the matched text, or nil when
 * re2_matchdata_find_match finds nothing for n.
 */
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found != NULL) {
    /* Start of the match relative to the text, plus the match's size;
     * rb_str_sublen then maps that byte position onto the string. */
    long finish = (found->data() - RSTRING_PTR(data->text)) + found->size();

    return LONG2NUM(rb_str_sublen(data->text, finish));
  }

  return Qnil;
}
|
#inspect ⇒ String
Returns a printable version of the match.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the RE2::Regexp is set to false (any other encoding’s behaviour is undefined).
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 |
# File 'ext/re2/re2.cc', line 701
/*
 * Implements RE2::MatchData#inspect.
 *
 * Produces a string of the form
 *   #<RE2::MatchData "whole" 1:"first" 2:nil ...>
 * where the element at index 0 is printed without a prefix and every
 * later element is prefixed with its index. Elements for which
 * re2_matchdata_nth_match returns nil are printed as nil.
 *
 * The result is created with the encoding taken from the pattern's
 * options.
 */
static VALUE re2_matchdata_inspect(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  std::ostringstream output;
  output << "#<RE2::MatchData";

  for (int i = 0; i < m->number_of_matches; ++i) {
    output << " ";
    if (i > 0) {
      output << i << ":";
    }

    VALUE match = re2_matchdata_nth_match(i, self);
    if (match == Qnil) {
      output << "nil";
    } else {
      /* Write exactly RSTRING_LEN bytes: streaming the raw pointer as a
       * C string would stop at the first NUL byte inside the match and
       * relies on the buffer being NUL-terminated. */
      output << "\"";
      output.write(RSTRING_PTR(match), RSTRING_LEN(match));
      output << "\"";
    }
  }

  output << ">";

  /* Copy the buffer out once rather than once per argument. */
  const std::string description = output.str();

  return encoded_str_new(description.data(), description.length(),
                         p->pattern->options().encoding());
}
|
#length ⇒ Integer
Returns the number of elements in the match array (including nils).
452 453 454 455 456 457 458 |
# File 'ext/re2/re2.cc', line 452
/*
 * Implements RE2::MatchData#size and RE2::MatchData#length.
 *
 * Returns the stored number_of_matches as a Ruby Integer.
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
518 519 520 521 522 523 |
# File 'ext/re2/re2.cc', line 518
/*
 * Implements RE2::MatchData#regexp.
 *
 * Returns the regexp value stored on the match data.
 */
static VALUE re2_matchdata_regexp(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->regexp;
}
|
#size ⇒ Integer
Returns the number of elements in the match array (including nils).
452 453 454 455 456 457 458 |
# File 'ext/re2/re2.cc', line 452
/*
 * Implements RE2::MatchData#size and RE2::MatchData#length.
 *
 * Returns the stored number_of_matches as a Ruby Integer.
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#string ⇒ String
Returns a frozen copy of the string passed into match.
280 281 282 283 284 285 |
# File 'ext/re2/re2.cc', line 280
/*
 * Implements RE2::MatchData#string.
 *
 * Returns the text value stored on the match data. This function does
 * not copy or freeze it; NOTE(review): presumably that happens where
 * the match data is created -- confirm against that code.
 */
static VALUE re2_matchdata_string(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->text;
}
|
#to_a ⇒ Array<String, nil>
Returns the array of matches.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the RE2::Regexp is set to false (any other encoding’s behaviour is undefined).
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 |
# File 'ext/re2/re2.cc', line 558
/*
 * Implements RE2::MatchData#to_a.
 *
 * Builds an array with one element per stored match, starting at
 * index 0. A match whose StringPiece is empty is reported as nil; any
 * other match becomes a string created with the encoding taken from the
 * pattern's options.
 */
static VALUE re2_matchdata_to_a(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  VALUE matches = rb_ary_new2(data->number_of_matches);

  for (int position = 0; position < data->number_of_matches; ++position) {
    re2::StringPiece *piece = &data->matches[position];

    if (!piece->empty()) {
      rb_ary_push(matches,
                  encoded_str_new(piece->data(), piece->size(),
                                  regexp->pattern->options().encoding()));
    } else {
      rb_ary_push(matches, Qnil);
    }
  }

  return matches;
}
|
#to_s ⇒ String
Returns the entire matched string.
685 686 687 |
# File 'ext/re2/re2.cc', line 685 static VALUE re2_matchdata_to_s(const VALUE self) { return re2_matchdata_nth_match(0, self); } |