Class: RE2::MatchData
Instance Method Summary collapse
-
#[](*args) ⇒ Object
Retrieve zero, one or more matches by index or name.
-
#begin(n) ⇒ Integer?
Returns the offset of the start of the nth element of the MatchData.
-
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
-
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
-
#end(n) ⇒ Integer?
Returns the offset of the character following the end of the nth element of the MatchData.
-
#inspect ⇒ String
Returns a printable version of the match.
-
#length ⇒ Integer
Returns the number of elements in the MatchData (including the overall match, submatches and any nils).
-
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
-
#size ⇒ Integer
Returns the number of elements in the MatchData (including the overall match, submatches and any nils).
-
#string ⇒ String
Returns a frozen copy of the text supplied when matching.
-
#to_a ⇒ Array<String, nil>
Returns the array of matches including the overall match, submatches and any nils.
-
#to_s ⇒ String
Returns the entire matched string.
Instance Method Details
#[](index) ⇒ String? #[](start, length) ⇒ Array<String, nil> #[](range) ⇒ Array<String, nil> #[](name) ⇒ String?
Retrieve zero, one or more matches by index or name.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
Regexp is set to false (any other encoding's behaviour is undefined).
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 |
# File 'ext/re2/re2.cc', line 675
/*
 * Retrieve zero, one or more matches by index or name.
 *
 * Accepts one required and one optional argument. A String or Symbol first
 * argument is looked up as a named match; a lone non-negative Fixnum is
 * looked up as the nth match; every other combination is delegated to
 * rb_ary_aref on the array produced by re2_matchdata_to_a.
 */
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
  VALUE first, second;
  rb_scan_args(argc, argv, "11", &first, &second);

  /* Named lookup with a String key (length-delimited copy of its bytes). */
  if (TYPE(first) == T_STRING) {
    const std::string group_name(RSTRING_PTR(first), RSTRING_LEN(first));
    return re2_matchdata_named_match(group_name, self);
  }

  /* Named lookup with a Symbol key. */
  if (SYMBOL_P(first)) {
    return re2_matchdata_named_match(rb_id2name(SYM2ID(first)), self);
  }

  /* A single non-negative Fixnum selects exactly one match; FIX2INT is only
   * evaluated once FIXNUM_P has confirmed the argument is a Fixnum. */
  const bool plain_index =
      NIL_P(second) && FIXNUM_P(first) && FIX2INT(first) >= 0;
  if (plain_index) {
    return re2_matchdata_nth_match(FIX2INT(first), self);
  }

  /* Ranges, (start, length) pairs, negative indices and anything else are
   * handled by Array#[] on the full array of matches. */
  return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
}
|
#begin(n) ⇒ Integer?
Returns the offset of the start of the nth element of the RE2::MatchData.
479 480 481 482 483 484 485 486 487 488 489 490 491 492 |
# File 'ext/re2/re2.cc', line 479
/*
 * Returns the offset of the start of the nth element of the match data, or
 * nil when re2_matchdata_find_match yields no match for n.
 */
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  const re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found == NULL) {
    return Qnil;
  }

  /* Distance in bytes from the start of the stored text to the match; it is
   * passed through rb_str_sublen before being returned. */
  const long byte_offset = found->data() - RSTRING_PTR(data->text);

  return LONG2NUM(rb_str_sublen(data->text, byte_offset));
}
|
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
Regexp is set to false (any other encoding's behaviour is undefined).
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 |
# File 'ext/re2/re2.cc', line 767
/*
 * Returns the array of submatches for pattern matching.
 *
 * Element 0 (the overall match) is skipped. Each remaining match becomes a
 * string in the pattern's encoding, or nil when its StringPiece is empty.
 */
static VALUE re2_matchdata_deconstruct(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  VALUE submatches = rb_ary_new2(m->number_of_matches - 1);

  /* Start at 1 so the overall match is left out. */
  for (int group = 1; group < m->number_of_matches; ++group) {
    const re2::StringPiece &piece = m->matches[group];

    VALUE element = Qnil;
    if (!piece.empty()) {
      element = encoded_str_new(piece.data(), piece.size(),
                                p->pattern->options().encoding());
    }

    rb_ary_push(submatches, element);
  }

  return submatches;
}
|
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
As this is used by Ruby's pattern matching, it will return an empty hash if given more keys than there are capturing groups. Given keys will populate the hash in order but an invalid name will cause the hash to be immediately returned.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
Regexp is set to false (any other encoding's behaviour is undefined).
818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 |
# File 'ext/re2/re2.cc', line 818
/*
 * Returns a hash of capturing group names to submatches for pattern
 * matching.
 *
 * With nil keys, every named capturing group is included. With an array of
 * keys, the hash is left empty if there are more keys than capturing
 * groups; otherwise keys are added in order until the first key that does
 * not name a group, at which point the hash built so far is returned.
 *
 * Raises (via Check_Type) if keys is neither nil nor an Array, or if any
 * inspected key is not a Symbol.
 */
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
  typedef std::map<std::string, int> group_map;

  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  const group_map &named_groups = p->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  /* No specific keys requested: expose every named group. */
  if (NIL_P(keys)) {
    for (group_map::const_iterator entry = named_groups.begin();
         entry != named_groups.end(); ++entry) {
      rb_hash_aset(result,
                   ID2SYM(rb_intern(entry->first.data())),
                   re2_matchdata_nth_match(entry->second, self));
    }

    return result;
  }

  Check_Type(keys, T_ARRAY);

  /* More keys than capturing groups: return the empty hash. */
  if (p->pattern->NumberOfCapturingGroups() < RARRAY_LEN(keys)) {
    return result;
  }

  for (int i = 0; i < RARRAY_LEN(keys); ++i) {
    VALUE key = rb_ary_entry(keys, i);
    Check_Type(key, T_SYMBOL);

    group_map::const_iterator hit = named_groups.find(rb_id2name(SYM2ID(key)));
    if (hit == named_groups.end()) {
      /* Unknown group name: stop and return what has been collected. */
      break;
    }

    rb_hash_aset(result, key, re2_matchdata_nth_match(hit->second, self));
  }

  return result;
}
|
#end(n) ⇒ Integer?
Returns the offset of the character following the end of the nth element of the RE2::MatchData.
506 507 508 509 510 511 512 513 514 515 516 517 518 519 |
# File 'ext/re2/re2.cc', line 506
/*
 * Returns the offset of the character following the end of the nth element
 * of the match data, or nil when re2_matchdata_find_match yields no match
 * for n.
 */
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  const re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found == NULL) {
    return Qnil;
  }

  /* Byte distance from the start of the stored text to the match, plus the
   * match's own size; it is passed through rb_str_sublen before being
   * returned. */
  const long end_byte_offset =
      (found->data() - RSTRING_PTR(data->text)) + found->size();

  return LONG2NUM(rb_str_sublen(data->text, end_byte_offset));
}
|
#inspect ⇒ String
Returns a printable version of the match.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
Regexp is set to false (any other encoding's behaviour is undefined).
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 |
# File 'ext/re2/re2.cc', line 712
/*
 * Returns a printable version of the match.
 *
 * The result has the form
 *
 *   #<RE2::MatchData "whole" 1:"first" 2:nil>
 *
 * where the overall match is printed without an index, each later element
 * is prefixed with its index, and nil elements are printed as nil. The
 * returned string uses the pattern's encoding.
 */
static VALUE re2_matchdata_inspect(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  std::ostringstream output;
  output << "#<RE2::MatchData";

  for (int i = 0; i < m->number_of_matches; ++i) {
    output << " ";

    /* Only submatches carry an index prefix; the overall match does not. */
    if (i > 0) {
      output << i << ":";
    }

    VALUE match = re2_matchdata_nth_match(i, self);

    if (NIL_P(match)) {
      output << "nil";
    } else {
      output << "\"";
      /* write() with an explicit length keeps any embedded NUL bytes. */
      output.write(RSTRING_PTR(match), RSTRING_LEN(match));
      output << "\"";
    }
  }

  output << ">";

  /* str() returns a fresh copy of the buffer on every call, so take one
   * copy and read both the pointer and the length from it. */
  const std::string inspected = output.str();

  return encoded_str_new(inspected.data(), inspected.length(),
                         p->pattern->options().encoding());
}
|
#length ⇒ Integer
Returns the number of elements in the RE2::MatchData (including the
overall match, submatches and any nils).
460 461 462 463 464 465 466 |
# File 'ext/re2/re2.cc', line 460
/*
 * Returns the number of elements in the match data, as stored in its
 * number_of_matches field (the overall match, submatches and any nils).
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
529 530 531 532 533 534 |
# File 'ext/re2/re2.cc', line 529
/*
 * Returns the Regexp used in the match, i.e. the regexp VALUE stored on the
 * match data.
 */
static VALUE re2_matchdata_regexp(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->regexp;
}
|
#size ⇒ Integer
Returns the number of elements in the RE2::MatchData (including the
overall match, submatches and any nils).
460 461 462 463 464 465 466 |
# File 'ext/re2/re2.cc', line 460
/*
 * Returns the number of elements in the match data, as stored in its
 * number_of_matches field (the overall match, submatches and any nils).
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#string ⇒ String
Returns a frozen copy of the text supplied when matching.
If the text was already a frozen string, returns the original.
273 274 275 276 277 278 |
# File 'ext/re2/re2.cc', line 273
/*
 * Returns the text stored on the match data. The page describes this as a
 * frozen copy of the text supplied when matching; the freezing itself is
 * not done here.
 */
static VALUE re2_matchdata_string(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->text;
}
|
#to_a ⇒ Array<String, nil>
Returns the array of matches including the overall match, submatches and any
nils.
Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
Regexp is set to false (any other encoding's behaviour is undefined).
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 |
# File 'ext/re2/re2.cc', line 570
/*
 * Returns the array of matches including the overall match, submatches and
 * any nils.
 *
 * Each match becomes a string in the pattern's encoding, or nil when its
 * StringPiece is empty.
 */
static VALUE re2_matchdata_to_a(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  VALUE all_matches = rb_ary_new2(m->number_of_matches);

  /* Start at 0 so the overall match is the first element. */
  for (int group = 0; group < m->number_of_matches; ++group) {
    const re2::StringPiece &piece = m->matches[group];

    VALUE element = Qnil;
    if (!piece.empty()) {
      element = encoded_str_new(piece.data(), piece.size(),
                                p->pattern->options().encoding());
    }

    rb_ary_push(all_matches, element);
  }

  return all_matches;
}
|
#to_s ⇒ String
Returns the entire matched string.
696 697 698 |
# File 'ext/re2/re2.cc', line 696 static VALUE re2_matchdata_to_s(const VALUE self) { return re2_matchdata_nth_match(0, self); } |