Class: RE2::MatchData
Instance Method Summary collapse
-
#[](*args) ⇒ Object
Retrieve zero, one or more matches by index or name.
-
#begin(n) ⇒ Integer?
Returns the offset of the start of the nth element of the MatchData.
-
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
-
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
-
#end(n) ⇒ Integer?
Returns the offset of the character following the end of the nth element of the MatchData.
-
#inspect ⇒ String
Returns a printable version of the match.
-
#length ⇒ Integer
Returns the number of elements in the MatchData (including the overall match, submatches and any nils).
-
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
-
#size ⇒ Integer
Returns the number of elements in the MatchData (including the overall match, submatches and any nils).
-
#string ⇒ String
Returns a frozen copy of the text supplied when matching.
-
#to_a ⇒ Array<String, nil>
Returns the array of matches including the overall match, submatches and any nils.
-
#to_s ⇒ String
Returns the entire matched string.
Instance Method Details
#[](index) ⇒ String? #[](start, length) ⇒ Array<String, nil> #[](range) ⇒ Array<String, nil> #[](name) ⇒ String?
Retrieve zero, one or more matches by index or name.
Note: RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default, or in ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 |
# File 'ext/re2/re2.cc', line 687
/*
 * Implements MatchData#[].
 *
 * Accepts one required and one optional argument and dispatches on them:
 *   - a String or Symbol first argument looks the match up by group name;
 *   - a lone, non-negative Fixnum looks the match up by position;
 *   - anything else (a second argument, a non-Fixnum, or a negative index)
 *     is forwarded unchanged to rb_ary_aref on the to_a array.
 */
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
  VALUE first, second;
  rb_scan_args(argc, argv, "11", &first, &second);

  if (TYPE(first) == T_STRING) {
    return re2_matchdata_named_match(
        std::string(RSTRING_PTR(first), RSTRING_LEN(first)), self);
  }

  if (SYMBOL_P(first)) {
    return re2_matchdata_named_match(rb_id2name(SYM2ID(first)), self);
  }

  /* FIX2INT is only evaluated once FIXNUM_P has confirmed the type. */
  if (NIL_P(second) && FIXNUM_P(first) && FIX2INT(first) >= 0) {
    return re2_matchdata_nth_match(FIX2INT(first), self);
  }

  return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
}
|
#begin(n) ⇒ Integer?
Returns the offset of the start of the nth element of the RE2::MatchData.
491 492 493 494 495 496 497 498 499 500 501 502 503 504 |
# File 'ext/re2/re2.cc', line 491
/*
 * Implements MatchData#begin(n).
 *
 * Returns nil when re2_matchdata_find_match yields no match for n; otherwise
 * returns the start of that match within the stored text, as computed by
 * rb_str_sublen from the byte distance between the match and the text start.
 */
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found == NULL) {
    return Qnil;
  }

  /* Pointer difference gives the byte offset of the match in the text. */
  long byte_offset = found->data() - RSTRING_PTR(data->text);

  return LONG2NUM(rb_str_sublen(data->text, byte_offset));
}
|
#deconstruct ⇒ Array<String, nil>
Returns the array of submatches for pattern matching.
Note: RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default, or in ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 |
# File 'ext/re2/re2.cc', line 779
/*
 * Implements MatchData#deconstruct.
 *
 * Builds an array of the submatches only (index 0, the overall match, is
 * skipped). A submatch whose StringPiece is empty is pushed as nil; any other
 * submatch is pushed as a string created by encoded_str_new using the
 * encoding from the pattern's options.
 */
static VALUE re2_matchdata_deconstruct(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, wrapper);

  VALUE submatches = rb_ary_new2(data->number_of_matches - 1);

  for (int i = 1; i < data->number_of_matches; ++i) {
    re2::StringPiece *piece = &data->matches[i];

    if (piece->empty()) {
      rb_ary_push(submatches, Qnil);
      continue;
    }

    rb_ary_push(submatches,
                encoded_str_new(piece->data(), piece->size(),
                                wrapper->pattern->options().encoding()));
  }

  return submatches;
}
|
#deconstruct_keys(keys) ⇒ Hash
Returns a hash of capturing group names to submatches for pattern matching.
As this is used by Ruby's pattern matching, it will return an empty hash if given more keys than there are capturing groups. Given keys will populate the hash in order but an invalid name will cause the hash to be immediately returned.
Note: RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default, or in ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 |
# File 'ext/re2/re2.cc', line 830
/*
 * Implements MatchData#deconstruct_keys(keys).
 *
 * With nil keys, returns a hash of every named capturing group (as a Symbol)
 * to its match. Otherwise keys must be an Array of Symbols:
 *   - if there are more keys than capturing groups, the hash is left empty;
 *   - keys are looked up in order, and the first key that is not a named
 *     group stops the lookup, returning whatever was collected so far.
 * Check_Type raises if keys is not an Array or an entry is not a Symbol.
 */
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
  typedef std::map<std::string, int>::const_iterator group_iterator;

  re2_matchdata *data;
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, wrapper);

  const std::map<std::string, int>& named_groups =
      wrapper->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  /* No keys requested: expose every named group. */
  if (NIL_P(keys)) {
    group_iterator group = named_groups.begin();
    while (group != named_groups.end()) {
      VALUE name = ID2SYM(rb_intern(group->first.data()));
      rb_hash_aset(result, name, re2_matchdata_nth_match(group->second, self));
      ++group;
    }
    return result;
  }

  Check_Type(keys, T_ARRAY);

  /* More keys than capturing groups can never all match. */
  if (wrapper->pattern->NumberOfCapturingGroups() < RARRAY_LEN(keys)) {
    return result;
  }

  for (int i = 0; i < RARRAY_LEN(keys); ++i) {
    VALUE key = rb_ary_entry(keys, i);
    Check_Type(key, T_SYMBOL);

    const char *name = rb_id2name(SYM2ID(key));
    group_iterator hit = named_groups.find(name);
    if (hit == named_groups.end()) {
      break;
    }

    rb_hash_aset(result, key, re2_matchdata_nth_match(hit->second, self));
  }

  return result;
}
|
#end(n) ⇒ Integer?
Returns the offset of the character following the end of the nth element of the RE2::MatchData.
518 519 520 521 522 523 524 525 526 527 528 529 530 531 |
# File 'ext/re2/re2.cc', line 518
/*
 * Implements MatchData#end(n).
 *
 * Returns nil when re2_matchdata_find_match yields no match for n; otherwise
 * returns the position just past that match within the stored text, as
 * computed by rb_str_sublen from the byte offset of the match's end.
 */
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *found = re2_matchdata_find_match(n, self);
  if (found == NULL) {
    return Qnil;
  }

  /* Byte offset of the match start plus its length in bytes. */
  long end_offset = (found->data() - RSTRING_PTR(data->text)) + found->size();

  return LONG2NUM(rb_str_sublen(data->text, end_offset));
}
|
#inspect ⇒ String
Returns a printable version of the match.
Note: RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default, or in ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 |
# File 'ext/re2/re2.cc', line 724
/*
 * Implements MatchData#inspect.
 *
 * Produces a string of the form
 *   #<RE2::MatchData "whole" 1:"first" 2:nil>
 * where the overall match (index 0) is printed without an index prefix and
 * each later element is prefixed with "<index>:". Elements for which
 * re2_matchdata_nth_match returns nil are printed as the bare word nil.
 * The result is created by encoded_str_new using the encoding from the
 * pattern's options.
 */
static VALUE re2_matchdata_inspect(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  std::ostringstream output;
  output << "#<RE2::MatchData";

  for (int i = 0; i < m->number_of_matches; ++i) {
    output << " ";

    if (i > 0) {
      output << i << ":";
    }

    VALUE match = re2_matchdata_nth_match(i, self);

    if (NIL_P(match)) {
      output << "nil";
    } else {
      output << "\"";
      /* write() with an explicit length keeps any embedded NUL bytes. */
      output.write(RSTRING_PTR(match), RSTRING_LEN(match));
      output << "\"";
    }
  }

  output << ">";

  /* str() returns a fresh copy on every call, so materialise it once rather
   * than copying the whole buffer separately for data() and length(). */
  const std::string rendered = output.str();

  return encoded_str_new(rendered.data(), rendered.length(),
                         p->pattern->options().encoding());
}
|
#length ⇒ Integer
Returns the number of elements in the RE2::MatchData (including the overall match, submatches and any nils).
472 473 474 475 476 477 478 |
# File 'ext/re2/re2.cc', line 472
/*
 * Implements MatchData#length (shared with MatchData#size).
 *
 * Returns the stored number_of_matches as a Fixnum.
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#regexp ⇒ RE2::Regexp
Returns the Regexp used in the match.
541 542 543 544 545 546 |
# File 'ext/re2/re2.cc', line 541
/*
 * Implements MatchData#regexp.
 *
 * Returns the regexp VALUE stored on the match data.
 */
static VALUE re2_matchdata_regexp(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->regexp;
}
|
#size ⇒ Integer
Returns the number of elements in the RE2::MatchData (including the overall match, submatches and any nils).
472 473 474 475 476 477 478 |
# File 'ext/re2/re2.cc', line 472
/*
 * Implements MatchData#size (shared with MatchData#length).
 *
 * Returns the stored number_of_matches as a Fixnum.
 */
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return INT2FIX(data->number_of_matches);
}
|
#string ⇒ String
Returns a frozen copy of the text supplied when matching.
If the text was already a frozen string, returns the original.
285 286 287 288 289 290 |
# File 'ext/re2/re2.cc', line 285
/*
 * Implements MatchData#string.
 *
 * Returns the text VALUE stored on the match data, without copying it here.
 */
static VALUE re2_matchdata_string(const VALUE self) {
  re2_matchdata *data;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  return data->text;
}
|
#to_a ⇒ Array<String, nil>
Returns the array of matches including the overall match, submatches and any nils.
Note: RE2 only supports UTF-8 and ISO-8859-1 encoding, so strings will be returned in UTF-8 by default, or in ISO-8859-1 if the :utf8 option for the Regexp is set to false (any other encoding's behaviour is undefined).
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 |
# File 'ext/re2/re2.cc', line 582
/*
 * Implements MatchData#to_a.
 *
 * Builds an array with one entry per stored match, starting with the overall
 * match at index 0. An entry whose StringPiece is empty becomes nil; any
 * other entry becomes a string created by encoded_str_new using the encoding
 * from the pattern's options.
 */
static VALUE re2_matchdata_to_a(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *wrapper;
  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, wrapper);

  VALUE elements = rb_ary_new2(data->number_of_matches);

  for (int i = 0; i < data->number_of_matches; ++i) {
    re2::StringPiece *piece = &data->matches[i];

    VALUE element = piece->empty()
        ? Qnil
        : encoded_str_new(piece->data(), piece->size(),
                          wrapper->pattern->options().encoding());

    rb_ary_push(elements, element);
  }

  return elements;
}
|
#to_s ⇒ String
Returns the entire matched string.
708 709 710 |
# File 'ext/re2/re2.cc', line 708 static VALUE re2_matchdata_to_s(const VALUE self) { return re2_matchdata_nth_match(0, self); } |