Class: MatchData
Overview
MatchData encapsulates the result of matching a Regexp against string. It is returned by Regexp#match and String#match, and also stored in a global variable returned by Regexp.last_match.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0">
m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
m.regexp # => /(\d\.?)+/
# entire matched substring:
m[0] # => "2.5.0"
# Working with unnamed captures
m = url.match(%r{([^/]+)/([^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
# Working with named captures
m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"}
m[:version] # => "2.5.0"
m.values_at(:version, :module)
# => ["2.5.0", "MatchData"]
# Numerical indexes are working, too
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence
Parts of last MatchData (returned by Regexp.last_match) are also aliased as global variables:
-
$~
is Regexp.last_match; -
$&
is Regexp.last_match[0]
; -
$1
,$2
, and so on are Regexp.last_match[i]
(captures by number); -
$`
is Regexp.last_match.pre_match
; -
$'
is Regexp.last_match.post_match
; -
$+
is Regexp.last_match[-1]
(the last capture).
See also “Special global variables” section in Regexp documentation.
Instance Method Summary collapse
-
#==(match2) ⇒ Object
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
-
#[](*args) ⇒ Object
Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques.
-
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string.
-
#captures ⇒ Array
Returns the array of captures; equivalent to
mtch.to_a[1..-1]
. -
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string.
-
#eql?(match2) ⇒ Object
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
-
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
-
#initialize_copy(orig) ⇒ Object
:nodoc:.
-
#inspect ⇒ String
Returns a printable version of mtch.
-
#length ⇒ Object
Returns the number of elements in the match array.
-
#named_captures ⇒ Hash
Returns a Hash using named capture.
-
#names ⇒ Array
Returns a list of names of captures as an array of strings.
-
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match.
-
#post_match ⇒ String
Returns the portion of the original string after the current match.
-
#pre_match ⇒ String
Returns the portion of the original string before the current match.
-
#regexp ⇒ Regexp
Returns the regexp.
-
#size ⇒ Object
Returns the number of elements in the match array.
-
#string ⇒ String
Returns a frozen copy of the string passed in to
match
. -
#to_a ⇒ Array
Returns the array of matches.
-
#to_s ⇒ String
Returns the entire matched string.
-
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
Instance Method Details
#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean
Equality—Two matchdata are equal if their target strings,
patterns, and matched positions are identical.
3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 |
# File 're.c', line 3075
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
|
#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?
Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0]
is equivalent to the special variable $&
, and returns the entire matched string. mtch[1]
, mtch[2]
, and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0] #=> "HX1138"
m[1, 2] #=> ["H", "X"]
m[1..3] #=> ["H", "X", "113"]
m[-3, 2] #=> ["X", "113"]
m = /(?<foo>a+)b/.match("ccaaab")
m #=> #<MatchData "aaab" foo:"aaa">
m["foo"] #=> "aaa"
m[:foo] #=> "aaa"
2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 |
# File 're.c', line 2007
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
VALUE idx, length;
match_check(match);
rb_scan_args(argc, argv, "11", &idx, &length);
if (NIL_P(length)) {
if (FIXNUM_P(idx)) {
return rb_reg_nth_match(FIX2INT(idx), match);
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
if (num >= 0) {
return rb_reg_nth_match(num, match);
}
else {
return match_ary_aref(match, idx, Qnil);
}
}
}
else {
long beg = NUM2LONG(idx);
long len = NUM2LONG(length);
long num_regs = RMATCH_REGS(match)->num_regs;
if (len < 0) {
return Qnil;
}
if (beg < 0) {
beg += num_regs;
if (beg < 0) return Qnil;
}
else if (beg > num_regs) {
return Qnil;
}
else if (beg+len > num_regs) {
len = num_regs - beg;
}
return match_ary_subseq(match, beg, len, Qnil);
}
}
|
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0) #=> 1
m.begin(2) #=> 2
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo) #=> 0
p m.begin(:bar) #=> 2
1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 |
# File 're.c', line 1232
static VALUE
match_begin(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}
|
#captures ⇒ Array
Returns the array of captures; equivalent to mtch.to_a[1..-1]
.
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
f4 #=> "8"
1899 1900 1901 1902 1903 |
# File 're.c', line 1899
static VALUE
match_captures(VALUE match)
{
return match_array(match, 1);
}
|
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0) #=> 7
m.end(2) #=> 3
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo) #=> 1
p m.end(:bar) #=> 3
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 |
# File 're.c', line 1267
static VALUE
match_end(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}
|
#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean
Equality—Two matchdata are equal if their target strings,
patterns, and matched positions are identical.
3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 |
# File 're.c', line 3075
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
|
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
See also Object#hash.
3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 |
# File 're.c', line 3049
static VALUE
match_hash(VALUE match)
{
const struct re_registers *regs;
st_index_t hashval;
match_check(match);
hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
regs = RMATCH_REGS(match);
hashval = rb_hash_uint(hashval, regs->num_regs);
hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
hashval = rb_hash_end(hashval);
return ST2FIX(hashval);
}
|
#initialize_copy(orig) ⇒ Object
:nodoc:
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 |
# File 're.c', line 1049
static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
struct rmatch *rm;
if (!OBJ_INIT_COPY(obj, orig)) return obj;
RMATCH(obj)->str = RMATCH(orig)->str;
RMATCH(obj)->regexp = RMATCH(orig)->regexp;
rm = RMATCH(obj)->rmatch;
if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
rb_memerror();
if (RMATCH(orig)->rmatch->char_offset_num_allocated) {
if (rm->char_offset_num_allocated < rm->regs.num_regs) {
REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
rm->char_offset_num_allocated = rm->regs.num_regs;
}
MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
struct rmatch_offset, rm->regs.num_regs);
RB_GC_GUARD(orig);
}
return obj;
}
|
#inspect ⇒ String
Returns a printable version of mtch.
puts /.$/.match("foo").inspect
#=> #<MatchData "o">
puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 |
# File 're.c', line 2239
static VALUE
match_inspect(VALUE match)
{
VALUE cname = rb_class_path(rb_obj_class(match));
VALUE str;
int i;
struct re_registers *regs = RMATCH_REGS(match);
int num_regs = regs->num_regs;
struct backref_name_tag *names;
VALUE regexp = RMATCH(match)->regexp;
if (regexp == 0) {
return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
}
else if (NIL_P(regexp)) {
return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
cname, rb_reg_nth_match(0, match));
}
names = ALLOCA_N(struct backref_name_tag, num_regs);
MEMZERO(names, struct backref_name_tag, num_regs);
onig_foreach_name(RREGEXP_PTR(regexp),
match_inspect_name_iter, names);
str = rb_str_buf_new2("#<");
rb_str_append(str, cname);
for (i = 0; i < num_regs; i++) {
VALUE v;
rb_str_buf_cat2(str, " ");
if (0 < i) {
if (names[i].name)
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
else {
rb_str_catf(str, "%d", i);
}
rb_str_buf_cat2(str, ":");
}
v = rb_reg_nth_match(i, match);
if (v == Qnil)
rb_str_buf_cat2(str, "nil");
else
rb_str_buf_append(str, rb_str_inspect(v));
}
rb_str_buf_cat2(str, ">");
return str;
}
|
#length ⇒ Integer #size ⇒ Integer
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
1136 1137 1138 1139 1140 1141 |
# File 're.c', line 1136
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
|
#named_captures ⇒ Hash
Returns a Hash using named capture.
A key of the hash is a name of the named captures. A value of the hash is a string of last successful capture of corresponding group.
m = /(?<a>.)(?<b>.)/.match("01")
m.named_captures #=> {"a" => "0", "b" => "1"}
m = /(?<a>.)(?<b>.)?/.match("0")
m.named_captures #=> {"a" => "0", "b" => nil}
m = /(?<a>.)(?<a>.)/.match("01")
m.named_captures #=> {"a" => "1"}
m = /(?<a>x)|(?<a>y)/.match("x")
m.named_captures #=> {"a" => "x"}
2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 |
# File 're.c', line 2165
static VALUE
match_named_captures(VALUE match)
{
VALUE hash;
struct MEMO *memo;
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_hash_new();
hash = rb_hash_new();
memo = MEMO_NEW(hash, match, 0);
onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);
return hash;
}
|
#names ⇒ Array
Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]
m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names #=> ["x", "y"]
1115 1116 1117 1118 1119 1120 1121 1122 |
# File 're.c', line 1115
static VALUE
match_names(VALUE match)
{
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_ary_new_capa(0);
return rb_reg_names(RMATCH(match)->regexp);
}
|
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0) #=> [1, 7]
m.offset(4) #=> [6, 7]
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]
1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 |
# File 're.c', line 1196
static VALUE
match_offset(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return rb_assoc_new(Qnil, Qnil);
update_char_offset(match);
return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}
|
#post_match ⇒ String
Returns the portion of the original string after the current match. Equivalent to the special variable $'
.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match #=> ": The Movie"
1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 |
# File 're.c', line 1774
VALUE
rb_reg_match_post(VALUE match)
{
VALUE str;
long pos;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = RMATCH(match)->str;
pos = END(0);
str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
return str;
}
|
#pre_match ⇒ String
Returns the portion of the original string before the current match. Equivalent to the special variable $`
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match #=> "T"
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 |
# File 're.c', line 1748
VALUE
rb_reg_match_pre(VALUE match)
{
VALUE str;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
return str;
}
|
#regexp ⇒ Regexp
Returns the regexp.
m = /a.*b/.match("abc")
m.regexp #=> /a.*b/
1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 |
# File 're.c', line 1087
static VALUE
match_regexp(VALUE match)
{
VALUE regexp;
match_check(match);
regexp = RMATCH(match)->regexp;
if (NIL_P(regexp)) {
VALUE str = rb_reg_nth_match(0, match);
regexp = rb_reg_regcomp(rb_reg_quote(str));
RMATCH(match)->regexp = regexp;
}
return regexp;
}
|
#length ⇒ Integer #size ⇒ Integer
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
1136 1137 1138 1139 1140 1141 |
# File 're.c', line 1136
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
|
#string ⇒ String
Returns a frozen copy of the string passed in to match
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string #=> "THX1138."
2193 2194 2195 2196 2197 2198 |
# File 're.c', line 2193
static VALUE
match_string(VALUE match)
{
match_check(match);
return RMATCH(match)->str; /* str is frozen */
}
|
#to_a ⇒ Array
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a
is called when expanding *
variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
all #=> "HX1138"
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
1880 1881 1882 1883 1884 |
# File 're.c', line 1880
static VALUE
match_to_a(VALUE match)
{
return match_array(match, 0);
}
|
#to_s ⇒ String
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s #=> "HX1138"
2104 2105 2106 2107 2108 2109 2110 2111 2112 |
# File 're.c', line 2104
static VALUE
match_to_s(VALUE match)
{
VALUE str = rb_reg_last_match(match);
match_check(match);
if (NIL_P(str)) str = rb_str_new(0,0);
return str;
}
|
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
m.to_a #=> ["1 + 2", "1", "+", "2"]
m.values_at(:a, :b, :op) #=> ["1", "2", "+"]
2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 |
# File 're.c', line 2067
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
VALUE result;
int i;
match_check(match);
result = rb_ary_new2(argc);
for (i=0; i<argc; i++) {
if (FIXNUM_P(argv[i])) {
rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
if (num >= 0) {
rb_ary_push(result, rb_reg_nth_match(num, match));
}
else {
match_ary_aref(match, argv[i], result);
}
}
}
return result;
}
|