Class: MatchData
Overview
MatchData encapsulates the result of matching a Regexp against string. It is returned by Regexp#match and String#match, and also stored in a global variable returned by Regexp.last_match.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0">
m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
m.regexp # => /(\d\.?)+/
# entire matched substring:
m[0] # => "2.5.0"
# Working with unnamed captures
m = url.match(%r{([^/]+)/([^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
# Working with named captures
m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"}
m[:version] # => "2.5.0"
m.values_at(:version, :module)
# => ["2.5.0", "MatchData"]
# Numerical indexes are working, too
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence
Parts of last MatchData (returned by Regexp.last_match) are also aliased as global variables:
-
$~
is Regexp.last_match; -
$&
is Regexp.last_match[0]
; -
$1
,$2
, and so on are Regexp.last_match[i]
(captures by number); -
$`
is Regexp.last_match.pre_match
; -
$'
is Regexp.last_match.post_match
; -
$+
is Regexp.last_match[-1]
(the last capture).
See also “Special global variables” section in Regexp documentation.
Instance Method Summary collapse
-
#==(match2) ⇒ Object
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
-
#[](*args) ⇒ Object
Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques.
-
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string.
-
#captures ⇒ Array
Returns the array of captures; equivalent to
mtch.to_a[1..-1]
. -
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string.
-
#eql?(match2) ⇒ Object
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
-
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
-
#initialize_copy(orig) ⇒ Object
:nodoc:.
-
#inspect ⇒ String
Returns a printable version of mtch.
-
#length ⇒ Object
Returns the number of elements in the match array.
-
#named_captures ⇒ Hash
Returns a Hash using named capture.
-
#names ⇒ Array
Returns a list of names of captures as an array of strings.
-
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match.
-
#post_match ⇒ String
Returns the portion of the original string after the current match.
-
#pre_match ⇒ String
Returns the portion of the original string before the current match.
-
#regexp ⇒ Regexp
Returns the regexp.
-
#size ⇒ Object
Returns the number of elements in the match array.
-
#string ⇒ String
Returns a frozen copy of the string passed in to
match
. -
#to_a ⇒ Array
Returns the array of matches.
-
#to_s ⇒ String
Returns the entire matched string.
-
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
Instance Method Details
#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean
Equality—Two matchdata are equal if their target strings,
patterns, and matched positions are identical.
3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 |
# File 're.c', line 3082
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
|
#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?
Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0]
is equivalent to the special variable $&
, and returns the entire matched string. mtch[1]
, mtch[2]
, and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0] #=> "HX1138"
m[1, 2] #=> ["H", "X"]
m[1..3] #=> ["H", "X", "113"]
m[-3, 2] #=> ["X", "113"]
m = /(?<foo>a+)b/.match("ccaaab")
m #=> #<MatchData "aaab" foo:"aaa">
m["foo"] #=> "aaa"
m[:foo] #=> "aaa"
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 |
# File 're.c', line 2014
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
VALUE idx, length;
match_check(match);
rb_scan_args(argc, argv, "11", &idx, &length);
if (NIL_P(length)) {
if (FIXNUM_P(idx)) {
return rb_reg_nth_match(FIX2INT(idx), match);
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
if (num >= 0) {
return rb_reg_nth_match(num, match);
}
else {
return match_ary_aref(match, idx, Qnil);
}
}
}
else {
long beg = NUM2LONG(idx);
long len = NUM2LONG(length);
long num_regs = RMATCH_REGS(match)->num_regs;
if (len < 0) {
return Qnil;
}
if (beg < 0) {
beg += num_regs;
if (beg < 0) return Qnil;
}
else if (beg > num_regs) {
return Qnil;
}
else if (beg+len > num_regs) {
len = num_regs - beg;
}
return match_ary_subseq(match, beg, len, Qnil);
}
}
|
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0) #=> 1
m.begin(2) #=> 2
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo) #=> 0
p m.begin(:bar) #=> 2
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 |
# File 're.c', line 1239
static VALUE
match_begin(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}
|
#captures ⇒ Array
Returns the array of captures; equivalent to mtch.to_a[1..-1]
.
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
f4 #=> "8"
1906 1907 1908 1909 1910 |
# File 're.c', line 1906
static VALUE
match_captures(VALUE match)
{
return match_array(match, 1);
}
|
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0) #=> 7
m.end(2) #=> 3
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo) #=> 1
p m.end(:bar) #=> 3
1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 |
# File 're.c', line 1274
static VALUE
match_end(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}
|
#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean
Equality—Two matchdata are equal if their target strings,
patterns, and matched positions are identical.
3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 |
# File 're.c', line 3082
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
|
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
See also Object#hash.
3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 |
# File 're.c', line 3056
static VALUE
match_hash(VALUE match)
{
const struct re_registers *regs;
st_index_t hashval;
match_check(match);
hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
regs = RMATCH_REGS(match);
hashval = rb_hash_uint(hashval, regs->num_regs);
hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
hashval = rb_hash_end(hashval);
return ST2FIX(hashval);
}
|
#initialize_copy(orig) ⇒ Object
:nodoc:
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 |
# File 're.c', line 1056
static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
struct rmatch *rm;
if (!OBJ_INIT_COPY(obj, orig)) return obj;
RMATCH(obj)->str = RMATCH(orig)->str;
RMATCH(obj)->regexp = RMATCH(orig)->regexp;
rm = RMATCH(obj)->rmatch;
if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
rb_memerror();
if (RMATCH(orig)->rmatch->char_offset_num_allocated) {
if (rm->char_offset_num_allocated < rm->regs.num_regs) {
REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
rm->char_offset_num_allocated = rm->regs.num_regs;
}
MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
struct rmatch_offset, rm->regs.num_regs);
RB_GC_GUARD(orig);
}
return obj;
}
|
#inspect ⇒ String
Returns a printable version of mtch.
puts /.$/.match("foo").inspect
#=> #<MatchData "o">
puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 |
# File 're.c', line 2246
static VALUE
match_inspect(VALUE match)
{
VALUE cname = rb_class_path(rb_obj_class(match));
VALUE str;
int i;
struct re_registers *regs = RMATCH_REGS(match);
int num_regs = regs->num_regs;
struct backref_name_tag *names;
VALUE regexp = RMATCH(match)->regexp;
if (regexp == 0) {
return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
}
else if (NIL_P(regexp)) {
return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
cname, rb_reg_nth_match(0, match));
}
names = ALLOCA_N(struct backref_name_tag, num_regs);
MEMZERO(names, struct backref_name_tag, num_regs);
onig_foreach_name(RREGEXP_PTR(regexp),
match_inspect_name_iter, names);
str = rb_str_buf_new2("#<");
rb_str_append(str, cname);
for (i = 0; i < num_regs; i++) {
VALUE v;
rb_str_buf_cat2(str, " ");
if (0 < i) {
if (names[i].name)
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
else {
rb_str_catf(str, "%d", i);
}
rb_str_buf_cat2(str, ":");
}
v = rb_reg_nth_match(i, match);
if (v == Qnil)
rb_str_buf_cat2(str, "nil");
else
rb_str_buf_append(str, rb_str_inspect(v));
}
rb_str_buf_cat2(str, ">");
return str;
}
|
#length ⇒ Integer #size ⇒ Integer
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
1143 1144 1145 1146 1147 1148 |
# File 're.c', line 1143
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
|
#named_captures ⇒ Hash
Returns a Hash using named capture.
A key of the hash is a name of the named captures. A value of the hash is a string of last successful capture of corresponding group.
m = /(?<a>.)(?<b>.)/.match("01")
m.named_captures #=> {"a" => "0", "b" => "1"}
m = /(?<a>.)(?<b>.)?/.match("0")
m.named_captures #=> {"a" => "0", "b" => nil}
m = /(?<a>.)(?<a>.)/.match("01")
m.named_captures #=> {"a" => "1"}
m = /(?<a>x)|(?<a>y)/.match("x")
m.named_captures #=> {"a" => "x"}
2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 |
# File 're.c', line 2172
static VALUE
match_named_captures(VALUE match)
{
VALUE hash;
struct MEMO *memo;
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_hash_new();
hash = rb_hash_new();
memo = MEMO_NEW(hash, match, 0);
onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);
return hash;
}
|
#names ⇒ Array
Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]
m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names #=> ["x", "y"]
1122 1123 1124 1125 1126 1127 1128 1129 |
# File 're.c', line 1122
static VALUE
match_names(VALUE match)
{
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_ary_new_capa(0);
return rb_reg_names(RMATCH(match)->regexp);
}
|
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0) #=> [1, 7]
m.offset(4) #=> [6, 7]
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 |
# File 're.c', line 1203
static VALUE
match_offset(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return rb_assoc_new(Qnil, Qnil);
update_char_offset(match);
return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}
|
#post_match ⇒ String
Returns the portion of the original string after the current match. Equivalent to the special variable $'
.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match #=> ": The Movie"
1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 |
# File 're.c', line 1781
VALUE
rb_reg_match_post(VALUE match)
{
VALUE str;
long pos;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = RMATCH(match)->str;
pos = END(0);
str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
return str;
}
|
#pre_match ⇒ String
Returns the portion of the original string before the current match. Equivalent to the special variable $`
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match #=> "T"
1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 |
# File 're.c', line 1755
VALUE
rb_reg_match_pre(VALUE match)
{
VALUE str;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
return str;
}
|
#regexp ⇒ Regexp
Returns the regexp.
m = /a.*b/.match("abc")
m.regexp #=> /a.*b/
1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 |
# File 're.c', line 1094
static VALUE
match_regexp(VALUE match)
{
VALUE regexp;
match_check(match);
regexp = RMATCH(match)->regexp;
if (NIL_P(regexp)) {
VALUE str = rb_reg_nth_match(0, match);
regexp = rb_reg_regcomp(rb_reg_quote(str));
RMATCH(match)->regexp = regexp;
}
return regexp;
}
|
#length ⇒ Integer #size ⇒ Integer
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
1143 1144 1145 1146 1147 1148 |
# File 're.c', line 1143
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
|
#string ⇒ String
Returns a frozen copy of the string passed in to match
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string #=> "THX1138."
2200 2201 2202 2203 2204 2205 |
# File 're.c', line 2200
static VALUE
match_string(VALUE match)
{
match_check(match);
return RMATCH(match)->str; /* str is frozen */
}
|
#to_a ⇒ Array
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a
is called when expanding *
variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
all #=> "HX1138"
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
1887 1888 1889 1890 1891 |
# File 're.c', line 1887
static VALUE
match_to_a(VALUE match)
{
return match_array(match, 0);
}
|
#to_s ⇒ String
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s #=> "HX1138"
2111 2112 2113 2114 2115 2116 2117 2118 2119 |
# File 're.c', line 2111
static VALUE
match_to_s(VALUE match)
{
VALUE str = rb_reg_last_match(match);
match_check(match);
if (NIL_P(str)) str = rb_str_new(0,0);
return str;
}
|
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
m.to_a #=> ["1 + 2", "1", "+", "2"]
m.values_at(:a, :b, :op) #=> ["1", "2", "+"]
2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 |
# File 're.c', line 2074
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
VALUE result;
int i;
match_check(match);
result = rb_ary_new2(argc);
for (i=0; i<argc; i++) {
if (FIXNUM_P(argv[i])) {
rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
if (num >= 0) {
rb_ary_push(result, rb_reg_nth_match(num, match));
}
else {
match_ary_aref(match, argv[i], result);
}
}
}
return result;
}
|