Class: MatchData

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

MatchData encapsulates the result of matching a Regexp against string. It is returned by Regexp#match and String#match, and also stored in a global variable returned by Regexp.last_match.

Usage:

url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
m = url.match(/(\d\.?)+/)   # => #<MatchData "2.5.0" 1:"0">
m.string                    # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
m.regexp                    # => /(\d\.?)+/
# entire matched substring:
m[0]                        # => "2.5.0"

# Working with unnamed captures
m = url.match(%r{([^/]+)/([^/]+)\.html$})
m.captures                  # => ["2.5.0", "MatchData"]
m[1]                        # => "2.5.0"
m.values_at(1, 2)           # => ["2.5.0", "MatchData"]

# Working with named captures
m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
m.captures                  # => ["2.5.0", "MatchData"]
m.named_captures            # => {"version"=>"2.5.0", "module"=>"MatchData"}
m[:version]                 # => "2.5.0"
m.values_at(:version, :module)
                            # => ["2.5.0", "MatchData"]
# Numerical indexes are working, too
m[1]                        # => "2.5.0"
m.values_at(1, 2)           # => ["2.5.0", "MatchData"]

Global variables equivalence

Parts of last MatchData (returned by Regexp.last_match) are also aliased as global variables:

  • $~ is Regexp.last_match;

  • $& is Regexp.last_match[0];

  • $1, $2, and so on are Regexp.last_match[i] (captures by number);

  • $` is Regexp.last_match.pre_match;

  • $' is Regexp.last_match.post_match;

  • $+ is Regexp.last_match[-1] (the last capture).

See also “Special global variables” section in Regexp documentation.

Instance Method Summary collapse

Instance Method Details

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)


3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
# File 're.c', line 3082

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;

    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?

Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"

Overloads:



2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
# File 're.c', line 2014

static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, length;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &length);

    if (NIL_P(length)) {
	if (FIXNUM_P(idx)) {
	    return rb_reg_nth_match(FIX2INT(idx), match);
	}
	else {
	    int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
	    if (num >= 0) {
		return rb_reg_nth_match(num, match);
	    }
	    else {
		return match_ary_aref(match, idx, Qnil);
	    }
	}
    }
    else {
	long beg = NUM2LONG(idx);
	long len = NUM2LONG(length);
	long num_regs = RMATCH_REGS(match)->num_regs;
	if (len < 0) {
	    return Qnil;
	}
	if (beg < 0) {
	    beg += num_regs;
	    if (beg < 0) return Qnil;
	}
	else if (beg > num_regs) {
	    return Qnil;
	}
	else if (beg+len > num_regs) {
	    len = num_regs - beg;
	}
	return match_ary_subseq(match, beg, len, Qnil);
    }
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2

Returns:



1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
# File 're.c', line 1239

static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}

#capturesArray

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:



1906
1907
1908
1909
1910
# File 're.c', line 1906

static VALUE
match_captures(VALUE match)
{
    return match_array(match, 1);
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3

Returns:



1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
# File 're.c', line 1274

static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)


3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
# File 're.c', line 3082

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;

    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#hashInteger

Produce a hash based on the target string, regexp and matched positions of this matchdata.

See also Object#hash.

Returns:



3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
# File 're.c', line 3056

static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t hashval;

    match_check(match);
    hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
    hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
    regs = RMATCH_REGS(match);
    hashval = rb_hash_uint(hashval, regs->num_regs);
    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
    hashval = rb_hash_end(hashval);
    return ST2FIX(hashval);
}

#initialize_copy(orig) ⇒ Object

:nodoc:



1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
# File 're.c', line 1056

static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
    struct rmatch *rm;

    if (!OBJ_INIT_COPY(obj, orig)) return obj;

    RMATCH(obj)->str = RMATCH(orig)->str;
    RMATCH(obj)->regexp = RMATCH(orig)->regexp;

    rm = RMATCH(obj)->rmatch;
    if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
	rb_memerror();

    if (RMATCH(orig)->rmatch->char_offset_num_allocated) {
        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
            rm->char_offset_num_allocated = rm->regs.num_regs;
        }
        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
               struct rmatch_offset, rm->regs.num_regs);
	RB_GC_GUARD(orig);
    }

    return obj;
}

#inspectString

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">

Returns:



2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
# File 're.c', line 2246

static VALUE
match_inspect(VALUE match)
{
    VALUE cname = rb_class_path(rb_obj_class(match));
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
    }
    else if (NIL_P(regexp)) {
        return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
			  cname, rb_reg_nth_match(0, match));
    }

    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    onig_foreach_name(RREGEXP_PTR(regexp),
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_append(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1143
1144
1145
1146
1147
1148
# File 're.c', line 1143

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#named_capturesHash

Returns a Hash using named capture.

A key of the hash is a name of the named captures. A value of the hash is a string of last successful capture of corresponding group.

m = /(?<a>.)(?<b>.)/.match("01")
m.named_captures #=> {"a" => "0", "b" => "1"}

m = /(?<a>.)(?<b>.)?/.match("0")
m.named_captures #=> {"a" => "0", "b" => nil}

m = /(?<a>.)(?<a>.)/.match("01")
m.named_captures #=> {"a" => "1"}

m = /(?<a>x)|(?<a>y)/.match("x")
m.named_captures #=> {"a" => "x"}

Returns:



2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
# File 're.c', line 2172

static VALUE
match_named_captures(VALUE match)
{
    VALUE hash;
    struct MEMO *memo;

    match_check(match);
    if (NIL_P(RMATCH(match)->regexp))
	return rb_hash_new();

    hash = rb_hash_new();
    memo = MEMO_NEW(hash, match, 0);

    onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);

    return hash;
}

#namesArray

Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]

Returns:



1122
1123
1124
1125
1126
1127
1128
1129
# File 're.c', line 1122

static VALUE
match_names(VALUE match)
{
    match_check(match);
    if (NIL_P(RMATCH(match)->regexp))
	return rb_ary_new_capa(0);
    return rb_reg_names(RMATCH(match)->regexp);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]

Returns:



1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
# File 're.c', line 1203

static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return rb_assoc_new(Qnil, Qnil);

    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
			INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}

#post_matchString

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:



1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
# File 're.c', line 1781

VALUE
rb_reg_match_post(VALUE match)
{
    VALUE str;
    long pos;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = END(0);
    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
    return str;
}

#pre_matchString

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:



1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
# File 're.c', line 1755

VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE str;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    return str;
}

#regexpRegexp

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/

Returns:



1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
# File 're.c', line 1094

static VALUE
match_regexp(VALUE match)
{
    VALUE regexp;
    match_check(match);
    regexp = RMATCH(match)->regexp;
    if (NIL_P(regexp)) {
	VALUE str = rb_reg_nth_match(0, match);
	regexp = rb_reg_regcomp(rb_reg_quote(str));
	RMATCH(match)->regexp = regexp;
    }
    return regexp;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1143
1144
1145
1146
1147
1148
# File 're.c', line 1143

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#stringString

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:



2200
2201
2202
2203
2204
2205
# File 're.c', line 2200

static VALUE
match_string(VALUE match)
{
    match_check(match);
    return RMATCH(match)->str;	/* str is frozen */
}

#to_aArray

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:



1887
1888
1889
1890
1891
# File 're.c', line 1887

static VALUE
match_to_a(VALUE match)
{
    return match_array(match, 0);
}

#to_sString

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:



2111
2112
2113
2114
2115
2116
2117
2118
2119
# File 're.c', line 2111

static VALUE
match_to_s(VALUE match)
{
    VALUE str = rb_reg_last_match(match);

    match_check(match);
    if (NIL_P(str)) str = rb_str_new(0,0);
    return str;
}

#values_at(index, ...) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
m.to_a               #=> ["1 + 2", "1", "+", "2"]
m.values_at(:a, :b, :op) #=> ["1", "2", "+"]

Returns:



2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
# File 're.c', line 2074

static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    VALUE result;
    int i;

    match_check(match);
    result = rb_ary_new2(argc);

    for (i=0; i<argc; i++) {
	if (FIXNUM_P(argv[i])) {
	    rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
	}
	else {
	    int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
	    if (num >= 0) {
		rb_ary_push(result, rb_reg_nth_match(num, match));
	    }
	    else {
		match_ary_aref(match, argv[i], result);
	    }
	}
    }
    return result;
}