Class: MatchData

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

MatchData encapsulates the result of matching a Regexp against a string. It is returned by Regexp#match and String#match, and is also stored in a global variable returned by Regexp.last_match.

Usage:

url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
m = url.match(/(\d\.?)+/)   # => #<MatchData "2.5.0" 1:"0">
m.string                    # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
m.regexp                    # => /(\d\.?)+/
# entire matched substring:
m[0]                        # => "2.5.0"

# Working with unnamed captures
m = url.match(%r{([^/]+)/([^/]+)\.html$})
m.captures                  # => ["2.5.0", "MatchData"]
m[1]                        # => "2.5.0"
m.values_at(1, 2)           # => ["2.5.0", "MatchData"]

# Working with named captures
m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
m.captures                  # => ["2.5.0", "MatchData"]
m.named_captures            # => {"version"=>"2.5.0", "module"=>"MatchData"}
m[:version]                 # => "2.5.0"
m.values_at(:version, :module)
                            # => ["2.5.0", "MatchData"]
# Numerical indexes are working, too
m[1]                        # => "2.5.0"
m.values_at(1, 2)           # => ["2.5.0", "MatchData"]

Global variables equivalence

Parts of the last MatchData (returned by Regexp.last_match) are also aliased as global variables:

  • $~ is Regexp.last_match;

  • $& is Regexp.last_match[0];

  • $1, $2, and so on are Regexp.last_match[i] (captures by number);

  • $` is Regexp.last_match.pre_match;

  • $' is Regexp.last_match.post_match;

  • $+ is Regexp.last_match[-1] (the last capture).

See also the “Special global variables” section in the Regexp documentation.

Instance Method Summary collapse

Instance Method Details

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)


3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
# File 're.c', line 3075

/*
 * MatchData#== / MatchData#eql?
 *
 * Returns Qtrue when match1 and match2 are the same object, or when
 * match2 is also a T_MATCH whose target string, regexp, and begin/end
 * register arrays all compare equal.  Returns Qfalse otherwise,
 * including when either object has no regexp set.
 */
static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    size_t beg_bytes, end_bytes;

    /* Identity short-circuit. */
    if (match1 == match2) {
        return Qtrue;
    }
    if (!RB_TYPE_P(match2, T_MATCH)) {
        return Qfalse;
    }
    /* A zero regexp slot on either side never compares equal. */
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) {
        return Qfalse;
    }
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) {
        return Qfalse;
    }
    if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) {
        return Qfalse;
    }

    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) {
        return Qfalse;
    }

    /* Same register count: compare the raw offset arrays bytewise. */
    beg_bytes = regs1->num_regs * sizeof(*regs1->beg);
    if (memcmp(regs1->beg, regs2->beg, beg_bytes) != 0) {
        return Qfalse;
    }
    end_bytes = regs1->num_regs * sizeof(*regs1->end);
    if (memcmp(regs1->end, regs2->end, end_bytes) != 0) {
        return Qfalse;
    }
    return Qtrue;
}

#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?

Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"

Overloads:



2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
# File 're.c', line 2007

/*
 * MatchData#[]
 *
 * Accepts one or two arguments (rb_scan_args format "11"):
 *
 *   mtch[i]              - Fixnum index: the i-th match via rb_reg_nth_match.
 *   mtch[name]           - anything namev_to_backref_number resolves to a
 *                          non-negative group number: that group's match.
 *   mtch[other]          - otherwise delegated to match_ary_aref
 *                          (presumably Range-style indexing; see that helper).
 *   mtch[start, length]  - a sub-array of matches; returns Qnil when length
 *                          is negative or start lies outside the match array.
 *
 * In the (start, length) form the length is always clamped to the number
 * of elements available from start, for negative as well as non-negative
 * start values, so mtch[-3, 10] is bounded the same way as mtch[2, 10].
 */
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, length;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &length);

    if (NIL_P(length)) {
        if (FIXNUM_P(idx)) {
            return rb_reg_nth_match(FIX2INT(idx), match);
        }
        else {
            int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
            if (num >= 0) {
                return rb_reg_nth_match(num, match);
            }
            else {
                return match_ary_aref(match, idx, Qnil);
            }
        }
    }
    else {
        long beg = NUM2LONG(idx);
        long len = NUM2LONG(length);
        long num_regs = RMATCH_REGS(match)->num_regs;

        if (len < 0) {
            return Qnil;
        }
        if (beg < 0) {
            /* Negative start counts back from the end of the match array. */
            beg += num_regs;
            if (beg < 0) return Qnil;
        }
        else if (beg > num_regs) {
            return Qnil;
        }
        /*
         * Here 0 <= beg <= num_regs, so num_regs - beg cannot overflow.
         * Comparing against it (instead of computing beg+len) avoids
         * signed overflow for very large lengths, and doing it outside
         * the else-if chain applies the clamp to a negative start too.
         */
        if (len > num_regs - beg) {
            len = num_regs - beg;
        }
        return match_ary_subseq(match, beg, len, Qnil);
    }
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2

Returns:



1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
# File 're.c', line 1232

/*
 * MatchData#begin(n)
 *
 * Returns the character offset at which the n-th element of the match
 * array starts, or Qnil when that group did not participate in the match
 * (its begin register is negative).  n is resolved to a group number by
 * match_backref_number.  Raises IndexError when the resolved number is
 * outside 0...num_regs.
 */
static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    /* NOTE(review): BEG() appears to read the local named `regs`. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (!(0 <= i && i < regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }
    if (BEG(i) < 0) {
        return Qnil;
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}

#captures ⇒ Array

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:



1899
1900
1901
1902
1903
# File 're.c', line 1899

/*
 * MatchData#captures
 *
 * Returns the array built by match_array with a second argument of 1.
 * Per the method documentation this is equivalent to mtch.to_a[1..-1],
 * i.e. the whole-match element at index 0 is left out.
 */
static VALUE
match_captures(VALUE match)
{
    const int first_capture = 1;

    return match_array(match, first_capture);
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3

Returns:



1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
# File 're.c', line 1267

/*
 * MatchData#end(n)
 *
 * Returns the character offset immediately following the end of the
 * n-th element of the match array, or Qnil when that group did not
 * participate in the match (its begin register is negative).  n is
 * resolved to a group number by match_backref_number.  Raises IndexError
 * when the resolved number is outside 0...num_regs.
 */
static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    /* NOTE(review): BEG() appears to read the local named `regs`. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (!(0 <= i && i < regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }
    /* The begin register is what signals an unmatched group. */
    if (BEG(i) < 0) {
        return Qnil;
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)


3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
# File 're.c', line 3075

/*
 * MatchData#== / MatchData#eql?
 *
 * Returns Qtrue when match1 and match2 are the same object, or when
 * match2 is also a T_MATCH whose target string, regexp, and begin/end
 * register arrays all compare equal.  Returns Qfalse otherwise,
 * including when either object has no regexp set.
 */
static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    size_t beg_bytes, end_bytes;

    /* Identity short-circuit. */
    if (match1 == match2) {
        return Qtrue;
    }
    if (!RB_TYPE_P(match2, T_MATCH)) {
        return Qfalse;
    }
    /* A zero regexp slot on either side never compares equal. */
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) {
        return Qfalse;
    }
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) {
        return Qfalse;
    }
    if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) {
        return Qfalse;
    }

    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) {
        return Qfalse;
    }

    /* Same register count: compare the raw offset arrays bytewise. */
    beg_bytes = regs1->num_regs * sizeof(*regs1->beg);
    if (memcmp(regs1->beg, regs2->beg, beg_bytes) != 0) {
        return Qfalse;
    }
    end_bytes = regs1->num_regs * sizeof(*regs1->end);
    if (memcmp(regs1->end, regs2->end, end_bytes) != 0) {
        return Qfalse;
    }
    return Qtrue;
}

#hash ⇒ Integer

Produce a hash based on the target string, regexp and matched positions of this matchdata.

See also Object#hash.

Returns:



3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
# File 're.c', line 3049

/*
 * MatchData#hash
 *
 * Builds a hash value from, in this order: the hash of the target
 * string, the hash of the regexp, the register count, and the raw bytes
 * of the begin and end register arrays.  These are the same inputs that
 * match_equal compares.
 */
static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t h;
    st_index_t beg_hash, end_hash;

    match_check(match);

    /* Seed with the target string, then mix in the pattern. */
    h = rb_hash_start(rb_str_hash(RMATCH(match)->str));
    h = rb_hash_uint(h, reg_hash(match_regexp(match)));

    /* Mix in the register count and both offset arrays. */
    regs = RMATCH_REGS(match);
    h = rb_hash_uint(h, regs->num_regs);

    beg_hash = rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg));
    h = rb_hash_uint(h, beg_hash);

    end_hash = rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end));
    h = rb_hash_uint(h, end_hash);

    h = rb_hash_end(h);
    return ST2FIX(h);
}

#initialize_copy(orig) ⇒ Object

:nodoc:



1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
# File 're.c', line 1049

/*
 * MatchData#initialize_copy(orig)
 *
 * Makes obj a copy of orig: shares orig's target string and regexp
 * references, duplicates the match registers, and, if orig has a
 * character-offset table allocated, copies that table as well.
 * Returns obj.
 */
static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
    struct rmatch *rm;

    /* Nothing to do when OBJ_INIT_COPY reports no copy is needed. */
    if (!OBJ_INIT_COPY(obj, orig)) return obj;

    /* The string and regexp are shared by reference, not duplicated. */
    RMATCH(obj)->str = RMATCH(orig)->str;
    RMATCH(obj)->regexp = RMATCH(orig)->regexp;

    rm = RMATCH(obj)->rmatch;
    /* A non-zero result from the region copy is treated as out-of-memory. */
    if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
	rb_memerror();

    if (RMATCH(orig)->rmatch->char_offset_num_allocated) {
        /* Grow obj's table only if it cannot hold one entry per register. */
        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
            rm->char_offset_num_allocated = rm->regs.num_regs;
        }
        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
               struct rmatch_offset, rm->regs.num_regs);
	/* NOTE(review): presumably keeps orig alive while its table is read above. */
	RB_GC_GUARD(orig);
    }

    return obj;
}

#inspect ⇒ String

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">

Returns:



2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
# File 're.c', line 2239

/*
 * MatchData#inspect
 *
 * Returns a printable form of the match:
 *
 *   - regexp slot is 0:    "#<ClassName:address>"
 *   - regexp slot is nil:  "#<ClassName: whole-match>"
 *   - otherwise:           "#<ClassName "whole" 1:"a" name:"b" ...>",
 *     where each capture is labelled with its group name when the names
 *     table has one for that index and with its number otherwise, and an
 *     unmatched capture is shown as nil.
 */
static VALUE
match_inspect(VALUE match)
{
    VALUE cname = rb_class_path(rb_obj_class(match));
    VALUE out;
    int idx;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
    }
    if (NIL_P(regexp)) {
        return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
                          cname, rb_reg_nth_match(0, match));
    }

    /* One zeroed name slot per register; filled in by the name iterator. */
    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);
    onig_foreach_name(RREGEXP_PTR(regexp), match_inspect_name_iter, names);

    out = rb_str_buf_new2("#<");
    rb_str_append(out, cname);

    for (idx = 0; idx < num_regs; idx++) {
        VALUE capture;

        rb_str_buf_cat2(out, " ");

        /* Element 0 (the whole match) carries no "label:" prefix. */
        if (idx > 0) {
            if (names[idx].name) {
                rb_str_buf_cat(out, (const char *)names[idx].name, names[idx].len);
            }
            else {
                rb_str_catf(out, "%d", idx);
            }
            rb_str_buf_cat2(out, ":");
        }

        capture = rb_reg_nth_match(idx, match);
        if (NIL_P(capture)) {
            rb_str_buf_cat2(out, "nil");
        }
        else {
            rb_str_buf_append(out, rb_str_inspect(capture));
        }
    }

    rb_str_buf_cat2(out, ">");
    return out;
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1136
1137
1138
1139
1140
1141
# File 're.c', line 1136

/*
 * MatchData#length / MatchData#size
 *
 * Returns the number of registers held by the match (the whole match
 * plus every capture group) as a Fixnum.
 */
static VALUE
match_size(VALUE match)
{
    const struct re_registers *regs;

    match_check(match);
    regs = RMATCH_REGS(match);
    return INT2FIX(regs->num_regs);
}

#named_captures ⇒ Hash

Returns a Hash of the named captures.

Each key of the hash is the name of a named capture. Each value is the string of the last successful capture of the corresponding group.

m = /(?<a>.)(?<b>.)/.match("01")
m.named_captures #=> {"a" => "0", "b" => "1"}

m = /(?<a>.)(?<b>.)?/.match("0")
m.named_captures #=> {"a" => "0", "b" => nil}

m = /(?<a>.)(?<a>.)/.match("01")
m.named_captures #=> {"a" => "1"}

m = /(?<a>x)|(?<a>y)/.match("x")
m.named_captures #=> {"a" => "x"}

Returns:



2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
# File 're.c', line 2165

/*
 * MatchData#named_captures
 *
 * Returns a new Hash.  When the match has a non-nil regexp, every named
 * group of that regexp is visited with match_named_captures_iter, which
 * receives a MEMO holding the hash and the match; when the regexp slot
 * is nil the hash is returned empty.
 */
static VALUE
match_named_captures(VALUE match)
{
    VALUE hash;
    struct MEMO *memo;

    match_check(match);

    hash = rb_hash_new();
    if (!NIL_P(RMATCH(match)->regexp)) {
        memo = MEMO_NEW(hash, match, 0);
        onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr,
                          match_named_captures_iter, (void*)memo);
    }
    return hash;
}

#names ⇒ Array

Returns a list of the names of captures as an array of strings. It is the same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]

Returns:



1115
1116
1117
1118
1119
1120
1121
1122
# File 're.c', line 1115

/*
 * MatchData#names
 *
 * Returns the capture-group names of the match's regexp via
 * rb_reg_names, or a new empty array when the regexp slot is nil.
 */
static VALUE
match_names(VALUE match)
{
    VALUE regexp;

    match_check(match);
    regexp = RMATCH(match)->regexp;
    if (!NIL_P(regexp)) {
        return rb_reg_names(regexp);
    }
    return rb_ary_new_capa(0);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]

Returns:



1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
# File 're.c', line 1196

/*
 * MatchData#offset(n)
 *
 * Returns a two-element array [begin, end] of character offsets for the
 * n-th element of the match array, or [nil, nil] when that group did not
 * participate in the match (its begin register is negative).  n is
 * resolved to a group number by match_backref_number.  Raises IndexError
 * when the resolved number is outside 0...num_regs.
 */
static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    /* NOTE(review): BEG() appears to read the local named `regs`. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (!(0 <= i && i < regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }
    if (BEG(i) < 0) {
        return rb_assoc_new(Qnil, Qnil);
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
                        INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}

#post_match ⇒ String

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:



1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
# File 're.c', line 1774

/*
 * MatchData#post_match
 *
 * Returns the part of the target string that follows the whole match,
 * i.e. from END(0) to the end of the string.  Returns Qnil when match is
 * nil or when the begin register of the whole match is -1.
 */
VALUE
rb_reg_match_post(VALUE match)
{
    /* NOTE(review): BEG()/END() appear to read the local named `regs`. */
    struct re_registers *regs;
    VALUE target;
    long tail_start;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }

    target = RMATCH(match)->str;
    tail_start = END(0);
    return rb_str_subseq(target, tail_start, RSTRING_LEN(target) - tail_start);
}

#pre_match ⇒ String

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:



1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
# File 're.c', line 1748

/*
 * MatchData#pre_match
 *
 * Returns the part of the target string that precedes the whole match,
 * i.e. the first BEG(0) bytes.  Returns Qnil when match is nil or when
 * the begin register of the whole match is -1.
 */
VALUE
rb_reg_match_pre(VALUE match)
{
    /* NOTE(review): BEG() appears to read the local named `regs`. */
    struct re_registers *regs;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }
    return rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
}

#regexp ⇒ Regexp

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/

Returns:



1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
# File 're.c', line 1087

/*
 * MatchData#regexp
 *
 * Returns the regexp stored in the match.  When the stored slot is nil,
 * a regexp is compiled from the quoted text of the whole match, stored
 * back into the match so later calls reuse it, and returned.
 */
static VALUE
match_regexp(VALUE match)
{
    VALUE regexp;
    VALUE matched;

    match_check(match);

    regexp = RMATCH(match)->regexp;
    if (!NIL_P(regexp)) {
        return regexp;
    }

    /* Build a literal pattern from the matched text and store it. */
    matched = rb_reg_nth_match(0, match);
    regexp = rb_reg_regcomp(rb_reg_quote(matched));
    RMATCH(match)->regexp = regexp;
    return regexp;
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1136
1137
1138
1139
1140
1141
# File 're.c', line 1136

/*
 * MatchData#length / MatchData#size
 *
 * Returns the number of registers held by the match (the whole match
 * plus every capture group) as a Fixnum.
 */
static VALUE
match_size(VALUE match)
{
    const struct re_registers *regs;

    match_check(match);
    regs = RMATCH_REGS(match);
    return INT2FIX(regs->num_regs);
}

#string ⇒ String

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:



2193
2194
2195
2196
2197
2198
# File 're.c', line 2193

/*
 * MatchData#string
 *
 * Returns the target string stored in the match.  The stored object is
 * handed back directly rather than duplicated.
 */
static VALUE
match_string(VALUE match)
{
    VALUE target;

    match_check(match);
    target = RMATCH(match)->str;	/* str is frozen */
    return target;
}

#to_a ⇒ Array

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:



1880
1881
1882
1883
1884
# File 're.c', line 1880

/*
 * MatchData#to_a
 *
 * Returns the array built by match_array with a second argument of 0,
 * which per the method documentation is the full array of matches
 * beginning with the whole-match element.
 */
static VALUE
match_to_a(VALUE match)
{
    const int first_element = 0;

    return match_array(match, first_element);
}

#to_s ⇒ String

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:



2104
2105
2106
2107
2108
2109
2110
2111
2112
# File 're.c', line 2104

/*
 * MatchData#to_s
 *
 * Returns the entire matched string as obtained from rb_reg_last_match,
 * or a new empty string when that call yields nil.
 */
static VALUE
match_to_s(VALUE match)
{
    VALUE matched = rb_reg_last_match(match);

    match_check(match);
    return NIL_P(matched) ? rb_str_new(0,0) : matched;
}

#values_at(index, ...) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
m.to_a               #=> ["1 + 2", "1", "+", "2"]
m.values_at(:a, :b, :op) #=> ["1", "2", "+"]

Returns:



2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
# File 're.c', line 2067

/*
 * MatchData#values_at(index, ...)
 *
 * Builds an array by looking up each argument in turn:
 *
 *   - a Fixnum is used directly as a match index;
 *   - anything namev_to_backref_number resolves to a non-negative group
 *     number is looked up by that number;
 *   - any other argument is handed to match_ary_aref together with the
 *     result array, which that helper fills in itself.
 */
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    VALUE result;
    int pos;

    match_check(match);
    result = rb_ary_new2(argc);

    for (pos = 0; pos < argc; pos++) {
        VALUE arg = argv[pos];
        int num;

        if (FIXNUM_P(arg)) {
            rb_ary_push(result, rb_reg_nth_match(FIX2INT(arg), match));
            continue;
        }

        num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, arg);
        if (num < 0) {
            /* Not a group name: the helper appends to result directly. */
            match_ary_aref(match, arg, result);
        }
        else {
            rb_ary_push(result, rb_reg_nth_match(num, match));
        }
    }
    return result;
}