Class: MatchData

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp.last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on.

Instance Method Summary collapse

Instance Method Details

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)


2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
# File 're.c', line 2736

/*
 * Implements MatchData#== and MatchData#eql?.
 *
 * Returns Qtrue when match1 and match2 are the same object, or when both
 * are T_MATCH objects whose target strings, regexps, register counts and
 * begin/end register arrays all compare equal; Qfalse otherwise.
 */
static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;

    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;

    /*
     * A zero regexp slot is a state match_inspect in this file special-cases
     * (it prints only the object address).  Such an object carries nothing
     * to compare, so treat it as unequal to any other object instead of
     * handing zero VALUEs to rb_str_equal()/rb_reg_equal().
     */
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;

    /* NOTE(review): regexp can also be nil (match_regexp builds one lazily);
     * confirm rb_reg_equal() tolerates a nil first argument. */
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;

    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;

    /* Same register count: compare the raw begin and end position arrays. */
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?

Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"

Overloads:



1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
# File 're.c', line 1787

/*
 * Implements MatchData#[].
 *
 * argc/argv: the Ruby call arguments (one required, one optional).
 * match:     the receiver.
 *
 * With a single non-negative Fixnum the group is fetched directly with
 * rb_reg_nth_match().  With a single String or Symbol the name is resolved
 * to a group number first.  Every other call shape (negative index,
 * start/length pair, range, ...) is delegated to rb_ary_aref() on the
 * array produced by match_to_a().
 */
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, rest, re;

    match_check(match);
    /* "11": one mandatory argument (idx) followed by one optional (rest). */
    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (NIL_P(rest)) {
	if (FIXNUM_P(idx)) {
	    /* Negative fixnums are not handled here; they fall out of this
	     * block and reach the rb_ary_aref() call at the bottom. */
	    if (FIX2INT(idx) >= 0) {
		return rb_reg_nth_match(FIX2INT(idx), match);
	    }
	}
	else {
	    const char *p;
	    int num;

	    switch (TYPE(idx)) {
	      case T_SYMBOL:
		/* Convert the symbol to its string and share the string path. */
		idx = rb_sym2str(idx);
		/* fall through */
	      case T_STRING:
		p = StringValuePtr(idx);
		re = RMATCH(match)->regexp;
		/* Report an error when there is no regexp, the name's encoding
		 * is incompatible with the regexp source, or the lookup yields
		 * a number below 1.  NOTE(review): name_to_backref_error()
		 * presumably never returns; otherwise num could be read
		 * uninitialized below - confirm. */
		if (NIL_P(re) || !rb_enc_compatible(RREGEXP(re)->src, idx) ||
		    (num = name_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp,
						  p, p + RSTRING_LEN(idx))) < 1) {
		    name_to_backref_error(idx);
		}
		return rb_reg_nth_match(num, match);

	      default:
		/* Any other single argument is left to the array fallback. */
		break;
	    }
	}
    }

    return rb_ary_aref(argc, argv, match_to_a(match));
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2

Returns:



1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
# File 're.c', line 1176

/*
 * Implements MatchData#begin(n).
 *
 * n is resolved to a group number by match_backref_number().  Raises
 * IndexError when that number lies outside the register range, returns
 * Qnil when the group's begin register is negative, and otherwise returns
 * the cached character offset of the group's start as a Fixnum.
 */
static VALUE
match_begin(VALUE match, VALUE n)
{
    struct re_registers *regs;   /* must be called `regs`: BEG() refers to it */
    int nth;

    nth = match_backref_number(match, n);
    regs = RMATCH_REGS(match);
    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return Qnil;
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[nth].beg);
}

#captures ⇒ Array

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:



1739
1740
1741
1742
1743
# File 're.c', line 1739

/*
 * Implements MatchData#captures by delegating to match_array() with a
 * start argument of 1 (match_to_a passes 0; presumably 1 skips the
 * whole-match entry - confirm against match_array).
 */
static VALUE
match_captures(VALUE match)
{
    VALUE captured = match_array(match, 1);

    return captured;
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3

Returns:



1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
# File 're.c', line 1211

/*
 * Implements MatchData#end(n).
 *
 * n is resolved to a group number by match_backref_number().  Raises
 * IndexError when that number lies outside the register range, returns
 * Qnil when the group's begin register is negative, and otherwise returns
 * the cached character offset of the group's end as a Fixnum.
 */
static VALUE
match_end(VALUE match, VALUE n)
{
    struct re_registers *regs;   /* must be called `regs`: BEG() refers to it */
    int nth;

    nth = match_backref_number(match, n);
    regs = RMATCH_REGS(match);
    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return Qnil;
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[nth].end);
}

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)

Returns:

  • (Boolean)


2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
# File 're.c', line 2736

/*
 * Implements MatchData#== and MatchData#eql?.
 *
 * Returns Qtrue when match1 and match2 are the same object, or when both
 * are T_MATCH objects whose target strings, regexps, register counts and
 * begin/end register arrays all compare equal; Qfalse otherwise.
 */
static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;

    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;

    /*
     * A zero regexp slot is a state match_inspect in this file special-cases
     * (it prints only the object address).  Such an object carries nothing
     * to compare, so treat it as unequal to any other object instead of
     * handing zero VALUEs to rb_str_equal()/rb_reg_equal().
     */
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;

    /* NOTE(review): regexp can also be nil (match_regexp builds one lazily);
     * confirm rb_reg_equal() tolerates a nil first argument. */
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;

    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;

    /* Same register count: compare the raw begin and end position arrays. */
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#hash ⇒ Integer

Produce a hash based on the target string, regexp and matched positions of this matchdata.

See also Object#hash.

Returns:



2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
# File 're.c', line 2712

/*
 * Implements MatchData#hash.
 *
 * Folds the hash of the target string, the hash of the regexp, the number
 * of registers, and the raw bytes of the begin/end register arrays into a
 * single value, returned as a Fixnum.  These are the same components that
 * match_equal compares.
 *
 * NOTE(review): unlike most sibling accessors this function does not call
 * match_check(); confirm whether a MatchData with unset str/regexp can
 * reach it.
 */
static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));

    /* rb_hash_uint() hands back the updated state; storing it is what makes
     * the regexp contribute to the result.  The previous code dropped the
     * return value here, so the regexp was silently left out of the hash. */
    hashval = rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
    regs = RMATCH_REGS(match);
    hashval = rb_hash_uint(hashval, regs->num_regs);
    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
    hashval = rb_hash_end(hashval);
    return LONG2FIX(hashval);
}

#initialize_copy(orig) ⇒ Object

:nodoc:



985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
# File 're.c', line 985

/*
 * Implements MatchData#initialize_copy: makes obj a copy of orig.
 *
 * obj:  the freshly allocated destination object.
 * orig: the MatchData being duplicated.
 *
 * Returns obj.  Calls rb_memerror() if the register copy reports failure.
 */
static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
    struct rmatch *rm;

    /* Nothing to do when OBJ_INIT_COPY reports that no copy is needed. */
    if (!OBJ_INIT_COPY(obj, orig)) return obj;

    /* The string and regexp are shared by reference, not duplicated. */
    RMATCH(obj)->str = RMATCH(orig)->str;
    RMATCH(obj)->regexp = RMATCH(orig)->regexp;

    rm = RMATCH(obj)->rmatch;
    /* A non-zero result from the region copy is treated as out-of-memory. */
    if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
	rb_memerror();

    if (!RMATCH(orig)->rmatch->char_offset_updated) {
        /* The source has no valid offset cache; mark ours as stale too. */
        rm->char_offset_updated = 0;
    }
    else {
        /* Grow our offset table if it cannot hold one entry per register,
         * then copy the source's cached offsets across. */
        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
            rm->char_offset_num_allocated = rm->regs.num_regs;
        }
        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
               struct rmatch_offset, rm->regs.num_regs);
        rm->char_offset_updated = 1;
        /* Presumably keeps orig alive while its char_offset buffer is read
         * above (REALLOC_N may allocate) - TODO confirm. */
	RB_GC_GUARD(orig);
    }

    return obj;
}

#inspect ⇒ String

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">

Returns:



1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
# File 're.c', line 1939

/*
 * Implements MatchData#inspect.
 *
 * Builds a string of the form
 *   #<ClassName "whole match" 1:"grp" name:"grp" 3:nil>
 * where each capture group is labelled with its name when one was recorded
 * for that index, and with its number otherwise.
 */
static VALUE
match_inspect(VALUE match)
{
    VALUE cname = rb_class_path(rb_obj_class(match));
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        /* No regexp slot set at all: print only the class and address. */
        return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
    }
    else if (NIL_P(regexp)) {
        /* nil regexp: there is no name table to consult, so print the class
         * followed by the whole match only. */
        return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
			  cname, rb_reg_nth_match(0, match));
    }

    /* Stack-allocated, zero-filled table with one slot per register; a slot
     * whose name stays NULL is treated as an unnamed group below. */
    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    /* match_inspect_name_iter presumably fills names[] for each named
     * group - verify against its definition. */
    onig_foreach_name(RREGEXP(regexp)->ptr,
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_append(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        /* Index 0 is printed without a label. */
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1074
1075
1076
1077
1078
1079
# File 're.c', line 1074

/*
 * Implements MatchData#length / MatchData#size: returns the register count
 * of the match as a Fixnum, after validating the receiver with match_check().
 */
static VALUE
match_size(VALUE match)
{
    int count;

    match_check(match);
    count = RMATCH_REGS(match)->num_regs;
    return INT2FIX(count);
}

#names ⇒ Array

Returns a list of names of captures as an array of strings. It is the same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]

Returns:



1055
1056
1057
1058
1059
1060
# File 're.c', line 1055

/*
 * Implements MatchData#names: validates the receiver with match_check(),
 * then returns whatever rb_reg_names() yields for the regexp stored in
 * the match.
 */
static VALUE
match_names(VALUE match)
{
    VALUE pattern;

    match_check(match);
    pattern = RMATCH(match)->regexp;
    return rb_reg_names(pattern);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]

Returns:



1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
# File 're.c', line 1140

/*
 * Implements MatchData#offset(n).
 *
 * n is resolved to a group number by match_backref_number().  Raises
 * IndexError when that number lies outside the register range.  Returns a
 * two-element array: [nil, nil] when the group's begin register is
 * negative, otherwise the cached character offsets [begin, end].
 */
static VALUE
match_offset(VALUE match, VALUE n)
{
    struct re_registers *regs;   /* must be called `regs`: BEG() refers to it */
    VALUE first, last;
    int nth;

    nth = match_backref_number(match, n);
    regs = RMATCH_REGS(match);
    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return rb_assoc_new(Qnil, Qnil);
    }

    /* Refresh the char_offset table before reading from it. */
    update_char_offset(match);
    first = INT2FIX(RMATCH(match)->rmatch->char_offset[nth].beg);
    last = INT2FIX(RMATCH(match)->rmatch->char_offset[nth].end);
    return rb_assoc_new(first, last);
}

#post_match ⇒ String

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:



1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
# File 're.c', line 1607

/*
 * Implements MatchData#post_match (the $' special variable).
 *
 * Returns Qnil when match is nil or when register 0 has a begin position
 * of -1.  Otherwise returns the part of the target string from the end of
 * register 0 to the end of the string; the result is tainted when the
 * match object is tainted.
 */
VALUE
rb_reg_match_post(VALUE match)
{
    struct re_registers *regs;   /* must be called `regs`: BEG()/END() refer to it */
    VALUE target, tail;
    long tail_start;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }

    target = RMATCH(match)->str;
    tail_start = END(0);
    tail = rb_str_subseq(target, tail_start, RSTRING_LEN(target) - tail_start);
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(tail);
    }
    return tail;
}

#pre_match ⇒ String

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:



1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
# File 're.c', line 1580

/*
 * Implements MatchData#pre_match (the $` special variable).
 *
 * Returns Qnil when match is nil or when register 0 has a begin position
 * of -1.  Otherwise returns the part of the target string that precedes
 * the start of register 0; the result is tainted when the match object is
 * tainted.
 */
VALUE
rb_reg_match_pre(VALUE match)
{
    struct re_registers *regs;   /* must be called `regs`: BEG() refers to it */
    VALUE head;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }

    head = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(head);
    }
    return head;
}

#regexp ⇒ Regexp

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/

Returns:



1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
# File 're.c', line 1027

/*
 * Implements MatchData#regexp.
 *
 * Returns the regexp stored in the match.  When that slot is nil, a regexp
 * is compiled from the quoted text of the whole match, stored back into
 * the match object, and returned.
 */
static VALUE
match_regexp(VALUE match)
{
    VALUE pattern;

    match_check(match);
    pattern = RMATCH(match)->regexp;
    if (!NIL_P(pattern)) {
        return pattern;
    }

    /* No regexp recorded: synthesize one from the matched text and cache it. */
    pattern = rb_reg_regcomp(rb_reg_quote(rb_reg_nth_match(0, match)));
    RMATCH(match)->regexp = pattern;
    return pattern;
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



1074
1075
1076
1077
1078
1079
# File 're.c', line 1074

/*
 * Implements MatchData#length / MatchData#size: returns the register count
 * of the match as a Fixnum, after validating the receiver with match_check().
 */
static VALUE
match_size(VALUE match)
{
    int count;

    match_check(match);
    count = RMATCH_REGS(match)->num_regs;
    return INT2FIX(count);
}

#string ⇒ String

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:



1893
1894
1895
1896
1897
1898
# File 're.c', line 1893

/*
 * Implements MatchData#string: validates the receiver with match_check()
 * and returns the string stored in the match object.
 */
static VALUE
match_string(VALUE match)
{
    VALUE target;

    match_check(match);
    target = RMATCH(match)->str;	/* the stored str is frozen */
    return target;
}

#to_a ⇒ Array

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:



1720
1721
1722
1723
1724
# File 're.c', line 1720

/*
 * Implements MatchData#to_a by delegating to match_array() with a start
 * argument of 0.
 */
static VALUE
match_to_a(VALUE match)
{
    VALUE all = match_array(match, 0);

    return all;
}

#to_s ⇒ String

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:



1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
# File 're.c', line 1870

/*
 * Implements MatchData#to_s.
 *
 * Returns the result of rb_reg_last_match(), or a new empty string when
 * that result is nil.  The returned string is tainted when either the
 * match object or its target string is tainted.
 */
static VALUE
match_to_s(VALUE match)
{
    VALUE text;

    text = rb_reg_last_match(match);
    match_check(match);

    if (NIL_P(text)) {
        text = rb_str_new(0,0);
    }
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(text);
    }
    if (OBJ_TAINTED(RMATCH(match)->str)) {
        OBJ_TAINT(text);
    }
    return text;
}

#values_at([index]) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

Returns:



1849
1850
1851
1852
1853
1854
1855
1856
1857
# File 're.c', line 1849

/*
 * Implements MatchData#values_at: validates the receiver with
 * match_check(), then lets rb_get_values_at() collect the requested
 * entries, passing the register count as the length and match_entry as
 * the per-index accessor.
 */
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    int count;

    match_check(match);
    count = RMATCH_REGS(match)->num_regs;
    return rb_get_values_at(match, count, argc, argv, match_entry);
}