Class: MatchData

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp.last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on.

Instance Method Summary collapse

Instance Method Details

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)

2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
# File 're.c', line 2671

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?

Match Reference – MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"

Overloads:


1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
# File 're.c', line 1737

static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, rest;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (NIL_P(rest)) {
	if (FIXNUM_P(idx)) {
	    if (FIX2INT(idx) >= 0) {
		return rb_reg_nth_match(FIX2INT(idx), match);
	    }
	}
	else {
	    const char *p;
	    int num;

	    switch (TYPE(idx)) {
	      case T_SYMBOL:
		p = rb_id2name(SYM2ID(idx));
		goto name_to_backref;
		break;
	      case T_STRING:
		p = StringValuePtr(idx);

	      name_to_backref:
		num = name_to_backref_number(RMATCH_REGS(match),
					     RMATCH(match)->regexp, p, p + strlen(p));
		return rb_reg_nth_match(num, match);
		break;

	      default:
		break;
	    }
	}
    }

    return rb_ary_aref(argc, argv, match_to_a(match));
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2

Returns:


1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
# File 're.c', line 1159

static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}

#capturesArray

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:


1685
1686
1687
1688
1689
# File 're.c', line 1685

static VALUE
match_captures(VALUE match)
{
    return match_array(match, 1);
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3

Returns:


1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
# File 're.c', line 1194

static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}

#==(mtch2) ⇒ Boolean #eql?(mtch2) ⇒ Boolean

Equality—Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Overloads:

  • #==(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(mtch2) ⇒ Boolean

    Returns:

    • (Boolean)

Returns:

  • (Boolean)

2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
# File 're.c', line 2671

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#hashInteger

Produce a hash based on the target string, regexp and matched positions of this matchdata.

Returns:


2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
# File 're.c', line 2647

static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));

    rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
    regs = RMATCH_REGS(match);
    hashval = rb_hash_uint(hashval, regs->num_regs);
    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
    hashval = rb_hash_end(hashval);
    return LONG2FIX(hashval);
}

#initialize_copyObject

:nodoc:


977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
# File 're.c', line 977

static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
    struct rmatch *rm;

    if (!OBJ_INIT_COPY(obj, orig)) return obj;

    RMATCH(obj)->str = RMATCH(orig)->str;
    RMATCH(obj)->regexp = RMATCH(orig)->regexp;

    rm = RMATCH(obj)->rmatch;
    onig_region_copy(&rm->regs, RMATCH_REGS(orig));

    if (!RMATCH(orig)->rmatch->char_offset_updated) {
        rm->char_offset_updated = 0;
    }
    else {
        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
            rm->char_offset_num_allocated = rm->regs.num_regs;
        }
        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
               struct rmatch_offset, rm->regs.num_regs);
        rm->char_offset_updated = 1;
    }

    return obj;
}

#inspectString

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">

Returns:


1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
# File 're.c', line 1889

static VALUE
match_inspect(VALUE match)
{
    const char *cname = rb_obj_classname(match);
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%s:%p>", cname, (void*)match);
    }

    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    onig_foreach_name(RREGEXP(regexp)->ptr,
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_buf_cat2(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:


1057
1058
1059
1060
1061
1062
# File 're.c', line 1057

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#namesArray

Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]

Returns:


1038
1039
1040
1041
1042
1043
# File 're.c', line 1038

static VALUE
match_names(VALUE match)
{
    match_check(match);
    return rb_reg_names(RMATCH(match)->regexp);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]

Returns:


1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
# File 're.c', line 1123

static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return rb_assoc_new(Qnil, Qnil);

    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
			INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}

#post_matchString

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:


1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
# File 're.c', line 1553

VALUE
rb_reg_match_post(VALUE match)
{
    VALUE str;
    long pos;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = END(0);
    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}

#pre_matchString

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:


1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
# File 're.c', line 1526

VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE str;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}

#regexpRegexp

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/

Returns:


1017
1018
1019
1020
1021
1022
# File 're.c', line 1017

static VALUE
match_regexp(VALUE match)
{
    match_check(match);
    return RMATCH(match)->regexp;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:


1057
1058
1059
1060
1061
1062
# File 're.c', line 1057

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#stringString

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:


1843
1844
1845
1846
1847
1848
# File 're.c', line 1843

static VALUE
match_string(VALUE match)
{
    match_check(match);
    return RMATCH(match)->str;	/* str is frozen */
}

#to_aArray

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:


1666
1667
1668
1669
1670
# File 're.c', line 1666

static VALUE
match_to_a(VALUE match)
{
    return match_array(match, 0);
}

#to_sString

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:


1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
# File 're.c', line 1820

static VALUE
match_to_s(VALUE match)
{
    VALUE str = rb_reg_last_match(match);

    match_check(match);
    if (NIL_P(str)) str = rb_str_new(0,0);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
    return str;
}

#values_at([index]) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

Returns:


1799
1800
1801
1802
1803
1804
1805
1806
1807
# File 're.c', line 1799

static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    struct re_registers *regs;

    match_check(match);
    regs = RMATCH_REGS(match);
    return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
}