Class: MatchData

Inherits:
Object show all
Defined in:
re.c

Overview

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp#last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on. MatchData is also known as MatchingData.

Instance Method Summary collapse

Instance Method Details

#[](i) ⇒ Object #[](start, length) ⇒ Array #[](range) ⇒ Array

Match Reference---MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

Overloads:



# File 're.c'

/*
 *  call-seq:
 *     mtch[i]               => obj
 *     mtch[start, length]   => array
 *     mtch[range]           => array
 *
 *  Match Reference---a <code>MatchData</code> object can be indexed the same
 *  way an array is.  <i>mtch</i>[0] is the entire matched string (the value
 *  also held by the special variable <code>$&</code>); <i>mtch</i>[1],
 *  <i>mtch</i>[2], and so on are the matched backreferences, i.e. the
 *  portions of the pattern enclosed in parentheses.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m[0]       #=> "HX1138"
 *     m[1, 2]    #=> ["H", "X"]
 *     m[1..3]    #=> ["H", "X", "113"]
 *     m[-3, 2]   #=> ["X", "113"]
 */

static VALUE
match_aref(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    VALUE idx, rest;

    /* one required argument, one optional */
    rb_scan_args(argc, argv, "11", &idx, &rest);

    /* Fast path: a single non-negative Fixnum is looked up directly. */
    if (NIL_P(rest) && FIXNUM_P(idx) && FIX2INT(idx) >= 0) {
        return rb_reg_nth_match(FIX2INT(idx), match);
    }

    /*
     * Two-argument calls, non-Fixnum indexes and negative indexes are all
     * handed, with the original arguments, to rb_ary_aref on the array
     * built by match_to_a.
     */
    return rb_ary_aref(argc, argv, match_to_a(match));
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)   #=> 1
m.begin(2)   #=> 2

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.begin(n)   => integer
 *
 *  Gives the offset in the string at which the <em>n</em>th element of the
 *  match array starts.  Raises <code>IndexError</code> when <em>n</em> is
 *  negative or not smaller than the number of registers, and returns
 *  <code>nil</code> when the stored start offset is negative.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.begin(0)   #=> 1
 *     m.begin(2)   #=> 2
 */

static VALUE
match_begin(match, n)
    VALUE match, n;
{
    int nth = NUM2INT(n);

    match_check(match);
    if (nth < 0 || nth >= RMATCH(match)->regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }

    /* a negative start offset is reported as nil */
    return (RMATCH(match)->regs->beg[nth] < 0)
        ? Qnil
        : INT2FIX(RMATCH(match)->regs->beg[nth]);
}

#captures ⇒ Array

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.captures   => array
 *
 *  Returns the captures as an array; the result is equivalent to
 *  <code>mtch.to_a[1..-1]</code>.
 *
 *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 *     f4    #=> "8"
 */
static VALUE
match_captures(match)
    VALUE match;
{
    /* start at index 1 so that element 0 is left out */
    VALUE captures = match_array(match, 1);

    return captures;
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)   #=> 7
m.end(2)   #=> 3

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.end(n)   => integer
 *
 *  Gives the offset of the character that immediately follows the end of
 *  the <em>n</em>th element of the match array in the string.  Raises
 *  <code>IndexError</code> when <em>n</em> is negative or not smaller than
 *  the number of registers, and returns <code>nil</code> when the stored
 *  start offset of that element is negative.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.end(0)   #=> 7
 *     m.end(2)   #=> 3
 */

static VALUE
match_end(match, n)
    VALUE match, n;
{
    int nth = NUM2INT(n);

    match_check(match);
    if (nth < 0 || nth >= RMATCH(match)->regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }

    /* the nil decision is made on the start offset, not the end offset */
    return (RMATCH(match)->regs->beg[nth] < 0)
        ? Qnil
        : INT2FIX(RMATCH(match)->regs->end[nth]);
}

#initialize_copy ⇒ Object

:nodoc:



# File 're.c'

/* :nodoc: */
static VALUE
match_init_copy(obj, orig)
    VALUE obj, orig;
{
    /*
     * Makes obj refer to orig's string and hold a copy of orig's
     * registers.  Copying an object onto itself changes nothing.
     * Always returns obj.
     */
    if (obj != orig) {
        /* orig has to pass rb_obj_is_instance_of against obj's class */
        if (!rb_obj_is_instance_of(orig, rb_obj_class(obj))) {
            rb_raise(rb_eTypeError, "wrong argument class");
        }

        RMATCH(obj)->str = RMATCH(orig)->str;

        /*
         * Release obj's current registers and reset the `allocated' field
         * before re_copy_registers fills them in from orig.
         */
        re_free_registers(RMATCH(obj)->regs);
        RMATCH(obj)->regs->allocated = 0;
        re_copy_registers(RMATCH(obj)->regs, RMATCH(orig)->regs);
    }

    return obj;
}

#inspect ⇒ String

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

Returns:



# File 're.c'

/*
 * call-seq:
 *    mtch.inspect   => str
 *
 * Builds a printable representation of <i>mtch</i>: the class name, then
 * the whole match, then every group as <code>N:value</code>, with
 * <code>nil</code> standing in for a group that has no value.
 *
 *     puts /.$/.match("foo").inspect
 *     #=> #<MatchData "o">
 *
 *     puts /(.)(.)(.)/.match("foo").inspect
 *     #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
 *
 *     puts /(.)(.)?(.)/.match("fo").inspect
 *     #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
 *
 */

static VALUE
match_inspect(VALUE match)
{
    const char *cname = rb_obj_classname(match);
    struct re_registers *regs = RMATCH(match)->regs;
    int num_regs = regs->num_regs;
    VALUE out;
    int nth;

    out = rb_str_buf_new2("#<");
    rb_str_buf_cat2(out, cname);

    for (nth = 0; nth < num_regs; nth++) {
        VALUE group;

        if (nth == 0) {
            /* the whole match carries no "N:" label */
            rb_str_buf_cat2(out, " ");
        }
        else {
            /* room for the separator, any int in decimal, ':' and NUL */
            char label[sizeof(nth)*3+3];

            snprintf(label, sizeof(label), " %d:", nth);
            rb_str_buf_cat2(out, label);
        }

        group = rb_reg_nth_match(nth, match);
        if (NIL_P(group)) {
            rb_str_buf_cat2(out, "nil");
        }
        else {
            rb_str_buf_append(out, rb_str_inspect(group));
        }
    }
    rb_str_buf_cat2(out, ">");

    return out;
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



# File 're.c'

/*
 *  call-seq:
 *     mtch.length   => integer
 *     mtch.size     => integer
 *
 *  Reports how many elements the match array has.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.length   #=> 5
 *     m.size     #=> 5
 */

static VALUE
match_size(match)
    VALUE match;
{
    int count;

    match_check(match);
    /* the element count is the register count of the match */
    count = RMATCH(match)->regs->num_regs;

    return INT2FIX(count);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)   #=> [1, 7]
m.offset(4)   #=> [6, 7]

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.offset(n)   => array
 *
 *  Returns a two-element array holding the beginning and ending offsets of
 *  the <em>n</em>th match.  Raises <code>IndexError</code> when <em>n</em>
 *  is negative or not smaller than the number of registers; both elements
 *  are <code>nil</code> when the stored start offset is negative.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.offset(0)   #=> [1, 7]
 *     m.offset(4)   #=> [6, 7]
 */

static VALUE
match_offset(match, n)
    VALUE match, n;
{
    int nth = NUM2INT(n);
    VALUE first = Qnil;
    VALUE last = Qnil;

    match_check(match);
    if (nth < 0 || nth >= RMATCH(match)->regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }

    /* the pair stays [nil, nil] unless the start offset is non-negative */
    if (RMATCH(match)->regs->beg[nth] >= 0) {
        first = INT2FIX(RMATCH(match)->regs->beg[nth]);
        last = INT2FIX(RMATCH(match)->regs->end[nth]);
    }

    return rb_assoc_new(first, last);
}

#post_match ⇒ String

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.post_match   => str
 *
 *  Returns the part of the original string that follows the current match;
 *  the same value as the special variable <code>$'</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     m.post_match   #=> ": The Movie"
 */

VALUE
rb_reg_match_post(match)
    VALUE match;
{
    VALUE source, tail;
    long tail_start;

    /* a nil match, or one whose start offset is -1, has no post-match */
    if (NIL_P(match)) return Qnil;
    match_check(match);
    if (RMATCH(match)->BEG(0) == -1) return Qnil;

    source = RMATCH(match)->str;
    tail_start = RMATCH(match)->END(0);

    /* everything from the end of the match up to the end of the string */
    tail = rb_str_substr(source, tail_start, RSTRING(source)->len - tail_start);

    /* taint on the match object is carried over to the result */
    if (OBJ_TAINTED(match)) OBJ_TAINT(tail);

    return tail;
}

#pre_match ⇒ String

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.pre_match   => str
 *
 *  Returns the part of the original string that precedes the current
 *  match; the same value as the special variable <code>$`</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.pre_match   #=> "T"
 */

VALUE
rb_reg_match_pre(match)
    VALUE match;
{
    VALUE head;

    /* a nil match, or one whose start offset is -1, has no pre-match */
    if (NIL_P(match)) return Qnil;
    match_check(match);
    if (RMATCH(match)->BEG(0) == -1) return Qnil;

    /* everything from the start of the string up to the start of the match */
    head = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));

    /* taint on the match object is carried over to the result */
    if (OBJ_TAINTED(match)) OBJ_TAINT(head);

    return head;
}

#select {|obj| ... } ⇒ Array

Returns an array containing match strings for which block gives true. MatchData#select will be removed from Ruby 1.9.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
p m.select{|x| /X/ =~ x}   #=> ["HX1138", "X"]

Yields:

  • (obj)

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.select{|obj| block}   => array
 *  
 *  Returns an array containing match strings for which <em>block</em>
 *  gives <code>true</code>.  MatchData#select will be removed from Ruby 1.9.
 *     
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     p m.select{|x| /X/ =~ x}   #=> ["HX1138", "X"]
 */

static VALUE
match_select(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    if (argc > 0) {
    rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
    }
    else {
    struct re_registers *regs;
    VALUE target;
    VALUE result = rb_ary_new();
    int i;
    int taint = OBJ_TAINTED(match);

    match_check(match);
    regs = RMATCH(match)->regs;
    target = RMATCH(match)->str;

    for (i=0; i<regs->num_regs; i++) {
        VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]);
        if (taint) OBJ_TAINT(str);
        if (RTEST(rb_yield(str))) {
        rb_ary_push(result, str);
        }
    }
    return result;
    }
}

#length ⇒ Integer #size ⇒ Integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:



# File 're.c'

/*
 *  call-seq:
 *     mtch.length   => integer
 *     mtch.size     => integer
 *
 *  Reports how many elements the match array has.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.length   #=> 5
 *     m.size     #=> 5
 */

static VALUE
match_size(match)
    VALUE match;
{
    int count;

    match_check(match);
    /* the element count is the register count of the match */
    count = RMATCH(match)->regs->num_regs;

    return INT2FIX(count);
}

#string ⇒ String

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.string   => str
 *
 *  Returns a frozen copy of the string that was passed in to
 *  <code>match</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.string   #=> "THX1138."
 */

static VALUE
match_string(match)
    VALUE match;
{
    VALUE source;

    match_check(match);
    /* handed out as stored, without a further copy: str is frozen */
    source = RMATCH(match)->str;

    return source;
}

#to_a ⇒ Array

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.to_a   => anArray
 *
 *  Returns the matches as an array.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
 *
 *  Since <code>to_a</code> is what gets called when
 *  <code>*</code><em>variable</em> is expanded, matched fields can be
 *  pulled out with a convenient assignment shortcut.  Doing so is slightly
 *  slower than accessing the fields directly, because an intermediate
 *  array is generated.
 *
 *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
 *     all   #=> "HX1138"
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 */

static VALUE
match_to_a(match)
    VALUE match;
{
    /* start at index 0 so that the whole match is included */
    VALUE all = match_array(match, 0);

    return all;
}

#to_s ⇒ String

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.to_s   => str
 *
 *  Returns the whole matched string.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_s   #=> "HX1138"
 */

static VALUE
match_to_s(match)
    VALUE match;
{
    VALUE whole = rb_reg_last_match(match);

    /* fall back to a new empty string when there is no last match */
    if (NIL_P(whole)) {
        whole = rb_str_new(0,0);
    }

    /* the result is tainted if the match or its source string is */
    if (OBJ_TAINTED(match) || OBJ_TAINTED(RMATCH(match)->str)) {
        OBJ_TAINT(whole);
    }

    return whole;
}

#values_at([index]) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

Returns:



# File 're.c'

/*
 *  call-seq:
 *     mtch.values_at([index]*)   => array
 *
 *  Looks up the matching value for each <i>index</i> given and returns the
 *  corresponding matches as an array.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
 *     m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
 */

static VALUE
match_values_at(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    int count;

    match_check(match);
    /* the register count is passed on as the length to index against */
    count = RMATCH(match)->regs->num_regs;

    /* rb_values_at does the work, with match_entry as its callback */
    return rb_values_at(match, count, argc, argv, match_entry);
}