Class: Regexp

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

A Regexp holds a regular expression, used to match a pattern against strings. Regexps are created using the /.../ and %r{...} literals, and by the Regexp::new constructor.

:include: doc/regexp.rdoc

Constant Summary collapse

IGNORECASE =

see Regexp.options and Regexp.new

INT2FIX(ONIG_OPTION_IGNORECASE)
EXTENDED =

see Regexp.options and Regexp.new

INT2FIX(ONIG_OPTION_EXTEND)
MULTILINE =

see Regexp.options and Regexp.new

INT2FIX(ONIG_OPTION_MULTILINE)
FIXEDENCODING =

see Regexp.options and Regexp.new

INT2FIX(ARG_ENCODING_FIXED)
NOENCODING =

see Regexp.options and Regexp.new

INT2FIX(ARG_ENCODING_NONE)

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(string, [options [, kcode]]) ⇒ Regexp #new(regexp) ⇒ Regexp #compile(string, [options [, kcode]]) ⇒ Regexp #compile(regexp) ⇒ Regexp

Constructs a new regular expression from pattern, which can be either a String or a Regexp. If pattern is a Regexp, that regexp's options are propagated and new options may not be specified (a change as of Ruby 1.8).

If options is a Fixnum, it should be one or more of the constants Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE, or-ed together. Otherwise, if options is not nil or false, the regexp will be case insensitive.

When the kcode parameter is `n' or `N', the regexp is set to no encoding, which means that the regexp is intended for binary strings.

r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
r2 = Regexp.new('cat', true)     #=> /cat/i
r3 = Regexp.new(r2)              #=> /cat/i
r4 = Regexp.new('dog', Regexp::EXTENDED | Regexp::IGNORECASE) #=> /dog/ix

Overloads:

  • #new(string, [options [, kcode]]) ⇒ Regexp
  • #new(regexp) ⇒ Regexp
  • #compile(string, [options [, kcode]]) ⇒ Regexp
  • #compile(regexp) ⇒ Regexp

2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
# File 're.c', line 2934

/*
 * Implements Regexp#initialize for one to three arguments.
 *
 * When argv[0] is already a Regexp, its source, encoding and options are
 * reused and any extra argument only triggers a "flags ignored" warning.
 * Otherwise argv[0] is used as the pattern string, argv[1] selects the
 * flags (a Fixnum is taken verbatim, any other truthy value means
 * ONIG_OPTION_IGNORECASE) and argv[2], when not nil, is the kcode string:
 * a leading 'n'/'N' selects the ASCII-8BIT encoding and sets
 * ARG_ENCODING_NONE, anything else is warned about and ignored.
 *
 * Returns self.  A non-zero result from the initializer is handed to
 * rb_reg_raise_str() together with the error buffer.
 */
static VALUE
rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
{
    onig_errmsg_buffer err = "";
    int flags = 0;
    int failed;
    VALUE str;
    rb_encoding *enc = 0;
    const char *ptr;
    long len;

    rb_check_arity(argc, 1, 3);

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        /* Rebuild from an existing regexp; caller-supplied flags are dropped. */
        VALUE re = argv[0];

        if (argc > 1) {
            rb_warn("flags ignored");
        }
        rb_reg_check(re);
        flags = rb_reg_options(re);
        ptr = RREGEXP_SRC_PTR(re);
        len = RREGEXP_SRC_LEN(re);
        enc = rb_enc_get(re);
        if (rb_reg_initialize(self, ptr, len, enc, flags, err, NULL, 0)) {
            str = rb_enc_str_new(ptr, len, enc);
            rb_reg_raise_str(str, flags, err);
        }
        return self;
    }

    /* Pattern given as a string: work out flags and optional kcode. */
    if (argc >= 2) {
        if (FIXNUM_P(argv[1])) {
            flags = FIX2INT(argv[1]);
        }
        else if (RTEST(argv[1])) {
            flags = ONIG_OPTION_IGNORECASE;
        }
    }
    if (argc == 3 && !NIL_P(argv[2])) {
        char *kcode = StringValuePtr(argv[2]);

        if (kcode[0] == 'n' || kcode[0] == 'N') {
            enc = rb_ascii8bit_encoding();
            flags |= ARG_ENCODING_NONE;
        }
        else {
            rb_warn("encoding option is ignored - %s", kcode);
        }
    }

    str = argv[0];
    ptr = StringValuePtr(str);
    if (enc) {
        /* An explicit encoding was chosen through kcode. */
        failed = rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0);
    }
    else {
        failed = rb_reg_initialize_str(self, str, flags, err, NULL, 0);
    }
    if (failed) {
        rb_reg_raise_str(str, flags, err);
    }
    return self;
}

Class Method Details

.compile ⇒ Object

Synonym for Regexp.new

.escape(str) ⇒ String .quote(str) ⇒ String

Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.new(Regexp.escape(str))=~str will be true.

Regexp.escape('\*?{}.')   #=> \\\*\?\{\}\.

Overloads:


3100
3101
3102
3103
3104
# File 're.c', line 3100

/*
 * Regexp.escape / Regexp.quote: passes str through reg_operand(str, TRUE)
 * and returns rb_reg_quote() of the result.  The receiver c is not used.
 */
static VALUE
rb_reg_s_quote(VALUE c, VALUE str)
{
    VALUE operand = reg_operand(str, TRUE);

    return rb_reg_quote(operand);
}

.last_match ⇒ MatchData .last_match(n) ⇒ String

The first form returns the MatchData object generated by the last successful pattern match. Equivalent to reading the special global variable $~ (see Special global variables in Regexp for details).

The second form returns the nth field in this MatchData object. n can be a string or symbol to reference a named capture.

Note that the last_match is local to the thread and method scope of the method that did the pattern match.

/c(.)t/ =~ 'cat'        #=> 0
Regexp.last_match       #=> #<MatchData "cat" 1:"a">
Regexp.last_match(0)    #=> "cat"
Regexp.last_match(1)    #=> "a"
Regexp.last_match(2)    #=> nil

/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
Regexp.last_match       #=> #<MatchData "var = val" lhs:"var" rhs:"val">
Regexp.last_match(:lhs) #=> "var"
Regexp.last_match(:rhs) #=> "val"

Overloads:


3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
# File 're.c', line 3504

/*
 * Regexp.last_match: without an argument returns match_getter(); with one
 * argument returns the field of the current backref selected by it, or
 * Qnil when there is no backref.
 */
static VALUE
rb_reg_s_last_match(int argc, VALUE *argv)
{
    VALUE nth;
    VALUE match;
    int n;

    if (argc <= 0 || rb_scan_args(argc, argv, "01", &nth) != 1) {
        return match_getter();
    }

    match = rb_backref_get();
    if (NIL_P(match)) {
        return Qnil;
    }
    /* Resolve nth (number or capture name) to a group index first. */
    n = match_backref_number(match, nth);
    return rb_reg_nth_match(n, match);
}

.escape(str) ⇒ String .quote(str) ⇒ String

Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.new(Regexp.escape(str))=~str will be true.

Regexp.escape('\*?{}.')   #=> \\\*\?\{\}\.

Overloads:


3100
3101
3102
3103
3104
# File 're.c', line 3100

/*
 * Regexp.escape / Regexp.quote: passes str through reg_operand(str, TRUE)
 * and returns rb_reg_quote() of the result.  The receiver c is not used.
 */
static VALUE
rb_reg_s_quote(VALUE c, VALUE str)
{
    VALUE operand = reg_operand(str, TRUE);

    return rb_reg_quote(operand);
}

.try_convert(obj) ⇒ Regexp?

Tries to convert obj into a Regexp, using the to_regexp method. Returns the converted regexp, or nil if obj cannot be converted for any reason.

Regexp.try_convert(/re/)         #=> /re/
Regexp.try_convert("re")         #=> nil

o = Object.new
Regexp.try_convert(o)            #=> nil
def o.to_regexp() /foo/ end
Regexp.try_convert(o)            #=> /foo/

Returns:

  • (Regexp, nil)

3141
3142
3143
3144
3145
# File 're.c', line 3141

/*
 * Regexp.try_convert: returns whatever rb_check_regexp_type(re) yields.
 * The receiver (dummy) is not used.
 */
static VALUE
rb_reg_s_try_convert(VALUE dummy, VALUE re)
{
    VALUE converted = rb_check_regexp_type(re);

    return converted;
}

.union(pat1, pat2, ...) ⇒ Regexp .union(pats_ary) ⇒ Regexp

Return a Regexp object that is the union of the given patterns, i.e., will match any of its parts. The patterns can be Regexp objects, in which case their options will be preserved, or Strings. If no patterns are given, returns /(?!)/. The behavior is unspecified if any given pattern contains capture.

Regexp.union                         #=> /(?!)/
Regexp.union("penzance")             #=> /penzance/
Regexp.union("a+b*c")                #=> /a\+b\*c/
Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/

Note: ::union will try to convert each of its arguments into a regular expression via #to_regexp.

Overloads:


3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
# File 're.c', line 3282

/*
 * Regexp.union entry point.  args is the argument list as an array.  If it
 * holds exactly one element and rb_check_array_type() accepts that element,
 * the union is built from the converted array; in every other case it is
 * built from args itself.
 */
static VALUE
rb_reg_s_union_m(VALUE self, VALUE args)
{
    if (RARRAY_LEN(args) == 1) {
        VALUE list = rb_check_array_type(rb_ary_entry(args, 0));

        if (!NIL_P(list)) {
            return rb_reg_s_union(self, list);
        }
    }
    return rb_reg_s_union(self, args);
}

Instance Method Details

#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean

Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold? values are the same.

/abc/  == /abc/x   #=> false
/abc/  == /abc/i   #=> false
/abc/  == /abc/u   #=> false
/abc/u == /abc/n   #=> false

Overloads:

  • #==(other_rxp) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(other_rxp) ⇒ Boolean

    Returns:

    • (Boolean)

2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
# File 're.c', line 2623

/*
 * Regexp#== / Regexp#eql?.
 *
 * Identical objects are equal.  Otherwise re2 must be a T_REGEXP and both
 * regexps must agree on the KCODE_FIXED flag, the compiled options, the
 * source length, the encoding and the source bytes.
 */
static VALUE
rb_reg_equal(VALUE re1, VALUE re2)
{
    if (re1 == re2) return Qtrue;
    if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;

    rb_reg_check(re1);
    rb_reg_check(re2);

    /* Cheap attribute comparisons first; the byte comparison comes last. */
    if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED) ||
        RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options ||
        RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2) ||
        ENCODING_GET(re1) != ENCODING_GET(re2)) {
        return Qfalse;
    }

    return memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0
        ? Qtrue
        : Qfalse;
}

#===(str) ⇒ Boolean

Case Equality—Used in case statements.

a = "HELLO"
case a
when /^[a-z]*$/; print "Lower case\n"
when /^[A-Z]*$/; print "Upper case\n"
else;            print "Mixed case\n"
end
#=> "Upper case"

Following a regular expression literal with the #=== operator allows you to compare against a String.

/^[a-z]*$/ === "HELLO" #=> false
/^[A-Z]*$/ === "HELLO" #=> true

Returns:

  • (Boolean)

2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
# File 're.c', line 2799

/*
 * Regexp#===.  str is first passed through reg_operand(str, FALSE); a nil
 * result resets the backref to nil and yields Qfalse.  Otherwise the
 * result is Qtrue when rb_reg_search() reports a non-negative position
 * and Qfalse when it does not.
 */
VALUE
rb_reg_eqq(VALUE re, VALUE str)
{
    long found_at;

    str = reg_operand(str, FALSE);
    if (NIL_P(str)) {
        rb_backref_set(Qnil);
        return Qfalse;
    }

    found_at = rb_reg_search(re, str, 0, 0);
    return (found_at < 0) ? Qfalse : Qtrue;
}

#=~(str) ⇒ Integer?

Match—Matches rxp against str.

/at/ =~ "input data"   #=> 7
/ax/ =~ "input data"   #=> nil

If =~ is used with a regexp literal with named captures, the captured strings (or nil) are assigned to local variables named by the capture names.

/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = y  "
p lhs    #=> "x"
p rhs    #=> "y"

If it is not matched, nil is assigned for the variables.

/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = "
p lhs    #=> nil
p rhs    #=> nil

This assignment is implemented in the Ruby parser. The parser detects 'regexp-literal =~ expression' for the assignment. The regexp must be a literal without interpolation and placed at left hand side.

The assignment does not occur if the regexp is not a literal.

re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
re =~ "  x = y  "
p lhs    # undefined local variable
p rhs    # undefined local variable

A regexp interpolation, #{}, also disables the assignment.

rhs_pat = /(?<rhs>\w+)/
/(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
p lhs    # undefined local variable

The assignment does not occur if the regexp is placed at the right hand side.

"  x = y  " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
p lhs, rhs # undefined local variable

Returns:


2769
2770
2771
2772
2773
2774
2775
2776
# File 're.c', line 2769

/*
 * Regexp#=~.  Returns Qnil when reg_match_pos() reports a negative
 * position; otherwise the position is run through rb_str_sublen() and
 * returned as a Fixnum.
 * NOTE(review): rb_str_sublen presumably turns the byte offset into a
 * character index - confirm against its definition.
 */
VALUE
rb_reg_match(VALUE re, VALUE str)
{
    long offset = reg_match_pos(re, &str, 0);

    if (offset >= 0) {
        long index = rb_str_sublen(str, offset);

        return LONG2FIX(index);
    }
    return Qnil;
}

#casefold? ⇒ Boolean

Returns the value of the case-insensitive flag.

/a/.casefold?           #=> false
/a/i.casefold?          #=> true
/(?i:a)/.casefold?      #=> false

Returns:

  • (Boolean)

Returns:

  • (Boolean)

686
687
688
689
690
691
692
# File 're.c', line 686

/*
 * Regexp#casefold?: Qtrue when ONIG_OPTION_IGNORECASE is set in the
 * compiled pattern's options, Qfalse otherwise.
 */
static VALUE
rb_reg_casefold_p(VALUE re)
{
    rb_reg_check(re);
    return (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) ? Qtrue : Qfalse;
}

#encoding ⇒ Encoding

Returns the Encoding object that represents the encoding of obj.

Returns:


923
924
925
926
927
928
929
930
931
# File 'encoding.c', line 923

/*
 * Returns the Encoding object for obj, looked up through its encoding
 * index.  Raises TypeError ("unknown encoding") when the index is
 * negative.
 */
VALUE
rb_obj_encoding(VALUE obj)
{
    const int enc_index = rb_enc_get_index(obj);

    if (enc_index < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }
    return rb_enc_from_encoding_index(enc_index);
}

#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean

Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold? values are the same.

/abc/  == /abc/x   #=> false
/abc/  == /abc/i   #=> false
/abc/  == /abc/u   #=> false
/abc/u == /abc/n   #=> false

Overloads:

  • #==(other_rxp) ⇒ Boolean

    Returns:

    • (Boolean)
  • #eql?(other_rxp) ⇒ Boolean

    Returns:

    • (Boolean)

Returns:

  • (Boolean)

2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
# File 're.c', line 2623

/*
 * Regexp#== / Regexp#eql?.
 *
 * Identical objects are equal.  Otherwise re2 must be a T_REGEXP and both
 * regexps must agree on the KCODE_FIXED flag, the compiled options, the
 * source length, the encoding and the source bytes.
 */
static VALUE
rb_reg_equal(VALUE re1, VALUE re2)
{
    if (re1 == re2) return Qtrue;
    if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;

    rb_reg_check(re1);
    rb_reg_check(re2);

    /* Cheap attribute comparisons first; the byte comparison comes last. */
    if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED) ||
        RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options ||
        RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2) ||
        ENCODING_GET(re1) != ENCODING_GET(re2)) {
        return Qfalse;
    }

    return memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0
        ? Qtrue
        : Qfalse;
}

#fixed_encoding? ⇒ Boolean

Returns false if rxp is applicable to a string with any ASCII compatible encoding. Returns true otherwise.

r = /a/
r.fixed_encoding?                               #=> false
r =~ "\u{6666} a"                               #=> 2
r =~ "\xa1\xa2 a".force_encoding("euc-jp")      #=> 2
r =~ "abc".force_encoding("euc-jp")             #=> 0

r = /a/u
r.fixed_encoding?                               #=> true
r.encoding                                      #=> #<Encoding:UTF-8>
r =~ "\u{6666} a"                               #=> 2
r =~ "\xa1\xa2".force_encoding("euc-jp")        #=> ArgumentError
r =~ "abc".force_encoding("euc-jp")             #=> 0

r = /\u{6666}/
r.fixed_encoding?                               #=> true
r.encoding                                      #=> #<Encoding:UTF-8>
r =~ "\u{6666} a"                               #=> 0
r =~ "\xa1\xa2".force_encoding("euc-jp")        #=> ArgumentError
r =~ "abc".force_encoding("euc-jp")             #=> nil

Returns:

  • (Boolean)

Returns:

  • (Boolean)

1248
1249
1250
1251
1252
1253
1254
1255
# File 're.c', line 1248

/*
 * Regexp#fixed_encoding?: Qtrue when the KCODE_FIXED flag is set on re,
 * Qfalse otherwise.
 */
static VALUE
rb_reg_fixed_encoding_p(VALUE re)
{
    return FL_TEST(re, KCODE_FIXED) ? Qtrue : Qfalse;
}

#hash ⇒ Fixnum

Produce a hash based on the text and options of this regular expression.

Returns:


2589
2590
2591
2592
2593
2594
# File 're.c', line 2589

/*
 * Regexp#hash: wraps the value computed by reg_hash(re) as a Fixnum.
 */
static VALUE
rb_reg_hash(VALUE re)
{
    const st_index_t digest = reg_hash(re);

    return LONG2FIX(digest);
}

#initialize_copy ⇒ Object

:nodoc:


3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
# File 're.c', line 3294

/*
 * Regexp#initialize_copy.  When OBJ_INIT_COPY() reports that nothing is to
 * be done, copy is returned untouched.  Otherwise copy is initialized
 * from re's source, encoding and options; a non-zero result from
 * rb_reg_initialize() is reported through rb_reg_raise().
 */
static VALUE
rb_reg_init_copy(VALUE copy, VALUE re)
{
    onig_errmsg_buffer err = "";
    const char *src;
    long src_len;
    int status;

    if (!OBJ_INIT_COPY(copy, re)) {
        return copy;
    }

    rb_reg_check(re);
    src = RREGEXP_SRC_PTR(re);
    src_len = RREGEXP_SRC_LEN(re);

    status = rb_reg_initialize(copy, src, src_len, rb_enc_get(re),
                               rb_reg_options(re), err, NULL, 0);
    if (status != 0) {
        rb_reg_raise(src, src_len, err, re);
    }
    return copy;
}

#inspect ⇒ String

Produce a nicely formatted string-version of rxp. Perhaps surprisingly, #inspect actually produces the more natural version of the string than #to_s.

/ab+c/ix.inspect        #=> "/ab+c/ix"

Returns:


490
491
492
493
494
495
496
497
# File 're.c', line 490

/*
 * Regexp#inspect.  When the compiled pattern, the source object and the
 * source pointer are all present, the description built by rb_reg_desc()
 * is returned; if any of them is missing, the generic rb_any_to_s() form
 * is returned instead.
 */
static VALUE
rb_reg_inspect(VALUE re)
{
    if (RREGEXP(re)->ptr && RREGEXP_SRC(re) && RREGEXP_SRC_PTR(re)) {
        return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
    }
    return rb_any_to_s(re);
}

#match(str) ⇒ MatchData? #match(str, pos) ⇒ MatchData?

Returns a MatchData object describing the match, or nil if there was no match. This is equivalent to retrieving the value of the special variable $~ following a normal match. If the second parameter is present, it specifies the position in the string to begin the search.

/(.)(.)(.)/.match("abc")[2]   #=> "b"
/(.)(.)/.match("abc", 1)[2]   #=> "c"

If a block is given, the block is invoked with the MatchData if the match succeeds, so that you can write

pat.match(str) {|m| ...}

instead of

if m = pat.match(str)
  ...
end

The return value is a value from block execution in this case.

Overloads:


2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
# File 're.c', line 2876

/*
 * Regexp#match(str [, pos]).
 *
 * The optional second argument is the start position (0 when omitted).
 * When reg_match_pos() reports no match, the backref is reset to nil and
 * Qnil is returned.  Otherwise the current backref is fetched, passed to
 * rb_match_busy(), and either yielded to the block (returning the
 * block's value) or returned directly.
 */
static VALUE
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
{
    VALUE str, initpos, match;
    long pos = 0;

    if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
        pos = NUM2LONG(initpos);
    }

    pos = reg_match_pos(re, &str, pos);
    if (pos < 0) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    match = rb_backref_get();
    rb_match_busy(match);
    if (NIL_P(match) || !rb_block_given_p()) {
        return match;
    }
    return rb_yield(match);
}

#named_captures ⇒ Hash

Returns a hash representing information about named captures of rxp.

Each key of the hash is the name of a named capture. Each value is an array listing the indexes of the corresponding named captures.

/(?<foo>.)(?<bar>.)/.named_captures
#=> {"foo"=>[1], "bar"=>[2]}

/(?<foo>.)(?<foo>.)/.named_captures
#=> {"foo"=>[1, 2]}

If there are no named captures, an empty hash is returned.

/(.)(.)/.named_captures
#=> {}

Returns:


797
798
799
800
801
802
803
804
# File 're.c', line 797

/*
 * Regexp#named_captures: creates a new hash and lets
 * reg_named_captures_iter fill it while onig_foreach_name() walks the
 * names of the compiled pattern.
 */
static VALUE
rb_reg_named_captures(VALUE re)
{
    VALUE captures = rb_hash_new();

    rb_reg_check(re);
    onig_foreach_name(RREGEXP(re)->ptr, reg_named_captures_iter, (void *)captures);

    return captures;
}

#names ⇒ Array

Returns a list of names of captures as an array of strings.

/(?<foo>.)(?<bar>.)(?<baz>.)/.names
#=> ["foo", "bar", "baz"]

/(?<foo>.)(?<foo>.)/.names
#=> ["foo"]

/(.)(.)/.names
#=> []

Returns:


750
751
752
753
754
755
756
757
# File 're.c', line 750

/*
 * Regexp#names: creates a new array and lets reg_names_iter fill it while
 * onig_foreach_name() walks the names of the compiled pattern.
 */
static VALUE
rb_reg_names(VALUE re)
{
    VALUE names = rb_ary_new();

    rb_reg_check(re);
    onig_foreach_name(RREGEXP(re)->ptr, reg_names_iter, (void *)names);

    return names;
}

#options ⇒ Fixnum

Returns the set of bits corresponding to the options used when creating this Regexp (see Regexp::new for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to Regexp::new.

Regexp::IGNORECASE                  #=> 1
Regexp::EXTENDED                    #=> 2
Regexp::MULTILINE                   #=> 4

/cat/.options                       #=> 0
/cat/ix.options                     #=> 3
Regexp.new('cat', true).options     #=> 1
/\xa1\xa2/e.options                 #=> 16

r = /cat/ix
Regexp.new(r.source, r.options)     #=> /cat/ix

Returns:


718
719
720
721
722
723
# File 're.c', line 718

/*
 * Regexp#options: returns the int produced by rb_reg_options(re) as a
 * Ruby integer.
 */
static VALUE
rb_reg_options_m(VALUE re)
{
    const int opts = rb_reg_options(re);

    return INT2NUM(opts);
}

#source ⇒ String

Returns the original string of the pattern.

/ab+c/ix.source #=> "ab+c"

Note that escape sequences are retained as is.

/\x20\+/.source  #=> "\\x20\\+"

Returns:


467
468
469
470
471
472
473
474
475
476
# File 're.c', line 467

/*
 * Regexp#source: returns a new string built from the regexp's source
 * bytes in the regexp's encoding.  The string is tainted when re is.
 */
static VALUE
rb_reg_source(VALUE re)
{
    VALUE src;

    rb_reg_check(re);
    src = rb_enc_str_new(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), rb_enc_get(re));
    if (OBJ_TAINTED(re)) {
        OBJ_TAINT(src);
    }
    return src;
}

#to_s ⇒ String

Returns a string containing the regular expression and its options (using the (?opts:source) notation). This string can be fed back in to Regexp::new to produce a regular expression with the same semantics as the original. (However, Regexp#== may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows.) Regexp#inspect produces a generally more readable version of rxp.

r1 = /ab+c/ix           #=> /ab+c/ix
s1 = r1.to_s            #=> "(?ix-m:ab+c)"
r2 = Regexp.new(s1)     #=> /(?ix-m:ab+c)/
r1 == r2                #=> false
r1.source               #=> "ab+c"
r2.source               #=> "(?ix-m:ab+c)"

Returns:


520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
# File 're.c', line 520

/*
 * Regexp#to_s: builds the "(?<on>-<off>:<source>)" string form of re.
 *
 * If the source itself begins with an inline option group, the function
 * tries to fold that group's options into the outer option set and to
 * emit only the inner pattern; when that is not possible it falls back
 * to the untouched source and the regexp's own options.
 *
 * The result carries re's encoding and is passed through OBJ_INFECT().
 */
static VALUE
rb_reg_to_s(VALUE re)
{
    int options, opt;
    /* the three options that the (?mix-mix:...) prefix expresses here */
    const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
    long len;
    const UChar* ptr;
    VALUE str = rb_str_buf_new2("(?");
    /* room for '-' plus the option letters and a terminator;
     * NOTE(review): assumes option_to_str writes at most 3 letters + NUL */
    char optbuf[5];
    rb_encoding *enc = rb_enc_get(re);

    rb_reg_check(re);

    rb_enc_copy(str, re);
    options = RREGEXP(re)->ptr->options;
    ptr = (UChar*)RREGEXP_SRC_PTR(re);
    len = RREGEXP_SRC_LEN(re);
  again:
    /* Does the (remaining) source start with an inline group "(?..."? */
    if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
	/* stays non-zero unless the inner pattern is shown to compile below */
	int err = 1;
	ptr += 2;
	/* options switched on: every recognized option char is OR-ed in */
	if ((len -= 2) > 0) {
	    do {
                opt = char_to_option((int )*ptr);
                if (opt != 0) {
                    options |= opt;
                }
                else {
                    break;
                }
		++ptr;
	    } while (--len > 0);
	}
	/* options switched off: chars after '-' are cleared from options */
	if (len > 1 && *ptr == '-') {
	    ++ptr;
	    --len;
	    do {
                opt = char_to_option((int )*ptr);
                if (opt != 0) {
                    options &= ~opt;
                }
                else {
                    break;
                }
		++ptr;
	    } while (--len > 0);
	}
	/* option-only group "(?opts)": skip it and scan what follows */
	if (*ptr == ')') {
	    --len;
	    ++ptr;
	    goto again;
	}
	/* "(?opts:...)" ending in ')': check that the part between ':' and
	 * the final ')' compiles by itself; ruby_verbose is switched off
	 * for the duration of the trial compile and restored afterwards */
	if (*ptr == ':' && ptr[len-1] == ')') {
	    Regexp *rp;
	    VALUE verbose = ruby_verbose;
	    ruby_verbose = Qfalse;

	    ++ptr;
	    len -= 2;
            err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
			   enc, OnigDefaultSyntax, NULL);
	    onig_free(rp);
	    ruby_verbose = verbose;
	}
	/* unwrapping did not work out: restore original options and source */
	if (err) {
	    options = RREGEXP(re)->ptr->options;
	    ptr = (UChar*)RREGEXP_SRC_PTR(re);
	    len = RREGEXP_SRC_LEN(re);
	}
    }

    /* append the "on" option letters, if there are any */
    if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);

    /* unless all embeddable options are on, append '-' and the "off" letters */
    if ((options & embeddable) != embeddable) {
	optbuf[0] = '-';
	option_to_str(optbuf + 1, ~options);
	rb_str_buf_cat2(str, optbuf);
    }

    rb_str_buf_cat2(str, ":");
    if (rb_enc_asciicompat(enc)) {
	rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
	rb_str_buf_cat2(str, ")");
    }
    else {
	/* Not ASCII compatible: the prefix built so far (plus ")") is
	 * transcoded from US-ASCII to enc, the encoded ")" is cut off and
	 * kept aside, the source is appended, then ")" is put back. */
	const char *s, *e;
	char *paren;
	ptrdiff_t n;
	rb_str_buf_cat2(str, ")");
	rb_enc_associate(str, rb_usascii_encoding());
	str = rb_str_encode(str, rb_enc_from_encoding(enc), 0, Qnil);

	/* backup encoded ")" to paren */
	s = RSTRING_PTR(str);
	e = RSTRING_END(str);
	s = rb_enc_left_char_head(s, e-1, e, enc);
	n = e - s;
	paren = ALLOCA_N(char, n);
	memcpy(paren, s, n);
	rb_str_resize(str, RSTRING_LEN(str) - n);

	rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
	rb_str_buf_cat(str, paren, n);
    }
    rb_enc_copy(str, re);

    OBJ_INFECT(str, re);
    return str;
}

#~(rxp) ⇒ Integer?

Match—Matches rxp against the contents of $_. Equivalent to rxp =~ $_.

$_ = "input data"
~ /at/   #=> 7

Returns:


2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
# File 're.c', line 2828

/*
 * Regexp#~: matches re against the value returned by rb_lastline_get().
 *
 * If that value is not a String, the backref is reset to nil and Qnil is
 * returned.  Qnil is also returned when rb_reg_search() reports a
 * negative position.  Otherwise the position is run through
 * rb_str_sublen() and returned as a Fixnum.
 */
VALUE
rb_reg_match2(VALUE re)
{
    VALUE line = rb_lastline_get();
    long offset;

    if (!RB_TYPE_P(line, T_STRING)) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    offset = rb_reg_search(re, line, 0, 0);
    if (offset >= 0) {
        long index = rb_str_sublen(line, offset);

        return LONG2FIX(index);
    }
    return Qnil;
}