Class: Regexp
Overview
A Regexp
holds a regular expression, used to match a pattern against strings. Regexps are created using the /.../
and %r{...}
literals, and by the Regexp::new
constructor.
:include: doc/regexp.rdoc
Constant Summary collapse
- IGNORECASE =
see Regexp.options and Regexp.new
INT2FIX(ONIG_OPTION_IGNORECASE)
- EXTENDED =
see Regexp.options and Regexp.new
INT2FIX(ONIG_OPTION_EXTEND)
- MULTILINE =
see Regexp.options and Regexp.new
INT2FIX(ONIG_OPTION_MULTILINE)
- FIXEDENCODING =
see Regexp.options and Regexp.new
INT2FIX(ARG_ENCODING_FIXED)
- NOENCODING =
see Regexp.options and Regexp.new
INT2FIX(ARG_ENCODING_NONE)
Class Method Summary collapse
-
.compile ⇒ Object
Synonym for
Regexp.new
. -
.escape(str) ⇒ Object
Escapes any characters that would have special meaning in a regular expression.
-
.last_match(*args) ⇒ Object
The first form returns the MatchData object generated by the last successful pattern match.
-
.quote(str) ⇒ Object
Escapes any characters that would have special meaning in a regular expression.
-
.try_convert(obj) ⇒ nil
Try to convert obj into a Regexp, using to_regexp method.
-
.union(args) ⇒ Object
Return a
Regexp
object that is the union of the given patterns, i.e., will match any of its parts.
Instance Method Summary collapse
-
#==(re2) ⇒ Object
Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their
casefold?
values are the same. -
#===(str) ⇒ Boolean
Case Equality—Used in case statements.
-
#=~(str) ⇒ Integer?
Match—Matches rxp against str.
-
#casefold? ⇒ Boolean
Returns the value of the case-insensitive flag.
-
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
-
#eql?(re2) ⇒ Boolean
Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their
casefold?
values are the same. -
#fixed_encoding? ⇒ Boolean
Returns false if rxp is applicable to a string with any ASCII compatible encoding.
-
#hash ⇒ Fixnum
Produce a hash based on the text and options of this regular expression.
-
#initialize(*args) ⇒ Object
constructor
Constructs a new regular expression from
pattern
, which can be either a String or a Regexp (in which case that regexp’s options are propagated), and new options may not be specified (a change as of Ruby 1.8). -
#initialize_copy(re) ⇒ Object
:nodoc:.
-
#inspect ⇒ String
Produce a nicely formatted string-version of rxp.
-
#match(*args) ⇒ Object
Returns a
MatchData
object describing the match, or nil
if there was no match. -
#named_captures ⇒ Hash
Returns a hash representing information about named captures of rxp.
-
#names ⇒ Array
Returns a list of names of captures as an array of strings.
-
#options ⇒ Fixnum
Returns the set of bits corresponding to the options used when creating this Regexp (see
Regexp::new
for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to Regexp::new
. -
#source ⇒ String
Returns the original string of the pattern.
-
#to_s ⇒ String
Returns a string containing the regular expression and its options (using the
(?opts:source)
notation). This string can be fed back in to Regexp::new
to produce a regular expression with the same semantics as the original. (However, Regexp#==
may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). Regexp#inspect
produces a generally more readable version of rxp. -
#~(rxp) ⇒ Integer?
Match—Matches rxp against the contents of
$_
.
Constructor Details
#new(string, [options [, kcode]]) ⇒ Regexp #new(regexp) ⇒ Regexp #compile(string, [options [, kcode]]) ⇒ Regexp #compile(regexp) ⇒ Regexp
Constructs a new regular expression from pattern
, which can be either a String or a Regexp (in which case that regexp’s options are propagated), and new options may not be specified (a change as of Ruby 1.8).
If options
is a Fixnum, it should be one or more of the constants Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE, or-ed together. Otherwise, if options
is not nil
or false
, the regexp will be case insensitive.
When the kcode
parameter is ‘n’ or ‘N’, the regexp is given no encoding, which means that it is intended for binary strings.
r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
r2 = Regexp.new('cat', true) #=> /cat/i
r3 = Regexp.new(r2) #=> /cat/i
r4 = Regexp.new('dog', Regexp::EXTENDED | Regexp::IGNORECASE) #=> /dog/ix
2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 |
# File 're.c', line 2999
/*
 * Implements Regexp#initialize.
 *
 * Takes between one and three arguments.  When argv[0] is already a Regexp,
 * its source, encoding and options are reused and any further arguments are
 * ignored with a warning.  Otherwise argv[0] is treated as the pattern
 * string, argv[1] (optional) supplies the flags, and argv[2] (optional) is
 * the kcode string.  Returns self; a non-zero result from the underlying
 * initializer is reported through rb_reg_raise_str().
 */
static VALUE
rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
{
    onig_errmsg_buffer err = "";
    int flags = 0;
    VALUE str;
    rb_encoding *enc;
    const char *ptr;
    long len;
    int failed;

    rb_check_arity(argc, 1, 3);

    /* Copy-construct from an existing Regexp. */
    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        VALUE re = argv[0];

        if (argc > 1) {
            rb_warn("flags ignored");
        }
        rb_reg_check(re);
        flags = rb_reg_options(re);
        ptr = RREGEXP_SRC_PTR(re);
        len = RREGEXP_SRC_LEN(re);
        enc = rb_enc_get(re);
        if (rb_reg_initialize(self, ptr, len, enc, flags, err, NULL, 0)) {
            str = rb_enc_str_new(ptr, len, enc);
            rb_reg_raise_str(str, flags, err);
        }
        return self;
    }

    /* Pattern given as a string: work out the flags first. */
    if (argc >= 2) {
        if (FIXNUM_P(argv[1])) {
            flags = FIX2INT(argv[1]);
        }
        else if (RTEST(argv[1])) {
            /* Any other truthy value just means "ignore case". */
            flags = ONIG_OPTION_IGNORECASE;
        }
    }

    /* Only a kcode starting with 'n'/'N' is honoured; anything else warns. */
    enc = 0;
    if (argc == 3 && !NIL_P(argv[2])) {
        char *kcode = StringValuePtr(argv[2]);

        if (kcode[0] == 'n' || kcode[0] == 'N') {
            enc = rb_ascii8bit_encoding();
            flags |= ARG_ENCODING_NONE;
        }
        else {
            rb_warn("encoding option is ignored - %s", kcode);
        }
    }

    str = argv[0];
    ptr = StringValuePtr(str);

    /* With an explicit encoding use the raw-buffer initializer, otherwise
     * the string-based one. */
    if (enc) {
        failed = rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0);
    }
    else {
        failed = rb_reg_initialize_str(self, str, flags, err, NULL, 0);
    }
    if (failed) {
        rb_reg_raise_str(str, flags, err);
    }
    return self;
}
|
Class Method Details
.compile ⇒ Object
Synonym for Regexp.new
.escape(str) ⇒ String .quote(str) ⇒ String
Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.new(Regexp.escape(str))=~str
will be true.
Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
3165 3166 3167 3168 3169 |
# File 're.c', line 3165
/*
 * Implements Regexp.escape / Regexp.quote: normalises the argument with
 * reg_operand() (second argument TRUE) and hands the result to
 * rb_reg_quote().  The receiver `c` is unused.
 */
static VALUE
rb_reg_s_quote(VALUE c, VALUE str)
{
    VALUE operand = reg_operand(str, TRUE);

    return rb_reg_quote(operand);
}
|
.last_match ⇒ MatchData .last_match(n) ⇒ String
The first form returns the MatchData object generated by the last successful pattern match. Equivalent to reading the special global variable $~
(see Special global variables in Regexp for details).
The second form returns the nth field in this MatchData object. n can be a string or symbol to reference a named capture.
Note that the last_match is local to the thread and method scope of the method that did the pattern match.
/c(.)t/ =~ 'cat' #=> 0
Regexp.last_match #=> #<MatchData "cat" 1:"a">
Regexp.last_match(0) #=> "cat"
Regexp.last_match(1) #=> "a"
Regexp.last_match(2) #=> nil
/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
Regexp.last_match(:lhs) #=> "var"
Regexp.last_match(:rhs) #=> "val"
3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 |
# File 're.c', line 3574
/*
 * Implements Regexp.last_match.
 *
 * Without an argument, returns whatever match_getter() yields.  With one
 * argument, resolves it to a group number against the current backref and
 * returns that group, or nil when there is no current backref.
 */
static VALUE
rb_reg_s_last_match(int argc, VALUE *argv)
{
    VALUE nth;
    VALUE match;
    int group;

    /* No usable argument: behave like reading the last match itself. */
    if (argc <= 0 || rb_scan_args(argc, argv, "01", &nth) != 1) {
        return match_getter();
    }

    match = rb_backref_get();
    if (NIL_P(match)) {
        return Qnil;
    }
    group = match_backref_number(match, nth);
    return rb_reg_nth_match(group, match);
}
|
.escape(str) ⇒ String .quote(str) ⇒ String
3165 3166 3167 3168 3169 |
# File 're.c', line 3165
/*
 * Shared body of Regexp.escape and Regexp.quote.  The argument is first
 * passed through reg_operand() (with TRUE as its second argument) and the
 * result is quoted by rb_reg_quote().  `c`, the receiver, is not used.
 */
static VALUE
rb_reg_s_quote(VALUE c, VALUE str)
{
    VALUE checked = reg_operand(str, TRUE);

    return rb_reg_quote(checked);
}
|
.try_convert(obj) ⇒ nil
Try to convert obj into a Regexp, using to_regexp method. Returns converted regexp or nil if obj cannot be converted for any reason.
Regexp.try_convert(/re/) #=> /re/
Regexp.try_convert("re") #=> nil
o = Object.new
Regexp.try_convert(o) #=> nil
def o.to_regexp() /foo/ end
Regexp.try_convert(o) #=> /foo/
3206 3207 3208 3209 3210 |
# File 're.c', line 3206
/*
 * Implements Regexp.try_convert by delegating to rb_check_regexp_type();
 * its result is returned unchanged.  `dummy` (the receiver) is ignored.
 */
static VALUE
rb_reg_s_try_convert(VALUE dummy, VALUE re)
{
    VALUE converted = rb_check_regexp_type(re);

    return converted;
}
|
.union(pat1, pat2, ...) ⇒ Regexp .union(pats_ary) ⇒ Regexp
Return a Regexp
object that is the union of the given patterns, i.e., will match any of its parts. The patterns can be Regexp objects, in which case their options will be preserved, or Strings. If no patterns are given, returns /(?!)/
. The behavior is unspecified if any given pattern contains a capture.
Regexp.union #=> /(?!)/
Regexp.union("penzance") #=> /penzance/
Regexp.union("a+b*c") #=> /a\+b\*c/
Regexp.union("skiing", "sledding") #=> /skiing|sledding/
Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
Note: the arguments for ::union will try to be converted into a regular expression literal via #to_regexp.
3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 |
# File 're.c', line 3347
/*
 * Implements Regexp.union.
 *
 * `args` is the array of arguments.  When it holds exactly one element and
 * that element converts to an array, the union is built from that inner
 * array; in every other case it is built from `args` itself.
 */
static VALUE
rb_reg_s_union_m(VALUE self, VALUE args)
{
    if (RARRAY_LEN(args) == 1) {
        VALUE inner = rb_check_array_type(rb_ary_entry(args, 0));

        if (!NIL_P(inner)) {
            return rb_reg_s_union(self, inner);
        }
    }
    return rb_reg_s_union(self, args);
}
|
Instance Method Details
#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean
Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold?
values are the same.
/abc/ == /abc/x #=> false
/abc/ == /abc/i #=> false
/abc/ == /abc/u #=> false
/abc/u == /abc/n #=> false
2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 |
# File 're.c', line 2686
/*
 * Implements Regexp#== and Regexp#eql?.
 *
 * Identical objects compare equal immediately; a non-Regexp right-hand side
 * is never equal.  Otherwise both operands are validated with rb_reg_check()
 * and must agree on the KCODE_FIXED flag, the compiled option bits, the
 * source length, the encoding, and finally the source bytes.
 */
static VALUE
rb_reg_equal(VALUE re1, VALUE re2)
{
    long src_len;

    if (re1 == re2) {
        return Qtrue;
    }
    if (!RB_TYPE_P(re2, T_REGEXP)) {
        return Qfalse;
    }

    rb_reg_check(re1);
    rb_reg_check(re2);

    if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) {
        return Qfalse;
    }
    if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) {
        return Qfalse;
    }
    src_len = RREGEXP_SRC_LEN(re1);
    if (src_len != RREGEXP_SRC_LEN(re2)) {
        return Qfalse;
    }
    if (ENCODING_GET(re1) != ENCODING_GET(re2)) {
        return Qfalse;
    }

    /* Lengths match, so a byte-wise comparison of the sources decides. */
    return memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), src_len) == 0
        ? Qtrue
        : Qfalse;
}
|
#===(str) ⇒ Boolean
Case Equality—Used in case statements.
a = "HELLO"
case a
when /^[a-z]*$/; print "Lower case\n"
when /^[A-Z]*$/; print "Upper case\n"
else; print "Mixed case\n"
end
#=> "Upper case"
Following a regular expression literal with the #=== operator allows you to compare against a String.
/^[a-z]*$/ === "HELLO" #=> false
/^[A-Z]*$/ === "HELLO" #=> true
2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 |
# File 're.c', line 2864
/*
 * Implements Regexp#===.
 *
 * The operand is normalised with reg_operand() (second argument FALSE).  If
 * that yields nil the backref is cleared and false is returned; otherwise
 * the result is true exactly when rb_reg_search() reports a non-negative
 * position.
 */
VALUE
rb_reg_eqq(VALUE re, VALUE str)
{
    VALUE subject = reg_operand(str, FALSE);

    if (NIL_P(subject)) {
        rb_backref_set(Qnil);
        return Qfalse;
    }
    return rb_reg_search(re, subject, 0, 0) < 0 ? Qfalse : Qtrue;
}
|
#=~(str) ⇒ Integer?
Match—Matches rxp against str.
/at/ =~ "input data" #=> 7
/ax/ =~ "input data" #=> nil
If =~
is used with a regexp literal with named captures, the captured strings (or nil) are assigned to local variables named by the capture names.
/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
p lhs #=> "x"
p rhs #=> "y"
If it is not matched, nil is assigned for the variables.
/(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
p lhs #=> nil
p rhs #=> nil
This assignment is implemented in the Ruby parser. The parser detects ‘regexp-literal =~ expression’ for the assignment. The regexp must be a literal without interpolation and placed at left hand side.
The assignment does not occur if the regexp is not a literal.
re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
re =~ " x = y "
p lhs # undefined local variable
p rhs # undefined local variable
A regexp interpolation, #{}
, also disables the assignment.
rhs_pat = /(?<rhs>\w+)/
/(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
p lhs # undefined local variable
The assignment does not occur if the regexp is placed at the right hand side.
" x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
p lhs, rhs # undefined local variable
2834 2835 2836 2837 2838 2839 2840 2841 |
# File 're.c', line 2834
/*
 * Implements Regexp#=~.
 *
 * Returns nil when reg_match_pos() reports a negative position.  Otherwise
 * the position is passed through rb_str_sublen() and returned as a Fixnum.
 */
VALUE
rb_reg_match(VALUE re, VALUE str)
{
    long found = reg_match_pos(re, &str, 0);
    long index;

    if (found < 0) {
        return Qnil;
    }
    index = rb_str_sublen(str, found);
    return LONG2FIX(index);
}
|
#casefold? ⇒ Boolean
Returns the value of the case-insensitive flag.
/a/.casefold? #=> false
/a/i.casefold? #=> true
/(?i:a)/.casefold? #=> false
684 685 686 687 688 689 690 |
# File 're.c', line 684
/*
 * Implements Regexp#casefold?: true when the compiled pattern's option bits
 * include ONIG_OPTION_IGNORECASE.  The receiver is validated first with
 * rb_reg_check().
 */
static VALUE
rb_reg_casefold_p(VALUE re)
{
    rb_reg_check(re);
    return (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) ? Qtrue : Qfalse;
}
|
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
935 936 937 938 939 940 941 942 943 |
# File 'encoding.c', line 935
/*
 * Returns the Encoding object for `obj`.
 *
 * Raises TypeError ("unknown encoding") when rb_enc_get_index() reports a
 * negative index; otherwise the index is masked with ENC_INDEX_MASK and
 * converted to an Encoding object.
 */
VALUE
rb_obj_encoding(VALUE obj)
{
    int enc_index = rb_enc_get_index(obj);

    if (enc_index < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }

    enc_index &= ENC_INDEX_MASK;
    return rb_enc_from_encoding_index(enc_index);
}
|
#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean
Equality—Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold?
values are the same.
/abc/ == /abc/x #=> false
/abc/ == /abc/i #=> false
/abc/ == /abc/u #=> false
/abc/u == /abc/n #=> false
2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 |
# File 're.c', line 2686
/*
 * Equality test behind Regexp#== / Regexp#eql?.
 *
 * The same object is trivially equal, and anything that is not a Regexp is
 * unequal.  After rb_reg_check() on both sides, the two must match on the
 * KCODE_FIXED flag, compiled options, source length and encoding (checked
 * in that order) and then on the raw source bytes.
 */
static VALUE
rb_reg_equal(VALUE re1, VALUE re2)
{
    if (re1 == re2) return Qtrue;
    if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;

    rb_reg_check(re1);
    rb_reg_check(re2);

    /* Cheap metadata comparisons first; any mismatch settles it. */
    if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED) ||
        RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options ||
        RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2) ||
        ENCODING_GET(re1) != ENCODING_GET(re2)) {
        return Qfalse;
    }

    if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) != 0) {
        return Qfalse;
    }
    return Qtrue;
}
|
#fixed_encoding? ⇒ Boolean
Returns false if rxp is applicable to a string with any ASCII compatible encoding. Returns true otherwise.
r = /a/
r.fixed_encoding? #=> false
r =~ "\u{6666} a" #=> 2
r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2
r =~ "abc".force_encoding("euc-jp") #=> 0
r = /a/u
r.fixed_encoding? #=> true
r.encoding #=> #<Encoding:UTF-8>
r =~ "\u{6666} a" #=> 2
r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError
r =~ "abc".force_encoding("euc-jp") #=> 0
r = /\u{6666}/
r.fixed_encoding? #=> true
r.encoding #=> #<Encoding:UTF-8>
r =~ "\u{6666} a" #=> 0
r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError
r =~ "abc".force_encoding("euc-jp") #=> nil
1291 1292 1293 1294 1295 1296 1297 1298 |
# File 're.c', line 1291
/*
 * Implements Regexp#fixed_encoding?: reports whether the KCODE_FIXED flag
 * is set on the receiver.
 */
static VALUE
rb_reg_fixed_encoding_p(VALUE re)
{
    return FL_TEST(re, KCODE_FIXED) ? Qtrue : Qfalse;
}
|
#hash ⇒ Fixnum
Produce a hash based on the text and options of this regular expression.
See also Object#hash.
2652 2653 2654 2655 2656 2657 |
# File 're.c', line 2652
/*
 * Implements Regexp#hash: wraps the value computed by reg_hash() as a
 * Fixnum.
 */
static VALUE
rb_reg_hash(VALUE re)
{
    st_index_t raw = reg_hash(re);

    return LONG2FIX(raw);
}
|
#initialize_copy(re) ⇒ Object
:nodoc:
3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 |
# File 're.c', line 3359
/*
 * Implements Regexp#initialize_copy.
 *
 * Nothing is done when OBJ_INIT_COPY() evaluates false.  Otherwise `copy` is
 * re-initialised from the source, encoding and options of `re`; a non-zero
 * result from rb_reg_initialize() is reported through rb_reg_raise().
 * Returns `copy`.
 */
static VALUE
rb_reg_init_copy(VALUE copy, VALUE re)
{
    onig_errmsg_buffer err = "";
    const char *src;
    long src_len;

    if (!OBJ_INIT_COPY(copy, re)) {
        return copy;
    }

    rb_reg_check(re);
    src = RREGEXP_SRC_PTR(re);
    src_len = RREGEXP_SRC_LEN(re);

    if (rb_reg_initialize(copy, src, src_len, rb_enc_get(re), rb_reg_options(re),
                          err, NULL, 0) != 0) {
        rb_reg_raise(src, src_len, err, re);
    }
    return copy;
}
|
#inspect ⇒ String
Produce a nicely formatted string-version of rxp. Perhaps surprisingly, #inspect
actually produces the more natural version of the string than #to_s
.
/ab+c/ix.inspect #=> "/ab+c/ix"
488 489 490 491 492 493 494 495 |
# File 're.c', line 488
/*
 * Implements Regexp#inspect.
 *
 * When the compiled pattern, the source object and the source pointer are
 * all present, the description comes from rb_reg_desc(); otherwise it falls
 * back to rb_any_to_s().
 */
static VALUE
rb_reg_inspect(VALUE re)
{
    if (RREGEXP(re)->ptr && RREGEXP_SRC(re) && RREGEXP_SRC_PTR(re)) {
        return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
    }
    /* Not fully set up: use the generic representation. */
    return rb_any_to_s(re);
}
|
#match(str) ⇒ MatchData? #match(str, pos) ⇒ MatchData?
Returns a MatchData
object describing the match, or nil
if there was no match. This is equivalent to retrieving the value of the special variable $~
following a normal match. If the second parameter is present, it specifies the position in the string to begin the search.
/(.)(.)(.)/.match("abc")[2] #=> "b"
/(.)(.)/.match("abc", 1)[2] #=> "c"
If a block is given, the block is invoked with the MatchData if the match succeeds, so that you can write
pat.match(str) {|m| ...}
instead of
if m = pat.match(str)
...
end
The return value is a value from block execution in this case.
2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 |
# File 're.c', line 2941
/*
 * Implements Regexp#match.
 *
 * Takes the subject string and an optional start position (default 0).
 * When reg_match_pos() reports a negative position the backref is cleared
 * and nil is returned.  Otherwise the current backref is marked busy and
 * returned, unless it is non-nil and a block was given, in which case the
 * block is yielded the match and its value is returned instead.
 */
static VALUE
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
{
    VALUE str, initpos, match;
    long start = 0;

    if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
        start = NUM2LONG(initpos);
    }

    if (reg_match_pos(re, &str, start) < 0) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    match = rb_backref_get();
    rb_match_busy(match);
    if (NIL_P(match) || !rb_block_given_p()) {
        return match;
    }
    return rb_yield(match);
}
|
#named_captures ⇒ Hash
Returns a hash representing information about named captures of rxp.
Each key of the hash is the name of a named capture. Each value is an array listing the indexes of the corresponding named captures.
/(?<foo>.)(?<bar>.)/.named_captures
#=> {"foo"=>[1], "bar"=>[2]}
/(?<foo>.)(?<foo>.)/.named_captures
#=> {"foo"=>[1, 2]}
If there are no named captures, an empty hash is returned.
/(.)(.)/.named_captures
#=> {}
795 796 797 798 799 800 801 802 |
# File 're.c', line 795
/*
 * Implements Regexp#named_captures.
 *
 * Creates a fresh hash, validates the receiver, and lets
 * onig_foreach_name() invoke reg_named_captures_iter with that hash as its
 * user argument.  Returns the hash.
 */
static VALUE
rb_reg_named_captures(VALUE re)
{
    VALUE captures = rb_hash_new();

    rb_reg_check(re);
    onig_foreach_name(RREGEXP(re)->ptr, reg_named_captures_iter, (void *)captures);

    return captures;
}
|
#names ⇒ Array
Returns a list of names of captures as an array of strings.
/(?<foo>.)(?<bar>.)(?<baz>.)/.names
#=> ["foo", "bar", "baz"]
/(?<foo>.)(?<foo>.)/.names
#=> ["foo"]
/(.)(.)/.names
#=> []
748 749 750 751 752 753 754 755 |
# File 're.c', line 748
/*
 * Implements Regexp#names.
 *
 * Creates a fresh array, validates the receiver, and lets
 * onig_foreach_name() invoke reg_names_iter with that array as its user
 * argument.  Returns the array.
 */
static VALUE
rb_reg_names(VALUE re)
{
    VALUE names = rb_ary_new();

    rb_reg_check(re);
    onig_foreach_name(RREGEXP(re)->ptr, reg_names_iter, (void *)names);

    return names;
}
|
#options ⇒ Fixnum
Returns the set of bits corresponding to the options used when creating this Regexp (see Regexp::new
for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to Regexp::new
.
Regexp::IGNORECASE #=> 1
Regexp::EXTENDED #=> 2
Regexp::MULTILINE #=> 4
/cat/.options #=> 0
/cat/ix.options #=> 3
Regexp.new('cat', true).options #=> 1
/\xa1\xa2/e.options #=> 16
r = /cat/ix
Regexp.new(r.source, r.options) #=> /cat/ix
716 717 718 719 720 721 |
# File 're.c', line 716
/*
 * Implements Regexp#options: returns the int from rb_reg_options() as a
 * Ruby integer.
 */
static VALUE
rb_reg_options_m(VALUE re)
{
    int bits = rb_reg_options(re);

    return INT2NUM(bits);
}
|
#source ⇒ String
Returns the original string of the pattern.
/ab+c/ix.source #=> "ab+c"
Note that escape sequences are retained as is.
/\x20\+/.source #=> "\\x20\\+"
465 466 467 468 469 470 471 472 473 474 |
# File 're.c', line 465
/*
 * Implements Regexp#source.
 *
 * After validating the receiver, builds a new string from the stored source
 * bytes using the receiver's encoding.  The new string is tainted when the
 * receiver is tainted.
 */
static VALUE
rb_reg_source(VALUE re)
{
    VALUE src;

    rb_reg_check(re);
    src = rb_enc_str_new(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), rb_enc_get(re));
    if (OBJ_TAINTED(re)) {
        OBJ_TAINT(src);
    }
    return src;
}
|
#to_s ⇒ String
Returns a string containing the regular expression and its options (using the (?opts:source)
notation). This string can be fed back in to Regexp::new
to produce a regular expression with the same semantics as the original. (However, Regexp#==
may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). Regexp#inspect
produces a generally more readable version of rxp.
r1 = /ab+c/ix #=> /ab+c/ix
s1 = r1.to_s #=> "(?ix-m:ab+c)"
r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
r1 == r2 #=> false
r1.source #=> "ab+c"
r2.source #=> "(?ix-m:ab+c)"
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 |
# File 're.c', line 518
/*
 * Implements Regexp#to_s: builds the "(?opts-opts:source)" form of `re`.
 *
 * Starts from the compiled option bits and the stored source.  If the
 * source itself begins with an inline option group "(?...", the option
 * letters found there are folded into `options` and the group prefix is
 * skipped; this is only kept when the remaining body compiles on its own,
 * otherwise the original options and source are restored.  The result
 * carries re's encoding (rb_enc_copy) and is passed through OBJ_INFECT.
 */
static VALUE
rb_reg_to_s(VALUE re)
{
int options, opt;
/* Mask of the three option bits tested before emitting a "-..." part. */
const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
long len;
const UChar* ptr;
VALUE str = rb_str_buf_new2("(?");
char optbuf[5];
rb_encoding *enc = rb_enc_get(re);
rb_reg_check(re);
rb_enc_copy(str, re);
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
/* Re-entered after a bare "(?opts)" prefix has been consumed below. */
again:
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
/* Stays non-zero unless the "(?opts:body)" form is found and body compiles. */
int err = 1;
ptr += 2;
if ((len -= 2) > 0) {
/* Characters before '-' turn option bits on; stop at the first
 * character for which char_to_option() returns 0. */
do {
opt = char_to_option((int )*ptr);
if (opt != 0) {
options |= opt;
}
else {
break;
}
++ptr;
} while (--len > 0);
}
if (len > 1 && *ptr == '-') {
++ptr;
--len;
/* Characters after '-' turn option bits off. */
do {
opt = char_to_option((int )*ptr);
if (opt != 0) {
options &= ~opt;
}
else {
break;
}
++ptr;
} while (--len > 0);
}
/* "(?opts)" with no body: skip the ')' and look for another group. */
if (*ptr == ')') {
--len;
++ptr;
goto again;
}
/* "(?opts:body)" reaching to the end of the source: trial-compile the
 * body with ruby_verbose temporarily set to Qfalse. */
if (*ptr == ':' && ptr[len-1] == ')') {
Regexp *rp;
VALUE verbose = ruby_verbose;
ruby_verbose = Qfalse;
++ptr;
/* Drop the leading ':' and the trailing ')'. */
len -= 2;
err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
enc, OnigDefaultSyntax, NULL);
onig_free(rp);
ruby_verbose = verbose;
}
/* Prefix was not usable: go back to the untouched options and source. */
if (err) {
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
}
}
/* Append the text for the enabled options, if option_to_str() produced any.
 * NOTE(review): presumably it writes one letter per set bit into optbuf - confirm. */
if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);
/* At least one embeddable bit is clear: append '-' plus the text for ~options. */
if ((options & embeddable) != embeddable) {
optbuf[0] = '-';
option_to_str(optbuf + 1, ~options);
rb_str_buf_cat2(str, optbuf);
}
rb_str_buf_cat2(str, ":");
if (rb_enc_asciicompat(enc)) {
/* ASCII-compatible encoding: append the source, then the closing paren. */
rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
rb_str_buf_cat2(str, ")");
}
else {
/* Otherwise the prefix plus ")" is tagged US-ASCII and encoded into enc;
 * the encoded ")" is then cut off, saved, and re-appended after the
 * source has been added. */
const char *s, *e;
char *paren;
ptrdiff_t n;
rb_str_buf_cat2(str, ")");
rb_enc_associate(str, rb_usascii_encoding());
str = rb_str_encode(str, rb_enc_from_encoding(enc), 0, Qnil);
/* backup encoded ")" to paren */
s = RSTRING_PTR(str);
e = RSTRING_END(str);
s = rb_enc_left_char_head(s, e-1, e, enc);
n = e - s;
paren = ALLOCA_N(char, n);
memcpy(paren, s, n);
rb_str_resize(str, RSTRING_LEN(str) - n);
rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
rb_str_buf_cat(str, paren, n);
}
rb_enc_copy(str, re);
OBJ_INFECT(str, re);
return str;
}
|
#~(rxp) ⇒ Integer?
Match—Matches rxp against the contents of $_
. Equivalent to rxp =~ $_
.
$_ = "input data"
~ /at/ #=> 7
2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 |
# File 're.c', line 2893
/*
 * Implements Regexp#~: matches `re` against the value from
 * rb_lastline_get().
 *
 * When that value is not a String the backref is cleared and nil is
 * returned.  A negative result from rb_reg_search() also gives nil;
 * otherwise the position is passed through rb_str_sublen() and returned as
 * a Fixnum.
 */
VALUE
rb_reg_match2(VALUE re)
{
    VALUE line = rb_lastline_get();
    long found;

    if (!RB_TYPE_P(line, T_STRING)) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    found = rb_reg_search(re, line, 0, 0);
    if (found < 0) {
        return Qnil;
    }
    found = rb_str_sublen(line, found);
    return LONG2FIX(found);
}
|