Class: Regexp
Overview
A Regexp
holds a regular expression, used to match a pattern against strings. Regexps are created using the /.../
and %r{...}
literals, and by the Regexp::new
constructor.
Constant Summary collapse
- IGNORECASE =
INT2FIX(RE_OPTION_IGNORECASE)
- EXTENDED =
INT2FIX(RE_OPTION_EXTENDED)
- MULTILINE =
INT2FIX(RE_OPTION_MULTILINE)
Class Method Summary collapse
-
.compile ⇒ Object
Synonym for
Regexp.new
. -
.escape ⇒ Object
Escapes any characters that would have special meaning in a regular expression.
-
.last_match ⇒ Object
The first form returns the
MatchData
object generated by the last successful pattern match. -
.quote ⇒ Object
Escapes any characters that would have special meaning in a regular expression.
-
.union ⇒ Object
Return a
Regexp
object that is the union of the given patterns, i.e., will match any of its parts.
Instance Method Summary collapse
-
#== ⇒ Object
Equality---Two regexps are equal if their patterns are identical, they have the same character set code, and their
casefold?
values are the same. -
#===(str) ⇒ Boolean
Case Equality---Synonym for
Regexp#=~
used in case statements. -
#match(str) ⇒ MatchData?
Returns a
MatchData
object describing the match, or nil
if there was no match. -
#casefold? ⇒ Boolean
Returns the value of the case-insensitive flag.
-
#eql? ⇒ Object
Equality---Two regexps are equal if their patterns are identical, they have the same character set code, and their
casefold?
values are the same. -
#hash ⇒ Fixnum
Produce a hash based on the text and options of this regular expression.
-
#initialize ⇒ Object
constructor
Constructs a new regular expression from pattern, which can be either a
String
or a Regexp
(in which case that regexp's options are propagated, and new options may not be specified (a change as of Ruby 1.8)). If options is a Fixnum
, it should be one or more of the constants Regexp::EXTENDED
, Regexp::IGNORECASE
, and Regexp::MULTILINE
, or-ed together. Otherwise, if options is not nil
, the regexp will be case insensitive. The lang parameter enables multibyte support for the regexp: 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8. -
#initialize_copy ⇒ Object
:nodoc:.
-
#inspect ⇒ String
Produce a nicely formatted string-version of rxp.
-
#kcode ⇒ String
Returns the character set code for the regexp.
-
#match(str) ⇒ MatchData?
Returns a
MatchData
object describing the match, or nil
if there was no match. -
#options ⇒ Fixnum
Returns the set of bits corresponding to the options used when creating this Regexp (see
Regexp::new
for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to Regexp::new
. -
#source ⇒ String
Returns the original string of the pattern.
-
#to_s ⇒ String
Returns a string containing the regular expression and its options (using the
(?xxx:yyy)
notation). This string can be fed back in to Regexp::new
to produce a regular expression with the same semantics as the original. (However, Regexp#==
may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). Regexp#inspect
produces a generally more readable version of rxp. -
#~(rxp) ⇒ Integer?
Match---Matches rxp against the contents of
$_
.
Constructor Details
#new(string[, options [, lang]]) ⇒ Regexp #new(regexp) ⇒ Regexp #compile(string[, options [, lang]]) ⇒ Regexp #compile(regexp) ⇒ Regexp
Constructs a new regular expression from pattern, which can be either a String
or a Regexp
(in which case that regexp's options are propagated, and new options may not be specified (a change as of Ruby 1.8)). If options is a Fixnum
, it should be one or more of the constants Regexp::EXTENDED
, Regexp::IGNORECASE
, and Regexp::MULTILINE
, or-ed together. Otherwise, if options is not nil
, the regexp will be case insensitive. The lang parameter enables multibyte support for the regexp: 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8.
r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
r2 = Regexp.new('cat', true) #=> /cat/i
r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x
r4 = Regexp.new(r2) #=> /cat/i
|
# File 're.c'
/*
 *  call-seq:
 *     Regexp.new(string [, options [, lang]])       => regexp
 *     Regexp.new(regexp)                            => regexp
 *     Regexp.compile(string [, options [, lang]])   => regexp
 *     Regexp.compile(regexp)                        => regexp
 *
 *  Constructs a new regular expression from <i>pattern</i>, which can be
 *  either a <code>String</code> or a <code>Regexp</code>. When a
 *  <code>Regexp</code> is given, its options are propagated and any further
 *  arguments are ignored with a warning (a change as of Ruby 1.8). If
 *  <i>options</i> is a <code>Fixnum</code>, it should be one or more of the
 *  constants <code>Regexp::EXTENDED</code>, <code>Regexp::IGNORECASE</code>,
 *  and <code>Regexp::MULTILINE</code>, <em>or</em>-ed together. Otherwise, if
 *  <i>options</i> is not <code>nil</code> or <code>false</code>, the regexp
 *  will be case insensitive. The <i>lang</i> parameter enables multibyte
 *  support for the regexp: `n', `N' = none, `e', `E' = EUC, `s', `S' = SJIS,
 *  `u', `U' = UTF-8.
 *
 *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
 *     r2 = Regexp.new('cat', true)               #=> /cat/i
 *     r3 = Regexp.new('dog', Regexp::EXTENDED)   #=> /dog/x
 *     r4 = Regexp.new(r2)                        #=> /cat/i
 */
static VALUE
rb_reg_initialize_m(argc, argv, self)
    int argc;
    VALUE *argv;
    VALUE self;
{
    const char *src;
    long srclen;
    int opts = 0;

    if (argc > 3 || argc == 0) {
        rb_raise(rb_eArgError, "wrong number of arguments");
    }

    if (TYPE(argv[0]) != T_REGEXP) {
        /* String pattern: options come from the optional arguments. */
        if (argc >= 2) {
            if (FIXNUM_P(argv[1])) {
                opts = FIX2INT(argv[1]);
            }
            else if (RTEST(argv[1])) {
                opts = RE_OPTION_IGNORECASE;
            }
        }
        if (argc == 3 && !NIL_P(argv[2])) {
            char *lang = StringValuePtr(argv[2]);

            /* Drop any kcode bits (mask 0x70) that came in through the
             * options and replace them with the one named by lang. */
            opts &= ~0x70;
            switch (lang[0]) {
              case 'n': case 'N':
                opts |= 16;
                break;
              case 'e': case 'E':
                opts |= 32;
                break;
              case 's': case 'S':
                opts |= 48;
                break;
              case 'u': case 'U':
                opts |= 64;
                break;
              default:
                break;
            }
        }
        /* Converted last, after the optional arguments were processed. */
        src = StringValuePtr(argv[0]);
        srclen = RSTRING(argv[0])->len;
    }
    else {
        /* Regexp pattern: copy its source and options, ignore the rest. */
        if (argc > 1) {
            rb_warn("flags%s ignored", (argc == 3) ? " and encoding": "");
        }
        rb_reg_check(argv[0]);
        opts = RREGEXP(argv[0])->ptr->options & 0xf;
        if (FL_TEST(argv[0], KCODE_FIXED)) {
            /* Translate the fixed kcode of the source regexp into the
             * same option bits the lang argument would have produced. */
            switch (RBASIC(argv[0])->flags & KCODE_MASK) {
              case KCODE_NONE:
                opts |= 16;
                break;
              case KCODE_EUC:
                opts |= 32;
                break;
              case KCODE_SJIS:
                opts |= 48;
                break;
              case KCODE_UTF8:
                opts |= 64;
                break;
              default:
                break;
            }
        }
        src = RREGEXP(argv[0])->str;
        srclen = RREGEXP(argv[0])->len;
    }

    rb_reg_initialize(self, src, srclen, opts);
    return self;
}
|
Class Method Details
.compile ⇒ Object
Synonym for Regexp.new
.escape(str) ⇒ String .quote(str) ⇒ String
Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.escape(str)=~str
will be true.
Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
|
# File 're.c'
/*
* call-seq:
* Regexp.escape(str) => a_str
* Regexp.quote(str) => a_str
*
* Escapes any characters that would have special meaning in a regular
* expression. Returns a new escaped string, or self if no characters are
* escaped. For any string,
* <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
*
* Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
*/
static VALUE
rb_reg_s_quote(argc, argv)
int argc;
VALUE *argv;
{
VALUE str, kcode;
int kcode_saved = reg_kcode;
rb_scan_args(argc, argv, "11", &str, &kcode);
if (!NIL_P(kcode)) {
rb_set_kcode(StringValuePtr(kcode));
curr_kcode = reg_kcode;
reg_kcode = kcode_saved;
}
StringValue(str);
str = rb_reg_quote(str);
rb_kcode_reset_option();
return str;
}
|
.last_match ⇒ MatchData .last_match(fixnum) ⇒ String
The first form returns the MatchData
object generated by the last successful pattern match. Equivalent to reading the global variable $~
. The second form returns the nth field in this MatchData
object.
/c(.)t/ =~ 'cat' #=> 0
Regexp.last_match #=> #<MatchData:0x401b3d30>
Regexp.last_match(0) #=> "cat"
Regexp.last_match(1) #=> "a"
Regexp.last_match(2) #=> nil
|
# File 're.c'
/*
 *  call-seq:
 *     Regexp.last_match           => matchdata
 *     Regexp.last_match(fixnum)   => str
 *
 *  Without an argument, returns the <code>MatchData</code> object generated
 *  by the last successful pattern match (equivalent to reading the global
 *  variable <code>$~</code>). With an argument, returns the nth field of
 *  that <code>MatchData</code> object.
 *
 *     /c(.)t/ =~ 'cat'       #=> 0
 *     Regexp.last_match      #=> #<MatchData:0x401b3d30>
 *     Regexp.last_match(0)   #=> "cat"
 *     Regexp.last_match(1)   #=> "a"
 *     Regexp.last_match(2)   #=> nil
 */
static VALUE
rb_reg_s_last_match(argc, argv)
    int argc;
    VALUE *argv;
{
    VALUE index;
    int given = rb_scan_args(argc, argv, "01", &index);

    if (given != 1) {
        /* No index supplied: hand back the whole match object. */
        return match_getter();
    }
    return rb_reg_nth_match(NUM2INT(index), rb_backref_get());
}
|
.escape(str) ⇒ String .quote(str) ⇒ String
Escapes any characters that would have special meaning in a regular expression. Returns a new escaped string, or self if no characters are escaped. For any string, Regexp.escape(str)=~str
will be true.
Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
|
# File 're.c'
/*
* call-seq:
* Regexp.escape(str) => a_str
* Regexp.quote(str) => a_str
*
* Escapes any characters that would have special meaning in a regular
* expression. Returns a new escaped string, or self if no characters are
* escaped. For any string,
* <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
*
* Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
*/
static VALUE
rb_reg_s_quote(argc, argv)
int argc;
VALUE *argv;
{
VALUE str, kcode;
int kcode_saved = reg_kcode;
rb_scan_args(argc, argv, "11", &str, &kcode);
if (!NIL_P(kcode)) {
rb_set_kcode(StringValuePtr(kcode));
curr_kcode = reg_kcode;
reg_kcode = kcode_saved;
}
StringValue(str);
str = rb_reg_quote(str);
rb_kcode_reset_option();
return str;
}
|
.union(pat1, pat2, ...) ⇒ Regexp .union(pats_ary) ⇒ Regexp
Return a Regexp
object that is the union of the given patterns, i.e., will match any of its parts. The patterns can be Regexp objects, in which case their options will be preserved, or Strings. If no patterns are given, returns /(?!)/
.
Regexp.union #=> /(?!)/
Regexp.union("penzance") #=> /penzance/
Regexp.union("a+b*c") #=> /a\+b\*c/
Regexp.union("skiing", "sledding") #=> /skiing|sledding/
Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
|
# File 're.c'
/*
 *  call-seq:
 *     Regexp.union(pat1, pat2, ...)            => new_regexp
 *     Regexp.union(pats_ary)                   => new_regexp
 *
 *  Return a <code>Regexp</code> object that is the union of the given
 *  <em>pattern</em>s, i.e., will match any of its parts. The
 *  <em>pattern</em>s can be Regexp objects, in which case their options will
 *  be preserved, or Strings. If no patterns are given, returns
 *  <code>/(?!)/</code>.
 *
 *     Regexp.union                         #=> /(?!)/
 *     Regexp.union("penzance")             #=> /penzance/
 *     Regexp.union("a+b*c")                #=> /a\+b\*c/
 *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
 *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
 *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
 */
static VALUE
rb_reg_s_union_m(VALUE self, VALUE args)
{
    if (RARRAY_LEN(args) == 1) {
        /* A single argument that converts to an array is treated as the
         * list of patterns itself. */
        VALUE pats = rb_check_array_type(rb_ary_entry(args, 0));

        if (!NIL_P(pats)) {
            return rb_reg_s_union(self, pats);
        }
    }
    return rb_reg_s_union(self, args);
}
|
Instance Method Details
#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean
Equality---Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold?
values are the same.
/abc/ == /abc/x #=> false
/abc/ == /abc/i #=> false
/abc/u == /abc/n #=> false
|
# File 're.c'
/*
 *  call-seq:
 *     rxp == other_rxp      => true or false
 *     rxp.eql?(other_rxp)   => true or false
 *
 *  Equality---Two regexps are equal if their source patterns are identical
 *  byte for byte, they have the same character set code, and their compiled
 *  option bits (which include the <code>casefold?</code> flag) are the same.
 *
 *     /abc/  == /abc/x   #=> false
 *     /abc/  == /abc/i   #=> false
 *     /abc/u == /abc/n   #=> false
 */
static VALUE
rb_reg_equal(re1, re2)
    VALUE re1, re2;
{
    /* Same object: trivially equal. */
    if (re1 == re2) {
        return Qtrue;
    }
    if (TYPE(re2) != T_REGEXP) {
        return Qfalse;
    }

    rb_reg_check(re1);
    rb_reg_check(re2);

    /* Reject on the first difference: length, bytes, kcode, options. */
    if (RREGEXP(re1)->len != RREGEXP(re2)->len) {
        return Qfalse;
    }
    if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) != 0) {
        return Qfalse;
    }
    if (rb_reg_cur_kcode(re1) != rb_reg_cur_kcode(re2)) {
        return Qfalse;
    }
    if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) {
        return Qfalse;
    }
    return Qtrue;
}
|
#===(str) ⇒ Boolean
Case Equality---Synonym for Regexp#=~
used in case statements.
a = "HELLO"
case a
when /^[a-z]*$/; print "Lower case\n"
when /^[A-Z]*$/; print "Upper case\n"
else; print "Mixed case\n"
end
produces:
Upper case
|
# File 're.c'
/*
 *  call-seq:
 *     rxp === str   => true or false
 *
 *  Case Equality---Synonym for <code>Regexp#=~</code> used in case
 *  statements. An argument that cannot be converted to a string clears the
 *  last-match data and yields <code>false</code>.
 *
 *     a = "HELLO"
 *     case a
 *     when /^[a-z]*$/; print "Lower case\n"
 *     when /^[A-Z]*$/; print "Upper case\n"
 *     else;            print "Mixed case\n"
 *     end
 *
 *  <em>produces:</em>
 *
 *     Upper case
 */
VALUE
rb_reg_eqq(re, str)
    VALUE re, str;
{
    long pos;

    if (TYPE(str) != T_STRING) {
        str = rb_check_string_type(str);
        if (NIL_P(str)) {
            /* Not string-like: reset the back-reference, no match. */
            rb_backref_set(Qnil);
            return Qfalse;
        }
    }
    StringValue(str);

    pos = rb_reg_search(re, str, 0, 0);
    return (pos < 0) ? Qfalse : Qtrue;
}
|
#match(str) ⇒ MatchData?
Returns a MatchData
object describing the match, or nil
if there was no match. This is equivalent to retrieving the value of the special variable $~
following a normal match.
/(.)(.)(.)/.match("abc")[2] #=> "b"
|
# File 're.c'
/*
 *  call-seq:
 *     rxp =~ str    => integer or nil
 *
 *  Match---Searches <i>str</i> for <i>rxp</i> starting at offset 0 and
 *  returns the position at which the match starts, or <code>nil</code> if
 *  there is no match. A <code>nil</code> argument clears the last-match data
 *  and returns <code>nil</code>.
 *
 *  NOTE(review): the comment previously attached to this function was a copy
 *  of the <code>rxp.match</code> documentation (MatchData result), which does
 *  not describe this code. This function presumably backs
 *  <code>Regexp#=~</code> -- confirm against the method table.
 */
VALUE
rb_reg_match(re, str)
    VALUE re, str;
{
    long pos;

    if (NIL_P(str)) {
        rb_backref_set(Qnil);
        return Qnil;
    }
    StringValue(str);

    pos = rb_reg_search(re, str, 0, 0);
    /* A negative search result means "no match". */
    return (pos < 0) ? Qnil : LONG2FIX(pos);
}
|
#casefold? ⇒ Boolean
Returns the value of the case-insensitive flag.
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.casefold?   => true or false
 *
 *  Returns <code>true</code> when the case-insensitive option is set on
 *  <i>rxp</i>, <code>false</code> otherwise.
 */
static VALUE
rb_reg_casefold_p(re)
    VALUE re;
{
    rb_reg_check(re);
    return (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) ? Qtrue : Qfalse;
}
|
#==(other_rxp) ⇒ Boolean #eql?(other_rxp) ⇒ Boolean
Equality---Two regexps are equal if their patterns are identical, they have the same character set code, and their casefold?
values are the same.
/abc/ == /abc/x #=> false
/abc/ == /abc/i #=> false
/abc/u == /abc/n #=> false
|
# File 're.c'
/*
 *  call-seq:
 *     rxp == other_rxp      => true or false
 *     rxp.eql?(other_rxp)   => true or false
 *
 *  Equality---Two regexps are equal if their source patterns are identical
 *  byte for byte, they have the same character set code, and their compiled
 *  option bits (which include the <code>casefold?</code> flag) are the same.
 *
 *     /abc/  == /abc/x   #=> false
 *     /abc/  == /abc/i   #=> false
 *     /abc/u == /abc/n   #=> false
 */
static VALUE
rb_reg_equal(re1, re2)
    VALUE re1, re2;
{
    /* Same object: trivially equal. */
    if (re1 == re2) {
        return Qtrue;
    }
    if (TYPE(re2) != T_REGEXP) {
        return Qfalse;
    }

    rb_reg_check(re1);
    rb_reg_check(re2);

    /* Reject on the first difference: length, bytes, kcode, options. */
    if (RREGEXP(re1)->len != RREGEXP(re2)->len) {
        return Qfalse;
    }
    if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) != 0) {
        return Qfalse;
    }
    if (rb_reg_cur_kcode(re1) != rb_reg_cur_kcode(re2)) {
        return Qfalse;
    }
    if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) {
        return Qfalse;
    }
    return Qtrue;
}
|
#hash ⇒ Fixnum
Produce a hash based on the text and options of this regular expression.
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.hash   => fixnum
 *
 *  Produce a hash based on the text and options of this regular expression.
 */
static VALUE
rb_reg_hash(re)
    VALUE re;
{
    /*
     * The accumulator is unsigned so that the multiply/add wraps around
     * with defined behaviour; a signed int would overflow (undefined
     * behaviour) after only a handful of pattern bytes.
     */
    unsigned int acc;
    /* The source length is stored as a long; an int counter would truncate. */
    long remaining;
    const char *p;

    rb_reg_check(re);
    acc = (unsigned int)RREGEXP(re)->ptr->options;
    remaining = RREGEXP(re)->len;
    p = RREGEXP(re)->str;

    /* Fold every byte of the pattern source into the hash (x33 scheme). */
    while (remaining-- > 0) {
        acc = acc * 33u + (unsigned int)*p++;
    }

    /*
     * Final mixing step. The shift is still done on the signed view of the
     * value so the result has the same bits as before on two's-complement
     * targets; only the addition is moved to unsigned arithmetic.
     */
    acc += (unsigned int)((int)acc >> 5);
    return INT2FIX((int)acc);
}
|
#initialize_copy ⇒ Object
:nodoc:
|
# File 're.c'
/* :nodoc: */
static VALUE
rb_reg_init_copy(copy, re)
    VALUE copy, re;
{
    /* Copying a regexp onto itself needs no work. */
    if (copy != re) {
        rb_check_frozen(copy);
        /* need better argument type check */
        if (!rb_obj_is_instance_of(re, rb_obj_class(copy))) {
            rb_raise(rb_eTypeError, "wrong argument type");
        }
        rb_reg_check(re);
        /* Re-initialize the copy from the source text and options of re. */
        rb_reg_initialize(copy, RREGEXP(re)->str, RREGEXP(re)->len,
                          rb_reg_options(re));
    }
    return copy;
}
|
#inspect ⇒ String
Produce a nicely formatted string-version of rxp. Perhaps surprisingly, #inspect
actually produces the more natural version of the string than #to_s
.
/ab+c/ix.inspect #=> /ab+c/ix
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.inspect   => string
 *
 *  Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
 *  <code>#inspect</code> actually produces the more natural version of
 *  the string than <code>#to_s</code>.
 *
 *     /ab+c/ix.inspect   #=> /ab+c/ix
 *
 *  The result is built by rb_reg_desc from the regexp's stored source text
 *  and length.
 */
static VALUE
rb_reg_inspect(re)
VALUE re;
{
rb_reg_check(re);
return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
}
|
#kcode ⇒ String
Returns the character set code for the regexp.
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.kcode   => str
 *
 *  Returns the character set code for the regexp as one of
 *  <code>"none"</code>, <code>"euc"</code>, <code>"sjis"</code> or
 *  <code>"utf8"</code>, or <code>nil</code> when the regexp has no fixed
 *  kcode.
 */
static VALUE
rb_reg_kcode_m(re)
    VALUE re;
{
    const char *name;

    /* Without the KCODE_FIXED flag there is nothing to report. */
    if (!FL_TEST(re, KCODE_FIXED)) {
        return Qnil;
    }

    switch (RBASIC(re)->flags & KCODE_MASK) {
      case KCODE_NONE:
        name = "none";
        break;
      case KCODE_EUC:
        name = "euc";
        break;
      case KCODE_SJIS:
        name = "sjis";
        break;
      case KCODE_UTF8:
        name = "utf8";
        break;
      default:
        rb_bug("unknown kcode - should not happen");
        break;
    }
    return rb_str_new2(name);
}
|
#match(str) ⇒ MatchData?
Returns a MatchData
object describing the match, or nil
if there was no match. This is equivalent to retrieving the value of the special variable $~
following a normal match.
/(.)(.)(.)/.match("abc")[2] #=> "b"
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.match(str)   => matchdata or nil
 *
 *  Returns a <code>MatchData</code> object describing the match, or
 *  <code>nil</code> if there was no match. This is equivalent to retrieving
 *  the value of the special variable <code>$~</code> following a normal
 *  match.
 *
 *     /(.)(.)(.)/.match("abc")[2]   #=> "b"
 */
static VALUE
rb_reg_match_m(re, str)
    VALUE re, str;
{
    VALUE pos = rb_reg_match(re, str);
    VALUE md;

    if (NIL_P(pos)) {
        return Qnil;
    }

    /* The search succeeded: pick up the match object it left behind and
     * mark it busy before returning it to the caller. */
    md = rb_backref_get();
    rb_match_busy(md);
    return md;
}
|
#options ⇒ Fixnum
Returns the set of bits corresponding to the options used when creating this Regexp (see Regexp::new
for details). Note that additional bits may be set in the returned options: these are used internally by the regular expression code. These extra bits are ignored if the options are passed to Regexp::new
.
Regexp::IGNORECASE #=> 1
Regexp::EXTENDED #=> 2
Regexp::MULTILINE #=> 4
/cat/.options #=> 128
/cat/ix.options #=> 131
Regexp.new('cat', true).options #=> 129
Regexp.new('cat', 0, 's').options #=> 384
r = /cat/ix
Regexp.new(r.source, r.options) #=> /cat/ix
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.options   => fixnum
 *
 *  Returns the set of bits corresponding to the options used when creating
 *  this Regexp (see <code>Regexp::new</code> for details). Note that
 *  additional bits may be set in the returned options: these are used
 *  internally by the regular expression code. These extra bits are ignored
 *  if the options are passed to <code>Regexp::new</code>.
 *
 *     Regexp::IGNORECASE                  #=> 1
 *     Regexp::EXTENDED                    #=> 2
 *     Regexp::MULTILINE                   #=> 4
 *
 *     /cat/.options                       #=> 128
 *     /cat/ix.options                     #=> 131
 *     Regexp.new('cat', true).options     #=> 129
 *     Regexp.new('cat', 0, 's').options   #=> 384
 *
 *     r = /cat/ix
 *     Regexp.new(r.source, r.options)     #=> /cat/ix
 */
static VALUE
rb_reg_options_m(re)
    VALUE re;
{
    int bits;

    bits = rb_reg_options(re);
    return INT2NUM(bits);
}
|
#source ⇒ String
Returns the original string of the pattern.
/ab+c/ix.source #=> "ab+c"
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.source   => str
 *
 *  Returns a new string holding the original text of the pattern. The
 *  string is tainted when <i>rxp</i> is tainted.
 *
 *     /ab+c/ix.source   #=> "ab+c"
 */
static VALUE
rb_reg_source(re)
    VALUE re;
{
    VALUE text;

    rb_reg_check(re);
    text = rb_str_new(RREGEXP(re)->str, RREGEXP(re)->len);
    if (OBJ_TAINTED(re)) {
        OBJ_TAINT(text);
    }
    return text;
}
|
#to_s ⇒ String
Returns a string containing the regular expression and its options (using the (?xxx:yyy)
notation). This string can be fed back in to Regexp::new
to produce a regular expression with the same semantics as the original. (However, Regexp#==
may not return true when comparing the two, as the source of the regular expression itself may differ, as the example shows). Regexp#inspect
produces a generally more readable version of rxp.
r1 = /ab+c/ix #=> /ab+c/ix
s1 = r1.to_s #=> "(?ix-m:ab+c)"
r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
r1 == r2 #=> false
r1.source #=> "ab+c"
r2.source #=> "(?ix-m:ab+c)"
|
# File 're.c'
/*
 *  call-seq:
 *     rxp.to_s   => str
 *
 *  Returns a string containing the regular expression and its options (using
 *  the <code>(?xxx:yyy)</code> notation). This string can be fed back in to
 *  <code>Regexp::new</code> to produce a regular expression with the same
 *  semantics as the original. (However, <code>Regexp#==</code> may not return
 *  true when comparing the two, as the source of the regular expression
 *  itself may differ, as the example shows). <code>Regexp#inspect</code>
 *  produces a generally more readable version of <i>rxp</i>.
 *
 *     r1 = /ab+c/ix         #=> /ab+c/ix
 *     s1 = r1.to_s          #=> "(?ix-m:ab+c)"
 *     r2 = Regexp.new(s1)   #=> /(?ix-m:ab+c)/
 *     r1 == r2              #=> false
 *     r1.source             #=> "ab+c"
 *     r2.source             #=> "(?ix-m:ab+c)"
 *
 *  Implementation note: if the source itself begins with an option group
 *  such as <code>(?i)</code> or is wrapped in <code>(?i-mx:...)</code>, the
 *  flags of that group are folded into the emitted option letters and the
 *  group is stripped, provided the remaining text compiles on its own.
 *  Otherwise the source and options are used unchanged.
 */
static VALUE
rb_reg_to_s(re)
VALUE re;
{
int options;
/* The three options that can be written inside a (?...) group. */
const int embeddable = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED;
long len;
const char* ptr;
VALUE str = rb_str_buf_new2("(?");
rb_reg_check(re);
options = RREGEXP(re)->ptr->options;
ptr = RREGEXP(re)->str;
len = RREGEXP(re)->len;
/* Re-entered after each bare "(?flags)" prefix has been consumed. */
again:
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
/* err stays set unless the "(?flags:...)" form below compiles cleanly. */
int err = 1;
ptr += 2;
if ((len -= 2) > 0) {
/* Flags before an optional '-' switch options on. */
do {
if (*ptr == 'm') {
options |= RE_OPTION_MULTILINE;
}
else if (*ptr == 'i') {
options |= RE_OPTION_IGNORECASE;
}
else if (*ptr == 'x') {
options |= RE_OPTION_EXTENDED;
}
else break;
++ptr;
} while (--len > 0);
}
if (len > 1 && *ptr == '-') {
++ptr;
--len;
/* Flags after the '-' switch options off. */
do {
if (*ptr == 'm') {
options &= ~RE_OPTION_MULTILINE;
}
else if (*ptr == 'i') {
options &= ~RE_OPTION_IGNORECASE;
}
else if (*ptr == 'x') {
options &= ~RE_OPTION_EXTENDED;
}
else break;
++ptr;
} while (--len > 0);
}
/* Bare option group "(?flags)": skip it and scan the rest again. */
if (*ptr == ')') {
--len;
++ptr;
goto again;
}
/* "(?flags:body)" spanning the whole source: try compiling the body by
 * itself; err is cleared only when that compilation succeeds. */
if (*ptr == ':' && ptr[len-1] == ')') {
Regexp *rp;
rb_kcode_set_option(re);
rp = ALLOC(Regexp);
MEMZERO((char *)rp, Regexp, 1);
/* Skips the ':' and drops it and the trailing ')' from the length. */
err = re_compile_pattern(++ptr, len -= 2, rp) != 0;
rb_kcode_reset_option();
re_free_pattern(rp);
}
/* Anything else: discard the partial parse and use the regexp as is. */
if (err) {
options = RREGEXP(re)->ptr->options;
ptr = RREGEXP(re)->str;
len = RREGEXP(re)->len;
}
}
/* Emit the enabled flags, then "-" followed by the disabled ones. */
if (options & RE_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
if (options & RE_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
if (options & RE_OPTION_EXTENDED) rb_str_buf_cat2(str, "x");
if ((options & embeddable) != embeddable) {
rb_str_buf_cat2(str, "-");
if (!(options & RE_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
if (!(options & RE_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
if (!(options & RE_OPTION_EXTENDED)) rb_str_buf_cat2(str, "x");
}
rb_str_buf_cat2(str, ":");
rb_reg_expr_str(str, ptr, len);
rb_str_buf_cat2(str, ")");
OBJ_INFECT(str, re);
return str;
}
|
#~(rxp) ⇒ Integer?
Match---Matches rxp against the contents of $_
. Equivalent to rxp =~ $_
.
$_ = "input data"
~ /at/ #=> 7
|
# File 're.c'
/*
 *  call-seq:
 *     ~ rxp   => integer or nil
 *
 *  Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
 *  Equivalent to <code><i>rxp</i> =~ $_</code>. When <code>$_</code> does
 *  not hold a string, the last-match data is cleared and <code>nil</code> is
 *  returned.
 *
 *     $_ = "input data"
 *     ~ /at/   #=> 7
 */
VALUE
rb_reg_match2(re)
    VALUE re;
{
    VALUE line = rb_lastline_get();
    long pos;

    if (TYPE(line) != T_STRING) {
        rb_backref_set(Qnil);
        return Qnil;
    }

    pos = rb_reg_search(re, line, 0, 0);
    /* A negative search result means "no match". */
    return (pos < 0) ? Qnil : LONG2FIX(pos);
}
|