Class: Ripper

Inherits:
Object show all
Defined in:
lib/ripper.rb,
lib/ripper/core.rb,
lib/ripper/sexp.rb,
lib/ripper/lexer.rb,
lib/ripper/filter.rb,
ripper.c

Overview

Ripper is a Ruby script parser.

You can get information from the parser in an event-based style, such as abstract syntax trees or a simple lexical analysis of the Ruby program.

Usage

Ripper provides an easy interface for parsing your program into a symbolic expression tree (or S-expression).

Understanding the output of the parser may be a challenge, so it’s recommended that you use PP to format the output for legibility.

require 'ripper'
require 'pp'

pp Ripper.sexp('def hello(world) "Hello, #{world}!"; end')
  #=> [:program,
       [[:def,
         [:@ident, "hello", [1, 4]],
         [:paren,
          [:params, [[:@ident, "world", [1, 10]]], nil, nil, nil, nil, nil, nil]],
         [:bodystmt,
          [[:string_literal,
            [:string_content,
             [:@tstring_content, "Hello, ", [1, 18]],
             [:string_embexpr, [[:var_ref, [:@ident, "world", [1, 27]]]]],
             [:@tstring_content, "!", [1, 33]]]]],
          nil,
          nil,
          nil]]]]

You can see in the example above, the expression starts with :program.

From here, there is a method definition at :def, followed by the method’s identifier :@ident. After the method’s identifier come the parentheses :paren and the method parameters under :params.

Next is the method body, starting at :bodystmt (stmt meaning statement), which contains the full definition of the method.

In our case, we’re simply returning a String, so next we have the :string_literal expression.

Within our :string_literal you’ll notice two @tstring_content nodes; these are the literal parts for Hello, and !. Between the two @tstring_content nodes is a :string_embexpr, where embexpr means an embedded expression. Our expression consists of a local variable, or var_ref, with the identifier (@ident) of world.

Resources

Requirements

  • ruby 1.9 (support CVS HEAD only)

  • bison 1.28 or later (Other yaccs do not work)

License

Ruby License.

                                              Minero Aoki
                                      aamine@loveruby.net
                                    http://i.loveruby.net

Direct Known Subclasses

Lexer, SexpBuilder

Defined Under Namespace

Classes: Filter, Lexer, SexpBuilder, SexpBuilderPP, TokenPattern

Constant Summary collapse

PARSER_EVENTS =

This array contains the names of parser events.

PARSER_EVENT_TABLE.keys
SCANNER_EVENTS =

This array contains the names of scanner events.

SCANNER_EVENT_TABLE.keys
EVENTS =

This array contains the names of all ripper events.

PARSER_EVENTS + SCANNER_EVENTS
Version =

version of Ripper

rb_usascii_str_new2(RIPPER_VERSION)

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(src, filename = "(ripper)", lineno = 1) ⇒ Object

Create a new Ripper object. src must be a String, an IO, or an Object which has a #gets method.

This method does not start parsing. See also Ripper#parse and Ripper.parse.



17987
17988
17989
17990
17991
17992
17993
17994
17995
17996
17997
17998
17999
18000
18001
18002
18003
18004
18005
18006
18007
18008
18009
18010
18011
18012
18013
18014
18015
18016
18017
18018
18019
# File 'ripper.c', line 17987

/*
 * Implements Ripper#initialize(src, filename = "(ripper)", lineno = 1).
 *
 * Takes one required and up to two optional arguments (rb_scan_args "12"),
 * picks the line reader according to the type of src, stores a frozen copy
 * of the file name together with the starting line, and returns nil.
 *
 * NOTE(review): only T_FILE sources get ripper_lex_get_generic here; every
 * other object is run through StringValue.  The method description mentions
 * objects that merely respond to #gets -- confirm whether those are meant to
 * be accepted by this version.
 */
static VALUE
ripper_initialize(int argc, VALUE *argv, VALUE self)
{
    struct parser_params *parser;
    VALUE src, fname, lineno;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
    /* Choose how source lines are fetched, based on the type of src. */
    if (RB_TYPE_P(src, T_FILE)) {
        lex_gets = ripper_lex_get_generic;
    }
    else {
        /* Anything that is not a T_FILE must be (convertible to) a String. */
        StringValue(src);
        lex_gets = lex_get_str;
    }
    lex_input = src;
    parser->eofp = 0;
    /* The file name is always kept as a frozen String. */
    if (NIL_P(fname)) {
        fname = STR_NEW2("(ripper)");
	OBJ_FREEZE(fname);
    }
    else {
        StringValue(fname);
	fname = rb_str_new_frozen(fname);
    }
    parser_initialize(parser);

    ruby_sourcefile_string = fname;
    ruby_sourcefile = RSTRING_PTR(fname);
    /* Stored as lineno - 1, or 0 when no lineno argument was supplied. */
    ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1;

    return Qnil;
}

Class Method Details

.dedent_string(input, width) ⇒ Object



13345
13346
13347
13348
13349
13350
13351
13352
13353
13354
13355
13356
13357
13358
13359
13360
# File 'ripper.c', line 13345

/*
 * Implements Ripper.dedent_string(input, width).
 *
 * Modifies input in place: the first `col` bytes are removed, where col is
 * the value dedent_pos() reports for the string and the requested width.
 * Returns col as an Integer.
 */
static VALUE
parser_dedent_string(VALUE self, VALUE input, VALUE width)
{
    char *str;
    long len;
    int wid, col;

    StringValue(input);
    /* NOTE(review): NUM2UINT result is stored in a signed int -- confirm intended range. */
    wid = NUM2UINT(width);
    /* Called before the buffer pointer is fetched; the pointer is written through below. */
    rb_str_modify(input);
    RSTRING_GETMEM(input, str, len);
    col = dedent_pos(str, len, wid);
    /* Shift the remaining bytes to the front, then shrink the string by col. */
    MEMMOVE(str, str + col, char, len - col);
    rb_str_set_len(input, len - col);
    return INT2NUM(col);
}

.lex(src, filename = '-', lineno = 1) ⇒ Object

Tokenizes the Ruby program and returns an array of an array, which is formatted like [[lineno, column], type, token].

require 'ripper'
require 'pp'

pp Ripper.lex("def m(a) nil end")
  #=> [[[1,  0], :on_kw,     "def"],
       [[1,  3], :on_sp,     " "  ],
       [[1,  4], :on_ident,  "m"  ],
       [[1,  5], :on_lparen, "("  ],
       [[1,  6], :on_ident,  "a"  ],
       [[1,  7], :on_rparen, ")"  ],
       [[1,  8], :on_sp,     " "  ],
       [[1,  9], :on_kw,     "nil"],
       [[1, 12], :on_sp,     " "  ],
       [[1, 13], :on_kw,     "end"]]


43
44
45
# File 'lib/ripper/lexer.rb', line 43

# Tokenizes +src+ by building a Lexer for it (with the given +filename+ and
# starting +lineno+) and returning the result of Lexer#lex.
def Ripper.lex(src, filename = '-', lineno = 1)
  lexer = Lexer.new(src, filename, lineno)
  lexer.lex
end

.parse(src, filename = '(ripper)', lineno = 1) ⇒ Object

Parses the given Ruby program read from src. src must be a String, an IO, or an object with a #gets method.



18
19
20
# File 'lib/ripper/core.rb', line 18

# Instantiates the receiver for +src+ (with the given +filename+ and starting
# +lineno+), runs the parse, and returns whatever #parse returns.
def Ripper.parse(src, filename = '(ripper)', lineno = 1)
  parser = new(src, filename, lineno)
  parser.parse
end

.sexp(src, filename = '-', lineno = 1) ⇒ Object

EXPERIMENTAL

Parses src and creates an S-exp tree. Returns a more readable tree than Ripper.sexp_raw. This method is mainly for developer use.

require 'ripper'
require 'pp'

pp Ripper.sexp("def m(a) nil end")
  #=> [:program,
       [[:def,
        [:@ident, "m", [1, 4]],
        [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil]],
        [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]


31
32
33
34
35
# File 'lib/ripper/sexp.rb', line 31

# Parses +src+ with a SexpBuilderPP and returns the resulting tree, or nil
# when the builder reports an error after parsing.
def Ripper.sexp(src, filename = '-', lineno = 1)
  builder = SexpBuilderPP.new(src, filename, lineno)
  tree = builder.parse
  return nil if builder.error?

  tree
end

.sexp_raw(src, filename = '-', lineno = 1) ⇒ Object

EXPERIMENTAL

Parses src and creates an S-exp tree. This method is mainly for developer use.

require 'ripper'
require 'pp'

pp Ripper.sexp_raw("def m(a) nil end")
  #=> [:program,
       [:stmts_add,
        [:stmts_new],
        [:def,
         [:@ident, "m", [1, 4]],
         [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
         [:bodystmt,
          [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
          nil,
          nil,
          nil]]]]


57
58
59
60
61
# File 'lib/ripper/sexp.rb', line 57

# Parses +src+ with a SexpBuilder and returns the raw tree, or nil when the
# builder reports an error after parsing.
def Ripper.sexp_raw(src, filename = '-', lineno = 1)
  builder = SexpBuilder.new(src, filename, lineno)
  tree = builder.parse
  return nil if builder.error?

  tree
end

.slice(src, pattern, n = 0) ⇒ Object

EXPERIMENTAL

Parses src and returns the string that matched pattern. pattern should be written in a Regexp-like form.

require 'ripper'

p Ripper.slice('def m(a) nil end', 'ident')                   #=> "m"
p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+')  #=> "m(a)"
p Ripper.slice("<<EOS\nstring\nEOS",
               'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
    #=> "string\n"


113
114
115
116
117
118
# File 'lib/ripper/lexer.rb', line 113

# Matches +pattern+ against the tokens of +src+ and returns the text of the
# +n+-th group of the match, or nil when the pattern does not match.
def Ripper.slice(src, pattern, n = 0)
  match = token_match(src, pattern)
  match ? match.string(n) : nil
end

.token_match(src, pattern) ⇒ Object

:nodoc:



120
121
122
# File 'lib/ripper/lexer.rb', line 120

# Compiles +pattern+ into a TokenPattern and returns the result of matching
# it against +src+.
def Ripper.token_match(src, pattern)   #:nodoc:
  compiled = TokenPattern.compile(pattern)
  compiled.match(src)
end

.tokenize(src, filename = '-', lineno = 1) ⇒ Object

Tokenizes the Ruby program and returns an array of strings.

p Ripper.tokenize("def m(a) nil end")
   # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]


21
22
23
# File 'lib/ripper/lexer.rb', line 21

# Builds a Lexer for +src+ (with the given +filename+ and starting +lineno+)
# and returns the result of Lexer#tokenize.
def Ripper.tokenize(src, filename = '-', lineno = 1)
  lexer = Lexer.new(src, filename, lineno)
  lexer.tokenize
end

Instance Method Details

#column ⇒ Integer

Return column number of current parsing line. This number starts from 0.

Returns:

  • (Integer)


18082
18083
18084
18085
18086
18087
18088
18089
18090
18091
18092
18093
18094
18095
# File 'ripper.c', line 18082

/*
 * Implements Ripper#column.
 *
 * Raises ArgumentError when the object has not been initialized, returns
 * nil when no parse is in progress (parsing_thread is nil), and otherwise
 * returns the distance between parser->tokp and lex_pbeg as an Integer.
 */
static VALUE
ripper_column(VALUE self)
{
    struct parser_params *parser;
    long col;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    /* No thread is recorded as parsing, so no column is reported. */
    if (NIL_P(parser->parsing_thread)) return Qnil;
    col = parser->tokp - lex_pbeg;
    return LONG2NUM(col);
}

#encoding ⇒ Encoding

Return encoding of the source.

Returns:

  • (Encoding)


17605
17606
17607
17608
17609
17610
17611
17612
# File 'ripper.c', line 17605

/*
 * Implements #encoding: converts current_enc into a Ruby-level value with
 * rb_enc_from_encoding() and returns it.
 *
 * NOTE(review): `parser` is fetched but not referenced explicitly;
 * presumably current_enc is a macro that reads it -- confirm.
 */
VALUE
rb_parser_encoding(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    return rb_enc_from_encoding(current_enc);
}

#end_seen? ⇒ Boolean

Return true if parsed source ended by __END__.

Returns:

  • (Boolean)

Returns:

  • (Boolean)


17590
17591
17592
17593
17594
17595
17596
17597
# File 'ripper.c', line 17590

/*
 * Implements #end_seen?: returns Qtrue when ruby__end__seen is non-zero,
 * Qfalse otherwise.
 *
 * NOTE(review): `parser` is fetched but not referenced explicitly;
 * presumably ruby__end__seen is a macro that reads it -- confirm.
 */
VALUE
rb_parser_end_seen_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    return ruby__end__seen ? Qtrue : Qfalse;
}

#error? ⇒ Boolean

Return true if parsed source has errors.

Returns:

  • (Boolean)

Returns:

  • (Boolean)


17574
17575
17576
17577
17578
17579
17580
17581
# File 'ripper.c', line 17574

/*
 * Implements Ripper#error?: reports the parser's error_p flag as a Ruby
 * boolean (Qtrue when set, Qfalse otherwise).
 */
static VALUE
ripper_error_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    if (parser->error_p) {
        return Qtrue;
    }
    return Qfalse;
}

#filename ⇒ String

Return current parsing filename.

Returns:

  • (String)


18103
18104
18105
18106
18107
18108
18109
18110
18111
18112
18113
# File 'ripper.c', line 18103

/*
 * Implements Ripper#filename.
 *
 * Raises ArgumentError when the object has not been initialized; otherwise
 * returns ruby_sourcefile_string.
 */
static VALUE
ripper_filename(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    return ruby_sourcefile_string;
}

#lineno ⇒ Integer

Return line number of current parsing line. This number starts from 1.

Returns:

  • (Integer)


18122
18123
18124
18125
18126
18127
18128
18129
18130
18131
18132
18133
# File 'ripper.c', line 18122

/*
 * Implements Ripper#lineno.
 *
 * Raises ArgumentError when the object has not been initialized, returns
 * nil when no parse is in progress (parsing_thread is nil), and otherwise
 * returns ruby_sourceline as an Integer.
 */
static VALUE
ripper_lineno(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    /* No thread is recorded as parsing, so no line number is reported. */
    if (NIL_P(parser->parsing_thread)) return Qnil;
    return INT2NUM(ruby_sourceline);
}

#parse ⇒ Object

Starts parsing and returns the value of the root action.



18054
18055
18056
18057
18058
18059
18060
18061
18062
18063
18064
18065
18066
18067
18068
18069
18070
18071
18072
18073
# File 'ripper.c', line 18054

/*
 * Implements Ripper#parse.
 *
 * Raises ArgumentError when the object has not been initialized, or when a
 * parse is already recorded for this object (by the current thread or by
 * another one).  Otherwise records the current thread as the parsing thread,
 * runs ripper_parse0 under rb_ensure with ripper_ensure as the cleanup
 * handler, and returns parser->result.
 */
static VALUE
ripper_parse(VALUE self)
{
    struct parser_params *parser;
    VALUE current;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }

    current = rb_thread_current();
    if (!NIL_P(parser->parsing_thread)) {
        /* A parse is already recorded; say whether it belongs to this thread. */
        if (parser->parsing_thread != current)
            rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe");
        rb_raise(rb_eArgError, "Ripper#parse is not reentrant");
    }

    parser->parsing_thread = current;
    rb_ensure(ripper_parse0, self, ripper_ensure, self);

    return parser->result;
}

#yydebug ⇒ Boolean

Get yydebug.

Returns:

  • (Boolean)


17620
17621
17622
17623
17624
17625
17626
17627
# File 'ripper.c', line 17620

/*
 * Implements #yydebug: returns Qtrue when yydebug is non-zero, Qfalse
 * otherwise.
 *
 * NOTE(review): `parser` is fetched but not referenced explicitly;
 * presumably yydebug is a macro that reads it -- confirm.
 */
VALUE
rb_parser_get_yydebug(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    return yydebug ? Qtrue : Qfalse;
}

#yydebug=(flag) ⇒ Object

Set yydebug.



17635
17636
17637
17638
17639
17640
17641
17642
17643
# File 'ripper.c', line 17635

/*
 * Implements #yydebug=(flag): stores the truthiness of flag (RTEST) in
 * yydebug and returns flag unchanged.
 *
 * NOTE(review): `parser` is fetched but not referenced explicitly;
 * presumably yydebug is a macro that writes through it -- confirm.
 */
VALUE
rb_parser_set_yydebug(VALUE self, VALUE flag)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    yydebug = RTEST(flag);
    return flag;
}