Class: Ripper

Inherits:
Object show all
Defined in:
lib/ripper.rb,
lib/ripper/core.rb,
lib/ripper/sexp.rb,
lib/ripper/lexer.rb,
lib/ripper/filter.rb,
ripper.c

Overview

Ripper is a Ruby script parser.

You can get information from the parser with event-based style. Information such as abstract syntax trees or simple lexical analysis of the Ruby program.

Usage

Ripper provides an easy interface for parsing your program into a symbolic expression tree (or S-expression).

Understanding the output of the parser may come as a challenge, it's recommended you use PP to format the output for legibility.

require 'ripper'
require 'pp'

pp Ripper.sexp('def hello(world) "Hello, #{world}!"; end')
  #=> [:program,
       [[:def,
         [:@ident, "hello", [1, 4]],
         [:paren,
          [:params, [[:@ident, "world", [1, 10]]], nil, nil, nil, nil, nil, nil]],
         [:bodystmt,
          [[:string_literal,
            [:string_content,
             [:@tstring_content, "Hello, ", [1, 18]],
             [:string_embexpr, [[:var_ref, [:@ident, "world", [1, 27]]]]],
             [:@tstring_content, "!", [1, 33]]]]],
          nil,
          nil,
          nil]]]]

You can see in the example above, the expression starts with :program.

From here, a method definition at :def, followed by the method's identifier :@ident. After the method's identifier comes the parentheses :paren and the method parameters under :params.

Next is the method body, starting at :bodystmt (stmt meaning statement), which contains the full definition of the method.

In our case, we're simply returning a String, so next we have the :string_literal expression.

Within our :string_literal you'll notice two @tstring_content, this is the literal part for Hello, and !. Between the two @tstring_content statements is a :string_embexpr, where embexpr is an embedded expression. Our expression consists of a local variable, or var_ref, with the identifier (@ident) of world.

Resources

Requirements

  • ruby 1.9 (support CVS HEAD only)

  • bison 1.28 or later (Other yaccs do not work)

License

Ruby License.

                                              Minero Aoki
                                      aamine@loveruby.net
                                    http://i.loveruby.net

Direct Known Subclasses

Lexer, SexpBuilder, SexpBuilderPP

Defined Under Namespace

Classes: Filter, Lexer, SexpBuilder, SexpBuilderPP, TokenPattern

Constant Summary collapse

PARSER_EVENTS =

This array contains name of parser events.

PARSER_EVENT_TABLE.keys
SCANNER_EVENTS =

This array contains name of scanner events.

SCANNER_EVENT_TABLE.keys
EVENTS =

This array contains name of all ripper events.

PARSER_EVENTS + SCANNER_EVENTS
Version =

version of Ripper

rb_usascii_str_new2(RIPPER_VERSION)

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(src, filename = "(ripper)", lineno = 1) ⇒ Object

Create a new Ripper object. src must be a String, an IO, or an Object which has #gets method.

This method does not starts parsing. See also Ripper#parse and Ripper.parse.


17751
17752
17753
17754
17755
17756
17757
17758
17759
17760
17761
17762
17763
17764
17765
17766
17767
17768
17769
17770
17771
17772
17773
17774
17775
17776
17777
17778
17779
17780
17781
17782
17783
# File 'ripper.c', line 17751

static VALUE
ripper_initialize(int argc, VALUE *argv, VALUE self)
{
    struct parser_params *parser;
    VALUE src, fname, lineno;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
    if (RB_TYPE_P(src, T_FILE)) {
        parser->parser_lex_gets = ripper_lex_get_generic;
    }
    else {
        StringValue(src);
        parser->parser_lex_gets = lex_get_str;
    }
    parser->parser_lex_input = src;
    parser->eofp = Qfalse;
    if (NIL_P(fname)) {
        fname = STR_NEW2("(ripper)");
  OBJ_FREEZE(fname);
    }
    else {
        StringValue(fname);
  fname = rb_str_new_frozen(fname);
    }
    parser_initialize(parser);

    parser->parser_ruby_sourcefile_string = fname;
    parser->parser_ruby_sourcefile = RSTRING_PTR(fname);
    parser->parser_ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1;

    return Qnil;
}

Class Method Details

.lex(src, filename = '-', lineno = 1) ⇒ Object

Tokenizes the Ruby program and returns an array of an array, which is formatted like [[lineno, column], type, token].

require 'ripper'
require 'pp'

pp Ripper.lex("def m(a) nil end")
  #=> [[[1,  0], :on_kw,     "def"],
       [[1,  3], :on_sp,     " "  ],
       [[1,  4], :on_ident,  "m"  ],
       [[1,  5], :on_lparen, "("  ],
       [[1,  6], :on_ident,  "a"  ],
       [[1,  7], :on_rparen, ")"  ],
       [[1,  8], :on_sp,     " "  ],
       [[1,  9], :on_kw,     "nil"],
       [[1, 12], :on_sp,     " "  ],
       [[1, 13], :on_kw,     "end"]]

42
43
44
# File 'lib/ripper/lexer.rb', line 42

def Ripper.lex(src, filename = '-', lineno = 1)
  Lexer.new(src, filename, lineno).lex
end

.parse(src, filename = '(ripper)', lineno = 1) ⇒ Object

Parses the given Ruby program read from src. src must be a String or an IO or a object with a #gets method.


17
18
19
# File 'lib/ripper/core.rb', line 17

def Ripper.parse(src, filename = '(ripper)', lineno = 1)
  new(src, filename, lineno).parse
end

.sexp(src, filename = '-', lineno = 1) ⇒ Object

EXPERIMENTAL

Parses src and create S-exp tree. Returns more readable tree rather than Ripper.sexp_raw. This method is mainly for developer use.

require 'ripper'
require 'pp'

pp Ripper.sexp("def m(a) nil end")
  #=> [:program,
       [[:def,
        [:@ident, "m", [1, 4]],
        [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil]],
        [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]

30
31
32
33
34
# File 'lib/ripper/sexp.rb', line 30

def Ripper.sexp(src, filename = '-', lineno = 1)
  builder = SexpBuilderPP.new(src, filename, lineno)
  sexp = builder.parse
  sexp unless builder.error?
end

.sexp_raw(src, filename = '-', lineno = 1) ⇒ Object

EXPERIMENTAL

Parses src and create S-exp tree. This method is mainly for developer use.

require 'ripper'
require 'pp'

pp Ripper.sexp_raw("def m(a) nil end")
  #=> [:program,
       [:stmts_add,
        [:stmts_new],
        [:def,
         [:@ident, "m", [1, 4]],
         [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
         [:bodystmt,
          [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
          nil,
          nil,
          nil]]]]

56
57
58
59
60
# File 'lib/ripper/sexp.rb', line 56

def Ripper.sexp_raw(src, filename = '-', lineno = 1)
  builder = SexpBuilder.new(src, filename, lineno)
  sexp = builder.parse
  sexp unless builder.error?
end

.slice(src, pattern, n = 0) ⇒ Object

EXPERIMENTAL

Parses src and return a string which was matched to pattern. pattern should be described as Regexp.

require 'ripper'

p Ripper.slice('def m(a) nil end', 'ident')                   #=> "m"
p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+')  #=> "m(a)"
p Ripper.slice("<<EOS\nstring\nEOS",
               'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
    #=> "string\n"

84
85
86
87
88
89
# File 'lib/ripper/lexer.rb', line 84

def Ripper.slice(src, pattern, n = 0)
  if m = token_match(src, pattern)
  then m.string(n)
  else nil
  end
end

.token_match(src, pattern) ⇒ Object

:nodoc:


91
92
93
# File 'lib/ripper/lexer.rb', line 91

def Ripper.token_match(src, pattern)   #:nodoc:
  TokenPattern.compile(pattern).match(src)
end

.tokenize(src, filename = '-', lineno = 1) ⇒ Object

Tokenizes the Ruby program and returns an array of strings.

p Ripper.tokenize("def m(a) nil end")
   # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]

20
21
22
# File 'lib/ripper/lexer.rb', line 20

def Ripper.tokenize(src, filename = '-', lineno = 1)
  Lexer.new(src, filename, lineno).tokenize
end

Instance Method Details

#columnInteger

Return column number of current parsing line. This number starts from 0.


17846
17847
17848
17849
17850
17851
17852
17853
17854
17855
17856
17857
17858
17859
# File 'ripper.c', line 17846

static VALUE
ripper_column(VALUE self)
{
    struct parser_params *parser;
    long col;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) return Qnil;
    col = parser->tokp - parser->parser_lex_pbeg;
    return LONG2NUM(col);
}

#encodingEncoding

Return encoding of the source.


17332
17333
17334
17335
17336
17337
17338
17339
# File 'ripper.c', line 17332

VALUE
rb_parser_encoding(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    return rb_enc_from_encoding(current_enc);
}

#end_seen?Boolean

Return true if parsed source ended by _END_.


17317
17318
17319
17320
17321
17322
17323
17324
# File 'ripper.c', line 17317

VALUE
rb_parser_end_seen_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    return ruby__end__seen ? Qtrue : Qfalse;
}

#error?Boolean

Return true if parsed source has errors.


17301
17302
17303
17304
17305
17306
17307
17308
# File 'ripper.c', line 17301

static VALUE
ripper_error_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    return parser->error_p ? Qtrue : Qfalse;
}

#filenameString

Return current parsing filename.


17867
17868
17869
17870
17871
17872
17873
17874
17875
17876
17877
# File 'ripper.c', line 17867

static VALUE
ripper_filename(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    return parser->parser_ruby_sourcefile_string;
}

#linenoInteger

Return line number of current parsing line. This number starts from 1.


17886
17887
17888
17889
17890
17891
17892
17893
17894
17895
17896
17897
# File 'ripper.c', line 17886

static VALUE
ripper_lineno(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) return Qnil;
    return INT2NUM(parser->parser_ruby_sourceline);
}

#parseObject

Start parsing and returns the value of the root action.


17818
17819
17820
17821
17822
17823
17824
17825
17826
17827
17828
17829
17830
17831
17832
17833
17834
17835
17836
17837
# File 'ripper.c', line 17818

static VALUE
ripper_parse(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (!NIL_P(parser->parsing_thread)) {
        if (parser->parsing_thread == rb_thread_current())
            rb_raise(rb_eArgError, "Ripper#parse is not reentrant");
        else
            rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe");
    }
    parser->parsing_thread = rb_thread_current();
    rb_ensure(ripper_parse0, self, ripper_ensure, self);

    return parser->result;
}

#yydebugBoolean

Get yydebug.


17347
17348
17349
17350
17351
17352
17353
17354
# File 'ripper.c', line 17347

VALUE
rb_parser_get_yydebug(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    return yydebug ? Qtrue : Qfalse;
}

#yydebug=(flag) ⇒ Object

Set yydebug.


17362
17363
17364
17365
17366
17367
17368
17369
17370
# File 'ripper.c', line 17362

VALUE
rb_parser_set_yydebug(VALUE self, VALUE flag)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    yydebug = RTEST(flag);
    return flag;
}