Project

General

Profile

Feature #13686 » add-state-to-ripper-for-trunk.patch

aycabta (aycabta .), 06/27/2017 12:34 PM

View differences:

ext/ripper/lib/ripper/filter.rb (working copy)
25 25
      @__lexer = Lexer.new(src, filename, lineno)
26 26
      @__line = nil
27 27
      @__col = nil
28
      @__state = nil
28 29
    end
29 30

  
30 31
    # The file name of the input.
......
46 47
      @__col
47 48
    end
48 49

  
50
    # The scanner's state of the current token.
51
    # This value is the bitwise OR of zero or more of the +Ripper::EXPR_*+ constants.
52
    def state
53
      @__state
54
    end
55

  
49 56
    # Starts the parser.
50 57
    # +init+ is a data accumulator and is passed to the next event handler (as
51 58
    # of Enumerable#inject).
52 59
    def parse(init = nil)
53 60
      data = init
54
      @__lexer.lex.each do |pos, event, tok|
61
      @__lexer.lex.each do |pos, event, tok, state|
55 62
        @__line, @__col = *pos
63
        @__state = state
56 64
        data = if respond_to?(event, true)
57 65
               then __send__(event, tok, data)
58 66
               else on_default(event, tok, data)
ext/ripper/lib/ripper/lexer.rb (working copy)
23 23
  end
24 24

  
25 25
  # Tokenizes the Ruby program and returns an array of an array,
26
  # which is formatted like <code>[[lineno, column], type, token]</code>.
26
  # which is formatted like
27
  # <code>[[lineno, column], type, token, state]</code>.
27 28
  #
28 29
  #   require 'ripper'
29 30
  #   require 'pp'
30 31
  #
31 32
  #   pp Ripper.lex("def m(a) nil end")
32
  #     #=> [[[1,  0], :on_kw,     "def"],
33
  #          [[1,  3], :on_sp,     " "  ],
34
  #          [[1,  4], :on_ident,  "m"  ],
35
  #          [[1,  5], :on_lparen, "("  ],
36
  #          [[1,  6], :on_ident,  "a"  ],
37
  #          [[1,  7], :on_rparen, ")"  ],
38
  #          [[1,  8], :on_sp,     " "  ],
39
  #          [[1,  9], :on_kw,     "nil"],
40
  #          [[1, 12], :on_sp,     " "  ],
41
  #          [[1, 13], :on_kw,     "end"]]
33
  #   #=> [[[1,  0], :on_kw,     "def", Ripper::EXPR_FNAME                   ],
34
  #        [[1,  3], :on_sp,     " ",   Ripper::EXPR_FNAME                   ],
35
  #        [[1,  4], :on_ident,  "m",   Ripper::EXPR_ENDFN                   ],
36
  #        [[1,  5], :on_lparen, "(",   Ripper::EXPR_LABEL | Ripper::EXPR_BEG],
37
  #        [[1,  6], :on_ident,  "a",   Ripper::EXPR_ARG                     ],
38
  #        [[1,  7], :on_rparen, ")",   Ripper::EXPR_ENDFN                   ],
39
  #        [[1,  8], :on_sp,     " ",   Ripper::EXPR_BEG                     ],
40
  #        [[1,  9], :on_kw,     "nil", Ripper::EXPR_END                     ],
41
  #        [[1, 12], :on_sp,     " ",   Ripper::EXPR_END                     ],
42
  #        [[1, 13], :on_kw,     "end", Ripper::EXPR_END                     ]]
42 43
  #
43 44
  def Ripper.lex(src, filename = '-', lineno = 1)
44 45
    Lexer.new(src, filename, lineno).lex
45 46
  end
46 47

  
47 48
  class Lexer < ::Ripper   #:nodoc: internal use only
48
    Elem = Struct.new(:pos, :event, :tok)
49
    Elem = Struct.new(:pos, :event, :tok, :state)
49 50

  
50 51
    def tokenize
51 52
      parse().sort_by(&:pos).map(&:tok)
......
72 73
        if Elem === e and e.event == :on_tstring_content
73 74
          tok = e.tok.dup if w > 0 and /\A\s/ =~ e.tok
74 75
          if (n = dedent_string(e.tok, w)) > 0
75
            ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n])]
76
            ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n], e.state)]
76 77
            e.pos[1] += n
77 78
          end
78 79
        end
......
88 89
      buf = []
89 90
      @buf << buf
90 91
      @buf = buf
91
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
92
      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
92 93
    end
93 94

  
94 95
    def on_heredoc_end(tok)
95
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
96
      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
96 97
      @buf = @stack.pop
97 98
    end
98 99

  
99 100
    def _push_token(tok)
100
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
101
      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
101 102
    end
102 103

  
103 104
    (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
parse.y (working copy)
156 156
    const char *token;
157 157
    int linenum;
158 158
    int column;
159
    enum lex_state_e state;
159 160
    int nonspc;
160 161
    struct token_info *next;
161 162
} token_info;
......
4989 4990
    ptinfo->token = token;
4990 4991
    ptinfo->linenum = ruby_sourceline;
4991 4992
    ptinfo->column = token_info_get_column(parser, t);
4993
    ptinfo->state = lex_state;
4992 4994
    ptinfo->nonspc = token_info_has_nonspaces(parser, t);
4993 4995
    ptinfo->next = parser->token_info;
4994 4996

  
......
11389 11391
    return INT2NUM(ruby_sourceline);
11390 11392
}
11391 11393

  
11394
/*
11395
 *  call-seq:
11396
 *    ripper#state   -> Integer
11397
 *
11398
 *  Return scanner state of current token.
11399
 */
11400
static VALUE
11401
ripper_state(VALUE self)
11402
{
11403
    struct parser_params *parser;
11404

  
11405
    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
11406
    if (!ripper_initialized_p(parser)) {
11407
        rb_raise(rb_eArgError, "method called for uninitialized object");
11408
    }
11409
    if (NIL_P(parser->parsing_thread)) return Qnil;
11410
    return INT2NUM(lex_state);
11411
}
11412

  
11392 11413
#ifdef RIPPER_DEBUG
11393 11414
/* :nodoc: */
11394 11415
static VALUE
......
11436 11457
    rb_define_method(Ripper, "column", ripper_column, 0);
11437 11458
    rb_define_method(Ripper, "filename", ripper_filename, 0);
11438 11459
    rb_define_method(Ripper, "lineno", ripper_lineno, 0);
11460
    rb_define_method(Ripper, "state", ripper_state, 0);
11439 11461
    rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0);
11440 11462
    rb_define_method(Ripper, "encoding", rb_parser_encoding, 0);
11441 11463
    rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0);
......
11450 11472
    rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
11451 11473
    rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
11452 11474

  
11475
    /* ignore newline, +/- is a sign. */
11476
    rb_define_const(Ripper, "EXPR_BEG", INT2NUM(EXPR_BEG));
11477
    /* newline significant, +/- is an operator. */
11478
    rb_define_const(Ripper, "EXPR_END", INT2NUM(EXPR_END));
11479
    /* ditto, and unbound braces. */
11480
    rb_define_const(Ripper, "EXPR_ENDARG", INT2NUM(EXPR_ENDARG));
11481
    /* ditto, and unbound braces. */
11482
    rb_define_const(Ripper, "EXPR_ENDFN", INT2NUM(EXPR_ENDFN));
11483
    /* newline significant, +/- is an operator. */
11484
    rb_define_const(Ripper, "EXPR_ARG", INT2NUM(EXPR_ARG));
11485
    /* newline significant, +/- is an operator. */
11486
    rb_define_const(Ripper, "EXPR_CMDARG", INT2NUM(EXPR_CMDARG));
11487
    /* newline significant, +/- is an operator. */
11488
    rb_define_const(Ripper, "EXPR_MID", INT2NUM(EXPR_MID));
11489
    /* ignore newline, no reserved words. */
11490
    rb_define_const(Ripper, "EXPR_FNAME", INT2NUM(EXPR_FNAME));
11491
    /* right after `.' or `::', no reserved words. */
11492
    rb_define_const(Ripper, "EXPR_DOT", INT2NUM(EXPR_DOT));
11493
    /* immediate after `class', no here document. */
11494
    rb_define_const(Ripper, "EXPR_CLASS", INT2NUM(EXPR_CLASS));
11495
    /* flag bit, label is allowed. */
11496
    rb_define_const(Ripper, "EXPR_LABEL", INT2NUM(EXPR_LABEL));
11497
    /* flag bit, just after a label. */
11498
    rb_define_const(Ripper, "EXPR_LABELED", INT2NUM(EXPR_LABELED));
11499
    /* symbol literal as FNAME. */
11500
    rb_define_const(Ripper, "EXPR_FITEM", INT2NUM(EXPR_FITEM));
11501
    /* equals to +EXPR_BEG+ */
11502
    rb_define_const(Ripper, "EXPR_VALUE", INT2NUM(EXPR_VALUE));
11503
    /* equals to <tt>(EXPR_BEG | EXPR_MID | EXPR_CLASS)</tt> */
11504
    rb_define_const(Ripper, "EXPR_BEG_ANY", INT2NUM(EXPR_BEG_ANY));
11505
    /* equals to <tt>(EXPR_ARG | EXPR_CMDARG)</tt> */
11506
    rb_define_const(Ripper, "EXPR_ARG_ANY", INT2NUM(EXPR_ARG_ANY));
11507
    /* equals to <tt>(EXPR_END | EXPR_ENDARG | EXPR_ENDFN)</tt> */
11508
    rb_define_const(Ripper, "EXPR_END_ANY", INT2NUM(EXPR_END_ANY));
11509

  
11453 11510
    ripper_init_eventids1_table(Ripper);
11454 11511
    ripper_init_eventids2_table(Ripper);
11455 11512

  
test/ripper/test_filter.rb (working copy)
15 15
        data[:filename] = filename rescue nil
16 16
        data[:lineno] = lineno
17 17
        data[:column] = column
18
        data[:state] = state
18 19
        data[:token] = token
19 20
      end
20 21
      data
......
75 76
    assert_equal(last_columns, filter.column)
76 77
  end
77 78

  
79
  def test_filter_state
80
    data = {}
81
    src = File.read(filename)
82
    filter = Filter.new(src)
83
    assert_equal(nil, filter.state)
84
    filter.parse(data)
85
    assert_not_nil(data[:state])
86
    assert_not_nil(filter.state)
87
  end
88

  
78 89
  def test_filter_token
79 90
    data = {}
80 91
    filter = Filter.new("begin; puts 1; end")
test/ripper/test_ripper.rb (working copy)
17 17
    assert_nil @ripper.column
18 18
  end
19 19

  
20
  def test_state
21
    assert_nil @ripper.state
22
  end
23

  
20 24
  def test_encoding
21 25
    assert_equal Encoding::UTF_8, @ripper.encoding
22 26
    ripper = Ripper.new('# coding: iso-8859-15')
test/ripper/test_scanner_events.rb (working copy)
48 48
  def test_lex
49 49
    assert_equal [],
50 50
                 Ripper.lex('')
51
    assert_equal [[[1,0], :on_ident, "a"]],
51
    assert_equal [[[1,0], :on_ident, "a", Ripper::EXPR_CMDARG]],
52 52
                 Ripper.lex('a')
53
    assert_equal [[[1, 0], :on_kw, "nil"]],
53
    assert_equal [[[1, 0], :on_kw, "nil", Ripper::EXPR_END]],
54 54
                 Ripper.lex("nil")
55
    assert_equal [[[1, 0], :on_kw, "def"],
56
                  [[1, 3], :on_sp, " "],
57
                  [[1, 4], :on_ident, "m"],
58
                  [[1, 5], :on_lparen, "("],
59
                  [[1, 6], :on_ident, "a"],
60
                  [[1, 7], :on_rparen, ")"],
61
                  [[1, 8], :on_kw, "end"]],
55
    assert_equal [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME],
56
                  [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME],
57
                  [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN],
58
                  [[1, 5], :on_lparen, "(", Ripper::EXPR_BEG | Ripper::EXPR_LABEL],
59
                  [[1, 6], :on_ident, "a", Ripper::EXPR_ARG],
60
                  [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN],
61
                  [[1, 8], :on_kw, "end", Ripper::EXPR_END]],
62 62
                 Ripper.lex("def m(a)end")
63
    assert_equal [[[1, 0], :on_int, "1"],
64
                  [[1, 1], :on_nl, "\n"],
65
                  [[2, 0], :on_int, "2"],
66
                  [[2, 1], :on_nl, "\n"],
67
                  [[3, 0], :on_int, "3"]],
63
    assert_equal [[[1, 0], :on_int, "1", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
64
                  [[1, 1], :on_nl, "\n", Ripper::EXPR_BEG],
65
                  [[2, 0], :on_int, "2", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
66
                  [[2, 1], :on_nl, "\n", Ripper::EXPR_BEG],
67
                  [[3, 0], :on_int, "3", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
68 68
                 Ripper.lex("1\n2\n3")
69
    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
70
                  [[1, 5], :on_nl, "\n"],
71
                  [[2, 0], :on_tstring_content, "heredoc\n"],
72
                  [[3, 0], :on_heredoc_end, "EOS"]],
69
    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
70
                  [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
71
                  [[2, 0], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
72
                  [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
73 73
                 Ripper.lex("<<""EOS\nheredoc\nEOS")
74
    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
75
                  [[1, 5], :on_nl, "\n"],
76
                  [[2, 0], :on_heredoc_end, "EOS"]],
74
    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
75
                  [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
76
                  [[2, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
77 77
                 Ripper.lex("<<""EOS\nEOS"),
78 78
                 "bug#4543"
79
    assert_equal [[[1, 0], :on_regexp_beg, "/"],
80
                  [[1, 1], :on_tstring_content, "foo\nbar"],
81
                  [[2, 3], :on_regexp_end, "/"]],
79
    assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
80
                  [[1, 1], :on_tstring_content, "foo\nbar", Ripper::EXPR_BEG],
81
                  [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
82 82
                 Ripper.lex("/foo\nbar/")
83
    assert_equal [[[1, 0], :on_regexp_beg, "/"],
84
                  [[1, 1], :on_tstring_content, "foo\n\u3020"],
85
                  [[2, 3], :on_regexp_end, "/"]],
83
    assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
84
                  [[1, 1], :on_tstring_content, "foo\n\u3020", Ripper::EXPR_BEG],
85
                  [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
86 86
                 Ripper.lex("/foo\n\u3020/")
87
    assert_equal [[[1, 0], :on_tstring_beg, "'"],
88
                  [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0"],
89
                  [[2, 3], :on_tstring_end, "'"]],
87
    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
88
                  [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0", Ripper::EXPR_BEG],
89
                  [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
90 90
                 Ripper.lex("'foo\n\xe3\x80\xa0'")
91
    assert_equal [[[1, 0], :on_tstring_beg, "'"],
92
                  [[1, 1], :on_tstring_content, "\u3042\n\u3044"],
93
                  [[2, 3], :on_tstring_end, "'"]],
91
    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
92
                  [[1, 1], :on_tstring_content, "\u3042\n\u3044", Ripper::EXPR_BEG],
93
                  [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
94 94
                 Ripper.lex("'\u3042\n\u3044'")
95
    assert_equal [[[1, 0], :on_rational, "1r"],
96
                  [[1, 2], :on_nl, "\n"],
97
                  [[2, 0], :on_imaginary, "2i"],
98
                  [[2, 2], :on_nl, "\n"],
99
                  [[3, 0], :on_imaginary, "3ri"],
100
                  [[3, 3], :on_nl, "\n"],
101
                  [[4, 0], :on_rational, "4.2r"],
102
                  [[4, 4], :on_nl, "\n"],
103
                  [[5, 0], :on_imaginary, "5.6ri"],
95
    assert_equal [[[1, 0], :on_rational, "1r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
96
                  [[1, 2], :on_nl, "\n", Ripper::EXPR_BEG],
97
                  [[2, 0], :on_imaginary, "2i", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
98
                  [[2, 2], :on_nl, "\n", Ripper::EXPR_BEG],
99
                  [[3, 0], :on_imaginary, "3ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
100
                  [[3, 3], :on_nl, "\n", Ripper::EXPR_BEG],
101
                  [[4, 0], :on_rational, "4.2r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
102
                  [[4, 4], :on_nl, "\n", Ripper::EXPR_BEG],
103
                  [[5, 0], :on_imaginary, "5.6ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
104 104
                 ],
105 105
                 Ripper.lex("1r\n2i\n3ri\n4.2r\n5.6ri")
106
     assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS"],
107
                   [[1, 6], :on_nl, "\n"],
108
                   [[2, 0], :on_ignored_sp, "  "],
109
                   [[2, 2], :on_tstring_content, "heredoc\n"],
110
                   [[3, 0], :on_heredoc_end, "EOS"]
106
     assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS", Ripper::EXPR_BEG],
107
                   [[1, 6], :on_nl, "\n", Ripper::EXPR_BEG],
108
                   [[2, 0], :on_ignored_sp, "  ", Ripper::EXPR_BEG],
109
                   [[2, 2], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
110
                   [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]
111 111
                 ],
112 112
                 Ripper.lex("<<~EOS\n  heredoc\nEOS")
113
    assert_equal [[[1, 0], :on_tstring_beg, "'"],
114
                  [[1, 1], :on_tstring_content, "foo"]],
113
    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
114
                  [[1, 1], :on_tstring_content, "foo", Ripper::EXPR_BEG]],
115 115
                 Ripper.lex("'foo")
116 116
  end
117 117