Class: CoarseTokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/anbt-sql-formatter/coarse-tokenizer.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ CoarseTokenizer

Returns a new instance of CoarseTokenizer.



22
23
24
25
26
# File 'lib/anbt-sql-formatter/coarse-tokenizer.rb', line 22

# Prepares the regexps that recognize SQL comment delimiters.
#
# The patterns are stored in instance variables and interpolated into
# anchored regexps by #tokenize.
def initialize
  # "/*" opens a multi-line comment and "*/" closes it.
  @comment_multi_start, @comment_multi_end = /\/\*/, /\*\//
  # "--" opens a single-line comment.
  @comment_single_start = /--/
end

Instance Method Details

#shift_to_buf(n) ⇒ Object



141
142
143
144
# File 'lib/anbt-sql-formatter/coarse-tokenizer.rb', line 141

# Moves the first +n+ characters of the remaining input (@str) onto the end
# of the pending buffer (@buf).
#
# If fewer than +n+ characters remain, everything that is left is moved and
# the remaining input becomes the empty string.
#
# @param n [Integer] number of leading characters to move
# @return [String] the remaining input after the move
def shift_to_buf(n)
  @buf += @str[0...n]
  # String#[] returns nil when the range starts past the end of the string
  # (e.g. "a"[2..]). Fall back to "" so that code which keeps calling
  # @str.size never sees nil.
  @str = @str[n..] || ""
end

#shift_token(length, type, mode, flag) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/anbt-sql-formatter/coarse-tokenizer.rb', line 147

# Consumes a delimiter of +length+ characters from the head of the remaining
# input (@str) and switches the tokenizer to +mode+.
#
# With +flag+ :start, the text collected so far (@buf) is emitted as a token
# of +type+ (if there is any), and the delimiter becomes the start of the
# next buffer. With +flag+ :end, the buffer plus the delimiter is emitted as
# a token of +type+ (if the buffer is non-empty) and the buffer is cleared.
#
# @param length [Integer] number of characters the delimiter occupies
# @param type [Symbol] type given to the emitted token
# @param mode [Symbol] mode stored in @mode afterwards
# @param flag [Symbol] :start or :end
# @return [Symbol] +mode+
# @raise [RuntimeError] when +flag+ is neither :start nor :end
def shift_token(length, type, mode, flag)
  opening = (:start == flag)
  # Reject unknown flags before touching any state.
  raise "must not happen" unless opening || :end == flag

  delimiter = @str[0..(length - 1)] # <length> char from head
  has_pending = !@buf.empty?

  if opening
    @result.push(CoarseToken.new(type, @buf)) if has_pending
    @buf = delimiter
  else
    @result.push(CoarseToken.new(type, @buf + delimiter)) if has_pending
    @buf = ""
  end

  @str = @str[length..]
  @mode = mode
end

#tokenize(str) ⇒ Object

These are exclusive:

  • double quote string

  • single quote string

  • single line comment

  • multiple line comment

ソース先頭から見ていって先に現れたものが優先される。



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/anbt-sql-formatter/coarse-tokenizer.rb', line 40

# Splits +str+ into coarse tokens: plain text, double-quoted strings,
# single-quoted strings, single-line comments and multi-line comments.
#
# These five states are mutually exclusive. The input is scanned from the
# beginning, and whichever delimiter appears first takes precedence, so for
# example a "--" inside a quoted string does not start a comment.
#
# "\r\n" is normalized to "\n" before scanning. A backslash shifts itself and
# the following character into the current token without interpreting them.
# Text left over when the input ends (including an unterminated string or
# comment) is emitted as a final token typed with the current mode.
#
# @param str [String] SQL source text
# @return [Array<CoarseToken>] tokens in source order
def tokenize(str)
  @str = str.gsub("\r\n", "\n")
  @result = []
  @buf = ""
  # shift_token keeps @mode up to date, so it is the only state needed to
  # know which construct the scanner is currently inside.
  @mode = :plain

  # The delimiter patterns do not change during a scan; anchor them once
  # instead of rebuilding the regexps on every iteration.
  single_start = /\A(#{@comment_single_start})/
  multi_start  = /\A(#{@comment_multi_start})/
  multi_end    = /\A(#{@comment_multi_end})/

  until @str.empty?
    if @mode == :plain && @str.start_with?('"')
      ## begin double quote
      shift_token(1, :plain, :quote_double, :start)

    elsif @mode == :quote_double && @str.start_with?('"')
      ## end double quote
      if @str.start_with?('"."') ## schema.table
        # Only a literal dot joins two quoted identifiers; any other
        # character between the quotes (comma, space, ...) ends the token.
        shift_to_buf(3)
      elsif @str.start_with?('""') ## escaped double quote
        shift_to_buf(2)
      else
        shift_token(1, :quote_double, :plain, :end)
      end

    elsif @mode == :plain && @str.start_with?("'")
      ## begin single quote
      shift_token(1, :plain, :quote_single, :start)

    elsif @mode == :quote_single && @str.start_with?("'")
      ## end single quote
      if @str.start_with?("''") ## escaped single quote
        shift_to_buf(2)
      else
        shift_token(1, :quote_single, :plain, :end)
      end

    elsif @mode == :plain && single_start =~ @str
      ## begin single line comment
      shift_token(Regexp.last_match(1).size, :plain, :comment_single, :start)

    elsif @mode == :comment_single && @str.start_with?("\n")
      ## end single line comment
      shift_token(1, :comment_single, :plain, :end)

    elsif @mode == :plain && multi_start =~ @str
      ## begin multi line comment
      shift_token(Regexp.last_match(1).size, :plain, :comment_multi, :start)

    elsif @mode == :comment_multi && multi_end =~ @str
      ## end multi line comment
      shift_token(Regexp.last_match(1).size, :comment_multi, :plain, :end)

    elsif @str.start_with?("\\")
      ## escape char
      # Take the backslash and the character after it. A backslash that is
      # the very last character has nothing after it, so never ask for more
      # than what remains.
      shift_to_buf([2, @str.size].min)

    else
      shift_to_buf(1)

    end
  end

  rest = @buf + @str
  @result << CoarseToken.new(@mode, rest) unless rest.empty?

  @result
end