Class: RSyntaxTree::StringParser

Inherits:
Object
  • Object
show all
Defined in:
lib/rsyntaxtree/string_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str, fontset, fontsize, global) ⇒ StringParser

Returns a new instance of StringParser.



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 19

# Builds a parser for +str+: normalises the text and resets all parse state.
#
# @param str [String] raw bracket-notation text (not modified)
# @param fontset [Hash] font table; its :normal entry is overwritten in place
#   with the :cjk entry when the normalised text reports CJK content
# @param fontsize [Object] stored unchanged in @fontsize
# @param global [Object] stored unchanged in @global
def initialize(str, fontset, fontsize, global)
  @global = global
  @fontset = fontset
  @fontsize = fontsize

  # Runs of line breaks collapse into a single newline.
  cleaned = str.gsub(/[\n\r]+/m, "\n")
  # A backslash followed by a newline (plus any whitespace after it) becomes
  # the two literal characters backslash and 'n'.
  cleaned.gsub!(/\\\n\s*/m, "\\n")

  # Squeeze whitespace first, then remove it around brackets. The order of
  # these substitutions matters, so they are applied strictly in sequence.
  [
    [/\s+/, " "],
    [/\]\s+\[/, "]["],
    [/\s+\[/, "["],
    [/\[\s+/, "["],
    [/\s+\]/, "]"],
    [/\]\s+/, "]"]
  ].each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }

  # "<N>" expands to N padding blocks; "<>" and "<0>" expand to exactly one.
  cleaned.gsub!(/<(\d*)>/) do
    requested = Regexp.last_match(1).to_i
    requested.positive? ? WHITESPACE_BLOCK * requested : WHITESPACE_BLOCK
  end

  @data = cleaned # kept for the tokenizer
  fontset[:normal] = fontset[:cjk] if @data.contains_cjk?
  @elist = ElementList.new # elements collected while parsing
  @pos = 0 # read position in the sentence
  @id = 1 # ID for the next element
  @level = 0 # current level in the diagram
end

Instance Attribute Details

#data ⇒ Object

Returns the value of attribute data.



17
18
19
# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Returns the input text held in @data (the constructor stores the
# whitespace-normalised string there).
def data
  @data
end

#elist ⇒ Object

Returns the value of attribute elist.



17
18
19
# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Returns the internal element list held in @elist (an ElementList created by
# the constructor).
def elist
  @elist
end

#id ⇒ Object

Returns the value of attribute id.



17
18
19
# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Returns @id, the ID that will be given to the next element created
# (starts at 1).
def id
  @id
end

#level ⇒ Object

Returns the value of attribute level.



17
18
19
# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Returns @level, the current level in the diagram (starts at 0).
def level
  @level
end

#pos ⇒ Object

Returns the value of attribute pos.



17
18
19
# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Returns @pos, the current read position in the sentence (starts at 0).
def pos
  @pos
end

Class Method Details

.valid?(data) ⇒ Boolean

Returns:

  • (Boolean)

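Raises:

  • (RSTError) — if the input is empty, contains an empty pair of brackets, contains no brackets, or has unbalanced brackets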



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/rsyntaxtree/string_parser.rb', line 53

# Checks that +data+ is usable bracket notation.
#
# Brackets preceded by an (unescaped) backslash are ignored when counting.
#
# @param data [String] text to validate
# @return [true] when the text has at least one bracket and the numbers of
#   opening and closing brackets agree
# @raise [RSTError] when the text is empty, contains "[]" (optionally with
#   whitespace inside), has no brackets at all, or the brackets do not match
def self.valid?(data)
  raise RSTError, +"Error: input text is empty" if data.empty?
  raise RSTError, +"Error: inside the brackets is empty" if data.match?(/\[\s*\]/m)

  opened = 0
  closed = 0
  escaped = false

  data.strip.each_char do |chr|
    if chr == "\\"
      # Two backslashes in a row cancel each other out.
      escaped = !escaped
      next
    end

    # Any non-backslash character consumes a pending escape.
    was_escaped = escaped
    escaped = false
    next if was_escaped

    case chr
    when "["
      opened += 1
    when "]"
      closed += 1
      # A close with no matching open can never be repaired; stop scanning.
      break if opened < closed
    end
  end

  if opened.zero? && closed.zero?
    raise RSTError, +"Error: input text does not contain paired brackets"
  end
  raise RSTError, +"Error: open and close brackets do not match" unless opened == closed

  true
end

Instance Method Details

#get_elementlist ⇒ Object



101
102
103
# File 'lib/rsyntaxtree/string_parser.rb', line 101

# Returns the element list held in @elist (the same object the +elist+
# reader returns).
def get_elementlist
  @elist
end

#get_next_token ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/rsyntaxtree/string_parser.rb', line 105

# Reads the next token from @data starting at @pos and advances @pos.
#
# A token is either a lone "]", or a run of text that may begin with "[" and
# ends just before the next unescaped "[" or "]". Backslash escapes are
# rewritten as follows:
#   \[ \] \\              kept together with their backslash
#   backslash + space     becomes backslash + "n"
#   \n \{ \} \< \> \^ \+ \* \_ \= \~ \| \-   kept together with their backslash
#   anything else         the backslash is dropped
#
# @return [String] the token, or "" once @pos is at or beyond the last
#   character of @data
def get_next_token
  chars = @data.split(//)
  return "" if (@pos + 1) >= chars.length

  token = +""
  escaped = false
  finished = false
  offset = 0

  until finished || (@pos + offset) >= chars.length
    ch = chars[@pos + offset]

    if escaped
      # The previous character was a backslash: decide how the pair is kept.
      escaped = false
      token << case ch
               when "[" then '\\[' # keep as an escaped square bracket
               when "]" then '\\]' # keep as an escaped square bracket
               when "\\" then '\\\\'
               when " " then '\\n'
               when /[n{}<>^+*_=~|-]/ then '\\' + ch
               else ch
               end
    else
      case ch
      when "["
        # An opening bracket starts a token but terminates a running one.
        if offset.positive?
          finished = true
        else
          token << ch
        end
      when "]"
        # A closing bracket is a token of its own only at the start.
        token << ch if offset.zero?
        finished = true
      when "\\"
        escaped = true
      else
        token << ch
      end
    end

    offset += 1
  end

  # Step back onto the terminating bracket so the next call starts there,
  # but always move forward by at least one character.
  @pos += offset > 1 ? offset - 1 : 1
  token
end

#make_tree(parent) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/rsyntaxtree/string_parser.rb', line 174

# Consumes tokens until a closing bracket or the end of input and appends the
# resulting elements to @elist as children of +parent+.
#
# Tokens starting with "[" open a new node: the text up to the first space is
# the node label, and any remainder (including that space) becomes a child
# element one level deeper. The method then recurses with the new node as
# parent. Every other non-blank token (this includes tokens that begin with an
# escaped bracket, since those start with a backslash) becomes a plain
# element under +parent+.
#
# Side effects: advances the tokenizer, increments @id once per element
# created, and decrements @level by one before returning (the caller
# increments it before recursing).
#
# @param parent [Integer] ID of the element that new elements are attached to
def make_tree(parent)
  token = get_next_token.strip

  while token != "" && token != "]"
    if token.start_with?("[")
      body = token[1..-1]
      split_at = body.index(" ")
      label = split_at ? body[0, split_at] : body

      node = Element.new(@id, parent, label, @level, @fontset, @fontsize, @global)
      @id += 1
      @elist.add(node)

      if split_at
        # Text after the label hangs directly below the node just created,
        # whose ID is the one handed out immediately before this one.
        leaf = Element.new(@id, @id - 1, body[split_at..-1], @level + 1, @fontset, @fontsize, @global)
        @id += 1
        @elist.add(leaf)
      end

      @level += 1
      make_tree(node.id)
    elsif !token.strip.empty?
      element = Element.new(@id, parent, token, @level, @fontset, @fontsize, @global)
      @id += 1
      @elist.add(element)
    end

    # Only the first token of a call is stripped; later ones are used as read.
    token = get_next_token
  end

  @level -= 1
end

#parse ⇒ Object



96
97
98
99
# File 'lib/rsyntaxtree/string_parser.rb', line 96

# Parses the stored text: builds the element tree with 0 as the parent ID of
# the top-level elements, then asks the element list to set its hierarchy.
#
# @return [Object] whatever @elist.set_hierarchy returns
def parse
  make_tree(0)
  @elist.set_hierarchy
end