Class: StringParser

Inherits:
Object
  • Object
show all
Defined in:
lib/rsyntaxtree/string_parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ StringParser

Returns a new instance of StringParser.



50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rsyntaxtree/string_parser.rb', line 50

# Creates a parser for the bracket-notation string +str+.
#
# The input is normalised first (on a copy; +str+ itself is not modified):
# tabs are deleted, every whitespace run becomes a single space, and the
# space in front of an opening bracket is dropped.
#
# @param str [String] raw bracket notation
def initialize(str)
  # Non-bang gsub is chained on purpose: gsub! returns nil when nothing
  # matched, so it cannot be chained safely.
  normalized = str.gsub(/\t/, "")
                  .gsub(/\s+/, " ")
                  .gsub(/\] \[/, "][")
                  .gsub(/ \[/, "[")

  @data  = normalized      # cleaned-up text consumed by the tokenizer
  @elist = ElementList.new # elements collected while parsing
  @pos   = 0               # current read position within @data
  @id    = 1               # id handed to the next element
  @level = 0               # current nesting level in the diagram
  @tncnt = Hash.new        # node name => number of occurrences
end

Instance Attribute Details

#data ⇒ Object

Returns the value of attribute data.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @data, the cleaned-up input string stored by #initialize.
def data
  @data
end

#elist ⇒ Object

Returns the value of attribute elist.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @elist, the ElementList created in #initialize that
# #make_tree adds elements to.
def elist
  @elist
end

#id ⇒ Object

Returns the value of attribute id.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @id, the id that will be given to the next element created
# (starts at 1 and is incremented by #make_tree after each element).
def id
  @id
end

#level ⇒ Object

Returns the value of attribute level.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @level, the current nesting level in the diagram
# (starts at 0; adjusted by #make_tree while it descends and returns).
def level
  @level
end

#pos ⇒ Object

Returns the value of attribute pos.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @pos, the current read position within @data
# (starts at 0 and is advanced by #get_next_token).
def pos
  @pos
end

#tncnt ⇒ Object

Returns the value of attribute tncnt.



49
50
51
# File 'lib/rsyntaxtree/string_parser.rb', line 49

# Reader for @tncnt, the hash of node-name counts maintained by #count_node.
def tncnt
  @tncnt
end

Instance Method Details

#auto_subscript ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/rsyntaxtree/string_parser.rb', line 106

# Appends a running subscript ("_1", "_2", ...) to the content of every
# ETYPE_NODE element whose content is recorded more than once in @tncnt.
# Elements of any other type, and names counted at most once, are left
# untouched.
#
# @return [Hash] the @tncnt hash
def auto_subscript
  seen = Hash.new(0) # content => how many such nodes were numbered so far

  @elist.get_elements.each do |element|
    next unless element.type == ETYPE_NODE

    label = element.content
    total = @tncnt[label] || 1 # names missing from @tncnt count as unique
    next unless total > 1

    seen[label] += 1
    element.content += ("_" + seen[label].to_s)
  end

  @tncnt
end

#count_node(name) ⇒ Object



133
134
135
136
137
138
139
140
# File 'lib/rsyntaxtree/string_parser.rb', line 133

# Records one more occurrence of the node called +name+ in @tncnt.
# Surrounding whitespace is stripped before the name is used as a key.
#
# @param name [String] node name, possibly padded with whitespace
# @return [Integer] the updated count for that name
def count_node(name)
  key = name.strip
  @tncnt[key] = (@tncnt[key] || 0) + 1
end

#get_elementlist ⇒ Object



102
103
104
# File 'lib/rsyntaxtree/string_parser.rb', line 102

# Returns the element list held in @elist (the same object #elist returns).
def get_elementlist
  @elist
end

#get_next_token ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/rsyntaxtree/string_parser.rb', line 142

# Reads the next token from @data, starting at @pos, and advances @pos.
#
# A token is one of:
# * "[" followed by the text up to (not including) the next bracket,
# * a lone "]",
# * the plain text up to (not including) the next bracket.
# Newline and carriage-return characters are skipped. Once @pos reaches the
# last character of @data (or beyond), "" is returned and @pos is left
# alone, so the final character is never returned as a token.
#
# @return [String] the token, or "" when the input is exhausted
def get_next_token
  chars = @data.split(//)
  return "" if (@pos + 1) >= chars.length

  token    = ""
  consumed = 0 # characters examined, including the terminating bracket
  finished = false

  until finished || (@pos + consumed) >= chars.length
    ch = chars[@pos + consumed]

    if ch == "["
      # An opening bracket starts a token, or ends the one being read.
      if consumed.zero?
        token += ch
      else
        finished = true
      end
    elsif ch == "]"
      # A closing bracket is a token of its own only when it comes first.
      token += ch if consumed.zero?
      finished = true
    elsif ch !~ /[\n\r]/
      token += ch
    end

    consumed += 1
  end

  # Step back onto the terminating bracket so the next call starts on it;
  # always move at least one character forward.
  @pos += consumed > 1 ? consumed - 1 : 1
  token
end

#make_tree(parent) ⇒ Object



182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/rsyntaxtree/string_parser.rb', line 182

# Recursively consumes tokens from #get_next_token and appends the matching
# elements to @elist as children of +parent+.
#
# * A token starting with "[" opens a node. If the text after the bracket
#   contains a space, the part before the first space becomes the node and
#   the remainder (leading space included) becomes a child element one level
#   deeper. The method then recurses with the new node as parent.
# * Any other non-blank token becomes an element directly under +parent+.
# * "]" or "" ends the current level.
#
# Every call decrements @level once on exit, so a top-level call that
# started at level 0 finishes with @level == -1.
#
# @param parent [Integer] id of the element the new elements belong to
# @return [Integer] the value of @level after the final decrement
def make_tree(parent)
  token = get_next_token.strip

  until token == "" || token == "]"
    if token.start_with?("[")
      label     = token[1..-1]
      split_at  = label.index(" ")
      child_of  = -1

      if split_at
        head = label[0, split_at]
        tail = label[split_at..-1]

        node = Element.new(@id, parent, head, @level)
        @id += 1
        @elist.add(node)
        child_of = node.id
        count_node(head)

        # The text after the node name hangs directly below that node.
        leaf = Element.new(@id, @id - 1, tail, @level + 1)
        @id += 1
        @elist.add(leaf)
      else
        node = Element.new(@id, parent, label, @level)
        @id += 1
        child_of = node.id
        @elist.add(node)
        count_node(label)
      end

      @level += 1
      make_tree(child_of)
    elsif token.strip != ""
      # Tokens after the first are not stripped, so blank ones show up here.
      item = Element.new(@id, parent, token, @level)
      @id += 1
      @elist.add(item)
      count_node(token)
    end

    token = get_next_token
  end

  @level -= 1
end

#parse ⇒ Object



98
99
100
# File 'lib/rsyntaxtree/string_parser.rb', line 98

# Parses the stored input by running #make_tree with 0 as the parent id.
#
# @return whatever #make_tree returns
def parse
  make_tree(0)
end

#valid? ⇒ Boolean

Caution: this is a quick-and-dirty solution.

Returns:

  • (Boolean)


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/rsyntaxtree/string_parser.rb', line 66

# Quick-and-dirty sanity check of the stored input.
#
# The input is accepted when all of the following hold:
# * it is non-empty and looks like "[... ...]" (a bracket pair containing
#   at least one space),
# * "[" and "]" occur equally often and a "]" never outnumbers the "["
#   seen so far,
# * building the tree registers at least one node name, none of them "".
#
# Note that this runs make_tree(0), so it changes the parser's state.
#
# @return [Boolean]
def valid?
  return false if @data.length < 1
  return false if /\A\s*\[.+ .+\]\s*\z/ !~ @data

  opened = 0
  closed = 0
  @data.strip.each_char do |chr|
    case chr
    when '['
      opened += 1
    when ']'
      closed += 1
      # Stop at the first surplus "]"; the counts then differ below.
      break if opened < closed
    end
  end
  return false unless opened == closed

  make_tree(0)
  return false if @tncnt.empty?

  !@tncnt.key?("")
end