Class: Excite::Token

Inherits:
Object
  • Object
show all
Defined in:
lib/excite/crfparser.rb

Constant Summary collapse

BR_CHAR =
"\a"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str, part_of_speech = nil) ⇒ Token

Returns a new instance of Token.



290
291
292
293
# File 'lib/excite/crfparser.rb', line 290

# Builds a token from its text and an optional part-of-speech tag.
#
# @param str the token text; stored untouched and later exposed via #raw
# @param part_of_speech the tag for this token, or nil when none is known
#   (tags appear to be short strings such as 'br' — confirm against the tagger)
def initialize(str, part_of_speech = nil)
  @str, @part_of_speech = str, part_of_speech
end

Instance Attribute Details

#idx_in_node ⇒ Object (readonly)

Returns the value of attribute idx_in_node.



287
288
289
# File 'lib/excite/crfparser.rb', line 287

# Reader for @idx_in_node, which is assigned by #is_in_node!.
# Presumably the token's position among the tokens of its node — confirm
# against the caller that builds tokens.
#
# @return the value last passed to #is_in_node! as +idx_in_node+
def idx_in_node
  @idx_in_node
end

#label ⇒ Object

Returns the value of attribute label.



288
289
290
# File 'lib/excite/crfparser.rb', line 288

# Reader for @label.
# NOTE(review): this attribute is not marked readonly, so a writer presumably
# exists on the class; what assigns the label is not visible here.
#
# @return the current value of @label
def label
  @label
end

#node ⇒ Object (readonly)

Returns the value of attribute node.



287
288
289
# File 'lib/excite/crfparser.rb', line 287

# Reader for @node, which is assigned by #is_in_node!.
#
# @return the node object last passed to #is_in_node!
#   (its concrete type is not visible here)
def node
  @node
end

#node_token_count ⇒ Object (readonly)

Returns the value of attribute node_token_count.



287
288
289
# File 'lib/excite/crfparser.rb', line 287

# Reader for @node_token_count, which is assigned by #is_in_node!.
# Presumably the total number of tokens in the owning node — confirm against
# the caller that builds tokens.
#
# @return the value last passed to #is_in_node! as +node_token_count+
def node_token_count
  @node_token_count
end

#part_of_speech ⇒ Object (readonly)

Returns the value of attribute part_of_speech.



287
288
289
# File 'lib/excite/crfparser.rb', line 287

# Reader for the part-of-speech tag given to the constructor.
# #for_join compares this value against short string tags such as 'br'.
#
# @return the tag passed to #initialize, or nil when none was given
def part_of_speech
  @part_of_speech
end

Class Method Details

.for_br(node) ⇒ Object



297
298
299
# File 'lib/excite/crfparser.rb', line 297

# Builds the placeholder token that stands for a line break.
#
# The token's text is BR_CHAR and its tag is 'br'; it is registered on +node+
# at index 0 with a token count of 1.
#
# @param node the node the break token belongs to
# @return whatever #is_in_node! returns (the new token itself)
def self.for_br(node)
  br_token = new(BR_CHAR, 'br')
  br_token.is_in_node!(node, 0, 1)
end

Instance Method Details

#empty? ⇒ Boolean

Returns:

  • (Boolean)


320
321
322
# File 'lib/excite/crfparser.rb', line 320

# Tells whether the token carries no visible text.
#
# The raw text is stripped first and the remainder is tested with +blank?+
# (not a core String method — presumably supplied by ActiveSupport).
#
# @return [Boolean] true when the stripped raw text is blank
def empty?
  trimmed = raw.strip
  trimmed.blank?
end

#for_join(prev) ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
# File 'lib/excite/crfparser.rb', line 328

# Produces the text this token contributes when tokens are joined back into a
# string, deciding whether a separating space goes in front of it.
#
# Rules, checked in this order:
# 1. This token's tag is one of pp/ppc/ppr/pps/rrb/pos -> raw text, no space.
# 2. The previous token's tag is one of ppd/ppl/lrb    -> raw text, no space.
# 3. This token's tag is 'br'                          -> a single space.
# 4. Anything else                                     -> a space, then raw text.
#
# The tag names look like closing punctuation (rule 1) and opening
# punctuation (rule 2) classes — confirm against the tagger's tag set.
#
# NOTE(review): because rule 2 runs before rule 3, a 'br' token that follows
# an opening-class token yields its raw text rather than " " — confirm that
# this is intended.
#
# @param prev the preceding token, or nil when there is none; only its
#   +part_of_speech+ is consulted
# @return [String] the text to append for this token
def for_join(prev)
  tag = part_of_speech

  # Attaches directly to whatever came before.
  return raw if ['pp', 'ppc', 'ppr', 'pps', 'rrb', 'pos'].include?(tag)

  # The previous token wants its successor glued on.
  return raw if prev && ['ppd', 'ppl', 'lrb'].include?(prev.part_of_speech)

  # A line break collapses to plain whitespace.
  return " " if tag == 'br'

  " " + raw
end

#is_in_node!(node, idx_in_node, node_token_count) ⇒ Object



301
302
303
304
305
306
# File 'lib/excite/crfparser.rb', line 301

# Records which node this token belongs to, together with the two positional
# values supplied by the caller, and hands the token back so the call can be
# chained (as .for_br does).
#
# @param node the owning node; readable afterwards via #node
# @param idx_in_node value stored for #idx_in_node
# @param node_token_count value stored for #node_token_count
# @return [self]
def is_in_node!(node, idx_in_node, node_token_count)
  @node, @idx_in_node, @node_token_count = node, idx_in_node, node_token_count
  self
end

#lcnp ⇒ Object



316
317
318
# File 'lib/excite/crfparser.rb', line 316

# Lower-cased form of #np, computed on first use and cached in @lcnp.
#
# @return the result of calling +downcase+ on #np
def lcnp
  return @lcnp if @lcnp

  @lcnp = np.downcase
end

#np ⇒ Object



312
313
314
# File 'lib/excite/crfparser.rb', line 312

# The token text after it has been passed through CRFParser.strip_punct,
# computed on first use and cached in @np.
#
# A nil/false result is not cached, so it would be recomputed on every call.
#
# @return the value produced by CRFParser.strip_punct for the token text
def np
  return @np if @np

  @np = CRFParser.strip_punct(@str)
end

#raw ⇒ Object



308
309
310
# File 'lib/excite/crfparser.rb', line 308

# The token text exactly as it was handed to the constructor, with no
# punctuation stripping applied (compare #np, which runs it through
# CRFParser.strip_punct).
#
# @return the +str+ argument given to #initialize
def raw
  @str
end

#to_s ⇒ Object



324
325
326
# File 'lib/excite/crfparser.rb', line 324

# Renders the token as its raw text wrapped in curly braces; a token whose
# raw text is +foo+ is rendered as +{foo}+.
#
# @return [String]
def to_s
  "{#{raw}}"
end