Class: AnyStyle::ParserCore

Inherits:
Object
  • Object
show all
Includes:
StringUtils
Defined in:
lib/anystyle/parser.rb

Direct Known Subclasses

Finder, Parser

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from StringUtils

canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate

Constructor Details

#initialize(options = {}) ⇒ ParserCore

Returns a new instance of ParserCore


20
21
22
23
# File 'lib/anystyle/parser.rb', line 20

# Builds a new instance: the class-level defaults are merged with the
# caller's options (caller values win on key collisions) and the model is
# then loaded via #load_model.
#
# @param options [Hash] overrides merged on top of `self.class.defaults`
def initialize(options = {})
  class_defaults = self.class.defaults
  @options = class_defaults.merge(options)
  load_model
end

Class Attribute Details

.defaults ⇒ Object (readonly)

Returns the value of attribute defaults


6
7
8
# File 'lib/anystyle/parser.rb', line 6

# Reader for the class-level `@defaults` value.
#
# @return [Object] whatever is stored in `@defaults` (presumably a Hash,
#   since #initialize calls `merge` on it -- confirm where it is assigned)
def defaults; @defaults; end

.formats ⇒ Object (readonly)

Returns the value of attribute formats


6
7
8
# File 'lib/anystyle/parser.rb', line 6

# Reader for the class-level `@formats` value.
#
# @return [Object] whatever is stored in `@formats`
def formats; @formats; end

Instance Attribute Details

#features ⇒ Object (readonly)

Returns the value of attribute features


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Reader for the `@features` instance variable.
#
# @return [Object] whatever is stored in `@features`
def features; @features; end

#model ⇒ Object (readonly)

Returns the value of attribute model


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Reader for the `@model` instance variable, which #load_model assigns.
#
# @return [Object] the currently loaded model, or nil if none was set
def model; @model; end

#normalizers ⇒ Object (readonly)

Returns the value of attribute normalizers


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Reader for the `@normalizers` instance variable; #normalize iterates it
# with `each`.
#
# @return [Object] whatever is stored in `@normalizers`
def normalizers; @normalizers; end

#options ⇒ Object (readonly)

Returns the value of attribute options


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Reader for the `@options` instance variable, i.e. the merged options
# assigned in #initialize.
#
# @return [Object] whatever is stored in `@options`
def options; @options; end

Class Method Details

.instance ⇒ Object

Returns a default parser instance


13
14
15
# File 'lib/anystyle/parser.rb', line 13

# Returns a default parser instance, created with `new` on first use and
# then cached in `Thread.current` under a key derived from the downcased
# class name.
#
# @return [Object] the cached instance for the current `Thread.current` slot
def instance
  cache_key = "anystyle_#{name.downcase}"
  Thread.current[cache_key] ||= new
end

.load(path) ⇒ Object


8
9
10
# File 'lib/anystyle/parser.rb', line 8

# Creates a new instance configured with the given model path.
#
# @param path [Object] value passed to `new` under the `:model` option
# @return [Object] the result of `new`
def load(path)
  new(model: path)
end

Instance Method Details

#check(input) ⇒ Object


41
42
43
# File 'lib/anystyle/parser.rb', line 41

# Prepares the input as tagged data and passes it to the model's `check`.
#
# @param input [Object] anything accepted by #prepare
# @return [Object] whatever `model.check` returns
def check(input)
  checker = model
  dataset = prepare(input, tagged: true)
  checker.check(dataset)
end

#expand(dataset) ⇒ Object

Raises:

  • (NotImplementedError)

68
69
70
# File 'lib/anystyle/parser.rb', line 68

# Hook for subclasses: this base implementation always raises.
# #prepare routes every dataset it builds through this method.
#
# @param dataset [Object] the dataset to expand (unused here)
# @raise [NotImplementedError] always; the message names the receiving class
#   so a missing override is easy to locate
def expand(dataset)
  raise NotImplementedError, "#{self.class.name}#expand is not implemented"
end

#label(input, **opts) ⇒ Object


37
38
39
# File 'lib/anystyle/parser.rb', line 37

# Prepares the input, forwarding the given options to #prepare, and passes
# the result to the model's `label`.
#
# @param input [Object] anything accepted by #prepare
# @param opts [Hash] keyword options forwarded unchanged to #prepare
# @return [Object] whatever `model.label` returns
def label(input, **opts)
  labeler = model
  dataset = prepare(input, **opts)
  labeler.label(dataset)
end

#learn(input) ⇒ Object


53
54
55
# File 'lib/anystyle/parser.rb', line 53

# Trains on the given input with `truncate: false`, so #train does not
# reset the model with `load_model(nil)` first.
#
# @param input [Object] training data, passed through to #train
# @return [Object] whatever #train returns
def learn(input)
  keep_current_model = { truncate: false }
  train(input, **keep_current_model)
end

#load_model(file = options[:model]) ⇒ Object


25
26
27
28
29
30
31
32
33
34
35
# File 'lib/anystyle/parser.rb', line 25

# Assigns `@model`, either by loading it from a file or by building a new
# one from the current options.
#
# With a file, the model is loaded through `Wapiti.load` and its options are
# updated from this object's options. With `nil`, a new `Wapiti::Model` is
# built from the options minus `:model`, and its path is set to
# `options[:model]`.
#
# @param file [Object, nil] model file to load; defaults to `options[:model]`
# @return [self]
def load_model(file = options[:model])
  if file.nil?
    model_settings = options.reject { |key, _| key == :model }
    @model = Wapiti::Model.new(model_settings)
    @model.path = options[:model]
  else
    @model = Wapiti.load(file)
    @model.options.update_attributes(options)
  end

  self
end

#normalize(hash, **opts) ⇒ Object


57
58
59
60
61
62
63
64
65
66
# File 'lib/anystyle/parser.rb', line 57

# Runs the hash through each normalizer in order, feeding each result into
# the next one. Normalizers whose `skip?` is truthy are bypassed. A
# normalizer that raises a StandardError is reported with `warn` and
# skipped, leaving the value produced so far unchanged.
#
# @param hash [Object] the value to normalize
# @param opts [Hash] keyword options forwarded to every normalizer
# @return [Object] the value after the last successful normalizer
def normalize(hash, **opts)
  result = hash

  normalizers.each do |normalizer|
    begin
      next if normalizer.skip?

      result = normalizer.normalize(result, **opts)
    rescue => error
      warn "Error in #{normalizer.name} normalizer: #{error.message}"
    end
  end

  result
end

#prepare(input, **opts) ⇒ Object


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/anystyle/parser.rb', line 72

# Converts the supported input kinds into a dataset and passes it through
# #expand.
#
# * `Wapiti::Dataset`  -- expanded as is.
# * `Wapiti::Sequence` -- wrapped in a new single-sequence dataset.
# * `String`           -- opened as a file when it is a short (< 1024
#   characters) path to an existing file, otherwise parsed as text.
# * anything else      -- handed to `Wapiti::Dataset.parse`.
#
# @param input [Wapiti::Dataset, Wapiti::Sequence, String, Object]
# @param opts [Hash] keyword options forwarded to `Wapiti::Dataset.open`
#   or `Wapiti::Dataset.parse`
# @return [Object] whatever #expand returns
def prepare(input, **opts)
  case input
  when Wapiti::Dataset
    expand input
  when Wapiti::Sequence
    expand Wapiti::Dataset.new([input])
  when String
    # Object#tainted? was removed in Ruby 3.2; only consult it where it
    # still exists so tainted strings keep being treated as text there.
    tainted = input.respond_to?(:tainted?) && input.tainted?

    # File.exist? raises ArgumentError for strings containing NUL, so such
    # input can never name a file and is parsed as text instead.
    # File.exists? (with the trailing "s") was removed in Ruby 3.2.
    path_like = !tainted && input.length < 1024 && !input.include?("\0")

    # Options are forwarded as keywords, matching #label and #normalize.
    # NOTE(review): assumes Wapiti::Dataset.open/parse take keyword
    # options -- confirm against the installed wapiti version.
    if path_like && File.exist?(input)
      expand Wapiti::Dataset.open(input, **opts)
    else
      expand Wapiti::Dataset.parse(input, **opts)
    end
  else
    expand Wapiti::Dataset.parse(input, **opts)
  end
end

#train(input = options[:training_data], truncate: true) ⇒ Object


45
46
47
48
49
50
51
# File 'lib/anystyle/parser.rb', line 45

# Trains the model on the given input.
#
# When `truncate` is truthy the model is first reset via `load_model(nil)`.
# Training is skipped entirely when the input is nil or empty.
#
# @param input [Object, nil] training data accepted by #prepare; defaults
#   to `options[:training_data]`
# @param truncate [Boolean] whether to reset the model before training
# @return [Object] the current model (not the result of `model.train`)
def train(input = options[:training_data], truncate: true)
  load_model(nil) if truncate
  return model if input.nil? || input.empty?

  model.train(prepare(input, tagged: true))
  model
end