Class: AnyStyle::ParserCore

Inherits:
Object
  • Object
show all
Includes:
StringUtils
Defined in:
lib/anystyle/parser.rb

Direct Known Subclasses

Finder, Parser

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from StringUtils

canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate

Constructor Details

#initialize(options = {}) ⇒ ParserCore


20
21
22
23
# File 'lib/anystyle/parser.rb', line 20

# Builds a new instance: the class-level defaults are merged with the given
# options (entries in +options+ win on key clashes), then the model is loaded.
#
# @param options [Hash] overrides applied on top of the class defaults
def initialize(options = {})
  class_defaults = self.class.defaults
  @options = class_defaults.merge(options)
  load_model
end

Class Attribute Details

.defaults ⇒ Object (readonly)

Returns the value of attribute defaults


6
7
8
# File 'lib/anystyle/parser.rb', line 6

# Reader for @defaults (listed on this page as a read-only class attribute).
#
# @return [Object] the current value of @defaults
def defaults
  @defaults
end

.formats ⇒ Object (readonly)

Returns the value of attribute formats


6
7
8
# File 'lib/anystyle/parser.rb', line 6

# Reader for @formats (listed on this page as a read-only class attribute).
#
# @return [Object] the current value of @formats
def formats
  @formats
end

Instance Attribute Details

#features ⇒ Object (readonly)

Returns the value of attribute features


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Read-only accessor for @features.
#
# @return [Object] the current value of @features
def features
  @features
end

#model ⇒ Object (readonly)

Returns the value of attribute model


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Read-only accessor for @model, which #load_model assigns.
#
# @return [Object] the current value of @model
def model
  @model
end

#mtime ⇒ Object (readonly)

Returns the value of attribute mtime


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Read-only accessor for @mtime, the timestamp #load_model records
# (the model file's mtime, or the time a fresh model was created).
#
# @return [Object] the current value of @mtime
def mtime
  @mtime
end

#normalizers ⇒ Object (readonly)

Returns the value of attribute normalizers


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Read-only accessor for @normalizers, the collection #normalize iterates.
#
# @return [Object] the current value of @normalizers
def normalizers
  @normalizers
end

#options ⇒ Object (readonly)

Returns the value of attribute options


18
19
20
# File 'lib/anystyle/parser.rb', line 18

# Read-only accessor for @options, the merged option hash set in #initialize.
#
# @return [Object] the current value of @options
def options
  @options
end

Class Method Details

.instance ⇒ Object

Returns a default parser instance


13
14
15
# File 'lib/anystyle/parser.rb', line 13

# Returns a default parser instance. The instance is stored in a
# thread-local slot keyed by the downcased class name, so it is created
# at most once per thread and reused afterwards.
#
# @return [Object] the cached (or newly created) instance
def instance
  slot = "anystyle_#{name.downcase}"
  Thread.current[slot] ||= new
end

.load(path) ⇒ Object


8
9
10
# File 'lib/anystyle/parser.rb', line 8

# Creates a new instance whose :model option is set to +path+.
#
# @param path [Object] value passed through as the :model option
# @return [Object] whatever +new+ returns
def load(path)
  new(model: path)
end

Instance Method Details

#check(input) ⇒ Object


51
52
53
# File 'lib/anystyle/parser.rb', line 51

# Prepares +input+ with +tagged: true+ and hands the result to the
# model's +check+ method.
#
# @param input [Object] anything accepted by #prepare
# @return [Object] the result of +model.check+
def check(input)
  dataset = prepare(input, tagged: true)
  model.check(dataset)
end

#expand(dataset) ⇒ Object

Raises:

  • (NotImplementedError)

78
79
80
# File 'lib/anystyle/parser.rb', line 78

# Placeholder that always raises; the +dataset+ argument is not used here.
# NOTE(review): presumably the known subclasses (Finder, Parser) override
# this -- confirm against their sources.
#
# @raise [NotImplementedError] always
def expand(dataset)
  raise NotImplementedError
end

#label(input, **opts) ⇒ Object


47
48
49
# File 'lib/anystyle/parser.rb', line 47

# Prepares +input+ (forwarding any keyword options to #prepare) and hands
# the result to the model's +label+ method.
#
# @param input [Object] anything accepted by #prepare
# @param opts [Hash] keyword options passed through to #prepare
# @return [Object] the result of +model.label+
def label(input, **opts)
  dataset = prepare(input, **opts)
  model.label(dataset)
end

#learn(input) ⇒ Object


63
64
65
# File 'lib/anystyle/parser.rb', line 63

# Trains on +input+ without truncating first, i.e. delegates to #train
# with +truncate: false+ so the current model is not replaced beforehand.
#
# @param input [Object] training input forwarded to #train
# @return [Object] whatever #train returns
def learn(input)
  truncate = false
  train(input, truncate: truncate)
end

#load_model(file = options[:model]) ⇒ Object


25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/anystyle/parser.rb', line 25

# (Re)initialises @model and @mtime.
#
# With a +file+, the model is loaded through Wapiti.load, its options are
# updated from #options, and the file's modification time is remembered.
# With +nil+, a fresh Wapiti::Model is built from #options (minus the
# :model entry), its path is set to options[:model], and the current time
# is remembered instead.
#
# @param file [String, nil] model file to load; defaults to options[:model]
# @return [self]
def load_model(file = options[:model])
  if file.nil?
    settings = options.reject { |key, _| key == :model }
    @model = Wapiti::Model.new(settings)
    @model.path = options[:model]
    @mtime = Time.now
  else
    @model = Wapiti.load(file)
    @model.options.update_attributes options
    @mtime = File.mtime(file)
  end

  self
end

#normalize(hash, **opts) ⇒ Object


67
68
69
70
71
72
73
74
75
76
# File 'lib/anystyle/parser.rb', line 67

# Runs +hash+ through every normalizer in order, feeding each one the
# result of the previous. Normalizers whose +skip?+ is truthy are passed
# over. A StandardError raised by a normalizer (including by +skip?+) is
# reported with +warn+ and that normalizer's step is dropped, so the value
# from before the failure is carried forward.
#
# @param hash [Object] the value to normalize
# @param opts [Hash] keyword options forwarded to each normalizer
# @return [Object] the value after all normalizers ran
def normalize(hash, **opts)
  result = hash
  normalizers.each do |normalizer|
    begin
      next if normalizer.skip?
      result = normalizer.normalize(result, **opts)
    rescue => error
      warn "Error in #{normalizer.name} normalizer: #{error.message}"
    end
  end
  result
end

#prepare(input, **opts) ⇒ Object


82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/anystyle/parser.rb', line 82

# Converts +input+ into a Wapiti::Dataset and passes it through #expand.
#
# * Wapiti::Dataset  - used as is
# * Wapiti::Sequence - wrapped in a one-element dataset
# * String           - treated as a file path and opened when it is shorter
#                      than 1024 characters and names an existing file;
#                      otherwise parsed as content
# * anything else    - handed to Wapiti::Dataset.parse
#
# @param input [Wapiti::Dataset, Wapiti::Sequence, String, Object]
# @param opts [Hash] keyword options forwarded to Dataset.open / Dataset.parse
# @return [Object] the result of #expand
def prepare(input, **opts)
  case input
  when Wapiti::Dataset
    expand input
  when Wapiti::Sequence
    expand Wapiti::Dataset.new([input])
  when String
    # Object#tainted? and File.exists? were removed in Ruby 3.2. Only consult
    # the taint flag on interpreters that still provide it, and use
    # File.exist?, which is available on every Ruby version.
    tainted = input.respond_to?(:tainted?) && input.tainted?
    if !tainted && input.length < 1024 && File.exist?(input)
      expand Wapiti::Dataset.open(input, **opts)
    else
      expand Wapiti::Dataset.parse(input, **opts)
    end
  else
    expand Wapiti::Dataset.parse(input, **opts)
  end
end

#reload ⇒ Object


39
40
41
# File 'lib/anystyle/parser.rb', line 39

# Loads the model again from the path the current model reports.
#
# @return [Object] whatever #load_model returns
def reload
  current_path = model.path
  load_model(current_path)
end

#stale? ⇒ Boolean


43
44
45
# File 'lib/anystyle/parser.rb', line 43

# Tells whether the model file on disk is newer than the timestamp
# recorded when the model was loaded (#mtime).
#
# A model without a path (e.g. a fresh in-memory model built when no
# :model option is set) can never be stale, so false is returned instead
# of letting File.exist?(nil) raise a TypeError.
#
# @return [Boolean]
def stale?
  path = model.path
  return false if path.nil?

  File.exist?(path) && File.mtime(path) > mtime
end

#train(input = options[:training_data], truncate: true) ⇒ Object


55
56
57
58
59
60
61
# File 'lib/anystyle/parser.rb', line 55

# Trains the model and returns it.
#
# When +truncate+ is true the model is first replaced by a fresh one
# (load_model(nil)). Training itself is skipped when +input+ is nil or
# empty.
#
# @param input [Object, nil] training input for #prepare; defaults to
#   options[:training_data]
# @param truncate [Boolean] start from a fresh model before training
# @return [Object] the model
def train(input = options[:training_data], truncate: true)
  load_model(nil) if truncate
  has_data = !input.nil? && !input.empty?
  model.train(prepare(input, tagged: true)) if has_data
  model
end