Module: StanfordCoreNLP

Extended by:
BindIt::Binding, Bridge
Defined in:
lib/stanford-core-nlp.rb,
lib/stanford-core-nlp/config.rb,
lib/stanford-core-nlp/version.rb

Defined Under Namespace

Modules: Bridge

Classes: Config

Constant Summary collapse

VERSION =
'3.5.0.alpha'

Class Attribute Summary collapse

Class Method Summary collapse

Methods included from Bridge

inject_get_method

Class Attribute Details

.custom_properties ⇒ Object

Custom properties



64
65
66
# File 'lib/stanford-core-nlp.rb', line 64

# Reader for the custom properties held in @custom_properties.
# Returns nil if the instance variable has never been assigned.
def custom_properties
  @custom_properties
end

.language ⇒ Object

Store the language currently being used.



62
63
64
# File 'lib/stanford-core-nlp.rb', line 62

# Reader for the language currently in use, held in @language.
# Returns nil if the instance variable has never been assigned.
def language
  @language
end

.model_files ⇒ Object

The model file names for a given language.



58
59
60
# File 'lib/stanford-core-nlp.rb', line 58

# Reader for the model file names of the current language, held in
# @model_files. Returns nil if the instance variable has never been assigned.
def model_files
  @model_files
end

.model_path ⇒ Object

The folder in which to look for models.



60
61
62
# File 'lib/stanford-core-nlp.rb', line 60

# Reader for the folder in which models are looked up, held in @model_path.
# Returns nil if the instance variable has never been assigned.
def model_path
  @model_path
end

Class Method Details

.bind ⇒ Object

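Public API methods.

Makes the bindings through the parent implementation (after converting the JAR and model paths to backslash separators on Windows), then injects the annotation bridge getter into each default class.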



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/stanford-core-nlp.rb', line 115

# Makes the bindings and wires the annotation bridge into every default class.
#
# When running on Windows, the JAR path and the model path are rewritten in
# place to use backslashes before the parent +bind+ is invoked.
#
# @return [Object] whatever iterating +default_classes+ returns
def self.bind
  backslash = '\\'

  # Windows wants backslash-separated paths; both strings are mutated in place.
  if running_on_windows?
    jar_path.gsub!('/', backslash)
    model_path.gsub!('/', backslash)
  end

  # Hand the actual binding work to the parent implementation.
  super

  # Resolve each default class by name and give it the bridge getter.
  default_classes.each { |entry| inject_get_method(const_get(entry.first)) }
end

.const_missing(const) ⇒ Object

Hack in order not to break backwards compatibility.



192
193
194
195
196
197
198
199
200
# File 'lib/stanford-core-nlp.rb', line 192

# Backwards-compatibility hook for the deprecated +Text+ constant.
#
# A lookup of +Text+ emits a deprecation warning and resolves to +Annotation+.
# Any other missing constant is passed on to the parent implementation.
#
# The warning is written with Kernel#warn so it goes to standard error rather
# than mixing with the program's regular output, and the two sentences of the
# message are separated by a space.
#
# @param const [Symbol] name of the constant that could not be resolved
# @return [Object] +Annotation+ when +const+ is +:Text+, otherwise the result
#   of the parent +const_missing+
def self.const_missing(const)
  return super(const) unless const == :Text

  warn 'WARNING: StanfordCoreNLP::Text has been deprecated. ' \
       'Please use StanfordCoreNLP::Annotation instead.'
  Annotation
end

.load(*annotators) ⇒ Object

Load a StanfordCoreNLP pipeline for the specified annotators, using the model files and StanfordCoreNLP properties configured for the current language.



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/stanford-core-nlp.rb', line 137

# Builds a StanfordCoreNLP pipeline for the requested annotators.
#
# Binds first if that has not happened yet, collects the model files whose
# property key contains one of the annotator names, adds a few workaround
# properties, and asks the annotation bridge for a pipeline.
#
# Annotator names are normalized to strings once, so symbols and strings are
# treated alike everywhere (including the check that enables the SUTime rules).
#
# @param annotators [Array<#to_s>] annotator names, e.g. +:tokenize+, +:ner+
# @return [Object] whatever the bridge's +getPipelineWithProperties+ returns
# @raise [RuntimeError] if a required model file is not readable
def self.load(*annotators)
  bind unless bound

  names = annotators.map(&:to_s)

  # Prepend the model path to the model files that the requested
  # annotators need; models for other annotators are skipped.
  properties = {}
  model_files.each do |key, file|
    next unless names.any? { |name| key.index(name) }

    path = model_path + file
    unless File.readable?(path)
      raise "Model file #{path} could not be found. " +
            "You may need to download this file manually " +
            "and/or set paths properly."
    end
    properties[key] = path
  end

  properties['annotators'] = names.join(', ')

  unless language == :english
    # Bug fix for French/German parsers.
    # Otherwise throws "IllegalArgumentException:
    # Unknown option: -retainTmpSubcategories"
    properties['parse.flags'] = ''
    # Bug fix for French/German parsers.
    # Otherwise throws java.lang.NullPointerException: null.
    properties['parse.buildgraphs'] = 'false'
  end

  # Bug fix for NER system. Otherwise throws:
  # Error initializing binder 1 at edu.stanford.
  # nlp.time.Options.<init>(Options.java:88)
  properties['sutime.binders'] = '0'

  # Manually include SUTime models whenever NER is requested.
  if names.include?('ner')
    properties['sutime.rules'] =
      model_path + 'sutime/defs.sutime.txt, ' +
      model_path + 'sutime/english.sutime.txt'
  end

  props = get_properties(properties)

  # Hack for Java7 compatibility.
  bridge = const_get(:AnnotationBridge)
  bridge.getPipelineWithProperties(props)
end

.set_model(name, file) ⇒ Object

Set a model file.



106
107
108
109
# File 'lib/stanford-core-nlp.rb', line 106

# Registers a model file under the given property name.
#
# The part of +name+ before the first dot selects the folder from
# Config::ModelFolders; that folder is prepended to +file+.
#
# @param name [String] property name such as "pos.model"
# @param file [String] model file name, relative to the selected folder
# @return [String] the stored folder-plus-file value
def self.set_model(name, file)
  prefix, = name.split('.')
  folder = Config::ModelFolders[prefix.intern]
  model_files[name] = folder + file
end

.use(language) ⇒ Object

Use models for a given language. The language can be supplied either as its full-length name or as a two- or three-letter ISO 639 code (e.g. :english, :eng or :en will all work).



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/stanford-core-nlp.rb', line 81

# Switches the active language and rebuilds the model file table for it.
#
# +language+ is looked up in every code list of Config::LanguageCodes; the
# third element of a list containing it becomes the active language (the last
# matching list wins). When nothing matches, the language is set to nil and
# the model file table stays empty.
#
# @param language [Object] a value expected to appear in one of the code lists
# @return [Object] the result of iterating Config::Models
def self.use(language)
  self.model_files = {}

  resolved = nil
  Config::LanguageCodes.each do |_name, codes|
    resolved = codes[2] if codes.include?(language)
  end
  self.language = resolved

  Config::Models.each do |kind, by_language|
    entry = by_language[resolved]
    folder = Config::ModelFolders[kind]

    case entry
    when Hash
      # Several files for this kind: one property per sub-key.
      entry.each do |suffix, file|
        model_files["#{kind}.#{suffix}"] = folder + file
      end
    when String
      # A single file: stored under the conventional ".model" key.
      model_files["#{kind}.model"] = folder + entry
    end
  end
end