Class: Opener::POSTaggers::EnEs
- Inherits:
-
Object
- Object
- Opener::POSTaggers::EnEs
- Defined in:
- lib/opener/pos_taggers/en_es/en_es.rb,
lib/opener/pos_taggers/en_es/version.rb
Overview
Base POS tagger class for the various language-specific ones such as Opener::POSTaggers::FR.
Constant Summary collapse
- DEFAULT_OPTIONS =
The default options to use.
{ :enable_time => true }
- VERSION =
"2.0.3"
Instance Attribute Summary collapse
- #args ⇒ Array readonly
- #options ⇒ Hash readonly
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ EnEs
constructor
A new instance of EnEs.
-
#run(input) ⇒ String
Runs the command and returns the resulting KAF document.
Constructor Details
#initialize(options = {}) ⇒ EnEs
Returns a new instance of EnEs.
33 34 35 36 |
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 33
##
# Builds a new tagger from an options Hash.
#
# @param [Hash] options
#
# @option options [Array] :args Stored in `@args`; defaults to an empty
#  Array when absent. The key is not copied into `@options`.
#
# @option options [TrueClass|FalseClass] :enable_time Merged over
#  DEFAULT_OPTIONS together with any other remaining keys.
#
def initialize(options = {})
  # Work on a copy so that removing :args does not alter the caller's Hash.
  options  = options.dup
  @args    = options.delete(:args) || []
  @options = DEFAULT_OPTIONS.merge(options)
end
Instance Attribute Details
#args ⇒ Array (readonly)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 13
##
# POS tagger that reads a KAF document, annotates it through the Java
# `ehu.pos.Annotate` class and returns the serialized result.
#
class EnEs
  attr_reader :args, :options

  ##
  # The default options to use.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = { :enable_time => true }

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args Stored in `@args`; defaults to an empty
  #  Array when absent. The key is not copied into `@options`.
  #
  # @option options [TrueClass|FalseClass] :enable_time When set to `true`
  #  (default) dynamic timestamps will be added.
  #
  def initialize(options = {})
    # Work on a copy so that removing :args does not alter the caller's Hash.
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = DEFAULT_OPTIONS.merge(options)
  end

  ##
  # Runs the command and returns the resulting KAF document.
  #
  # @param [String] input The input to tag.
  # @return [String] The result of `to_string` on the annotated KAF
  #  document (presumably a String under JRuby; confirm against kaflib).
  #
  def run(input)
    language = language_from_kaf(input)

    # Wrap the Ruby String so it can be handed to the Java reader.
    stream = StringIO.new(input)
    reader = Java::java.io.InputStreamReader.new(stream.to_inputstream)
    kaf    = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)

    annotator = new_annotator(language)
    annotator.annotatePOSToKAF(kaf, lemmatizer(language), language)

    kaf.to_string
  end

  protected

  ##
  # Creates and configures a new annotator instance.
  #
  # @param [String] language
  # @return [Java::ehu.pos.Annotate]
  #
  def new_annotator(language)
    annotator = Java::ehu.pos.Annotate.new(language)

    # Timestamps are only disabled when :enable_time is falsy.
    annotator.disableTimestamp unless options[:enable_time]

    annotator
  end

  ##
  # Returns the lemmatizer to use, as obtained from
  # `LemmatizerDispatcher.obtainMorfologikLemmatizer`.
  #
  # @param [String] language
  #
  def lemmatizer(language)
    Java::ehu.lemmatize.LemmatizerDispatcher
      .obtainMorfologikLemmatizer(language)
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` node.
  #
  # NOTE(review): when the input has no `KAF` node, `at` presumably returns
  # nil and this raises NoMethodError; confirm whether callers rely on that.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#options ⇒ Hash (readonly)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 13
##
# POS tagger that reads a KAF document, annotates it through the Java
# `ehu.pos.Annotate` class and returns the serialized result.
#
class EnEs
  attr_reader :args, :options

  ##
  # The default options to use.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = { :enable_time => true }

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args Stored in `@args`; defaults to an empty
  #  Array when absent. The key is not copied into `@options`.
  #
  # @option options [TrueClass|FalseClass] :enable_time When set to `true`
  #  (default) dynamic timestamps will be added.
  #
  def initialize(options = {})
    # Work on a copy so that removing :args does not alter the caller's Hash.
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = DEFAULT_OPTIONS.merge(options)
  end

  ##
  # Runs the command and returns the resulting KAF document.
  #
  # @param [String] input The input to tag.
  # @return [String] The result of `to_string` on the annotated KAF
  #  document (presumably a String under JRuby; confirm against kaflib).
  #
  def run(input)
    language = language_from_kaf(input)

    # Wrap the Ruby String so it can be handed to the Java reader.
    stream = StringIO.new(input)
    reader = Java::java.io.InputStreamReader.new(stream.to_inputstream)
    kaf    = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)

    annotator = new_annotator(language)
    annotator.annotatePOSToKAF(kaf, lemmatizer(language), language)

    kaf.to_string
  end

  protected

  ##
  # Creates and configures a new annotator instance.
  #
  # @param [String] language
  # @return [Java::ehu.pos.Annotate]
  #
  def new_annotator(language)
    annotator = Java::ehu.pos.Annotate.new(language)

    # Timestamps are only disabled when :enable_time is falsy.
    annotator.disableTimestamp unless options[:enable_time]

    annotator
  end

  ##
  # Returns the lemmatizer to use, as obtained from
  # `LemmatizerDispatcher.obtainMorfologikLemmatizer`.
  #
  # @param [String] language
  #
  def lemmatizer(language)
    Java::ehu.lemmatize.LemmatizerDispatcher
      .obtainMorfologikLemmatizer(language)
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` node.
  #
  # NOTE(review): when the input has no `KAF` node, `at` presumably returns
  # nil and this raises NoMethodError; confirm whether callers rely on that.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
Instance Method Details
#run(input) ⇒ String
Runs the command and returns the resulting KAF document.
44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 44
##
# Tags the given KAF input and returns the serialized, annotated document.
#
# @param [String] input The KAF document to tag.
#
def run(input)
  language  = language_from_kaf(input)
  annotator = new_annotator(language)

  # Wrap the Ruby String so it can be handed to the Java reader.
  stream = StringIO.new(input)
  reader = Java::java.io.InputStreamReader.new(stream.to_inputstream)
  kaf    = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)

  annotator.annotatePOSToKAF(kaf, lemmatizer(language), language)

  kaf.to_string
end