Class: Opener::POSTaggers::EnEs

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/pos_taggers/en_es/en_es.rb,
lib/opener/pos_taggers/en_es/version.rb

Overview

Base POS tagger class for the various language-specific ones, such as Opener::POSTaggers::FR.

Direct Known Subclasses

EN, ES, FR, IT, NL

Constant Summary collapse

DEFAULT_OPTIONS =

The default options to use.

Returns:

  • (Hash)
{
  :enable_time => true
}
VERSION =
"2.0.3"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ EnEs

Returns a new instance of EnEs.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :args (Array)
  • :enable_time (TrueClass|FalseClass)

    When set to `true` (default), dynamic timestamps will be added.



33
34
35
36
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 33

##
# Stores the extra arguments and the merged options.
#
# The given Hash is copied before `:args` is removed from it, so the Hash
# owned by the caller is left untouched (and may be frozen).
#
# @param [Hash] options
#
# @option options [Array] :args
#
# @option options [TrueClass|FalseClass] :enable_time When set to `true`
#  (default) dynamic timestamps will be added.
#
def initialize(options = {})
  options = options.dup

  @args    = options.delete(:args) || []
  @options = DEFAULT_OPTIONS.merge(options)
end

Instance Attribute Details

#args ⇒ Array (readonly)

Returns:

  • (Array)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 13

class EnEs
  attr_reader :args, :options

  ##
  # The default options to use. The Hash is frozen; {#initialize} merges it
  # into a new Hash instead of modifying it.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :enable_time => true
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args
  #
  # @option options [TrueClass|FalseClass] :enable_time When set to `true`
  #  (default) dynamic timestamps will be added.
  #
  def initialize(options = {})
    # Work on a shallow copy so that removing :args does not alter the Hash
    # owned by the caller.
    options = options.dup

    @args    = options.delete(:args) || []
    @options = DEFAULT_OPTIONS.merge(options)
  end

  ##
  # Runs the command and returns the resulting KAF document.
  #
  # @param [String] input The input to tag.
  # @return [String] The annotated document as produced by `to_string`.
  #
  def run(input)
    language = language_from_kaf(input)
    stream   = StringIO.new(input).to_inputstream

    reader    = Java::java.io.InputStreamReader.new(stream)
    kaf       = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
    annotator = new_annotator(language)

    annotator.annotatePOSToKAF(kaf, lemmatizer(language), language)

    kaf.to_string
  end

  protected

  ##
  # Creates and configures a new annotator instance. Timestamps are disabled
  # on the annotator unless the `:enable_time` option is set.
  #
  # @param [String] language
  # @return [Java::ehu.pos.Annotate]
  #
  def new_annotator(language)
    annotator = Java::ehu.pos.Annotate.new(language)

    annotator.disableTimestamp unless options[:enable_time]

    annotator
  end

  ##
  # Returns the lemmatizer to use.
  #
  # @param [String] language
  #
  def lemmatizer(language)
    Java::ehu.lemmatize.LemmatizerDispatcher.obtainMorfologikLemmatizer(language)
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of its `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 13

class EnEs
  attr_reader :args, :options

  ##
  # The default options to use. The Hash is frozen; {#initialize} merges it
  # into a new Hash instead of modifying it.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :enable_time => true
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args
  #
  # @option options [TrueClass|FalseClass] :enable_time When set to `true`
  #  (default) dynamic timestamps will be added.
  #
  def initialize(options = {})
    # Work on a shallow copy so that removing :args does not alter the Hash
    # owned by the caller.
    options = options.dup

    @args    = options.delete(:args) || []
    @options = DEFAULT_OPTIONS.merge(options)
  end

  ##
  # Runs the command and returns the resulting KAF document.
  #
  # @param [String] input The input to tag.
  # @return [String] The annotated document as produced by `to_string`.
  #
  def run(input)
    language = language_from_kaf(input)
    stream   = StringIO.new(input).to_inputstream

    reader    = Java::java.io.InputStreamReader.new(stream)
    kaf       = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
    annotator = new_annotator(language)

    annotator.annotatePOSToKAF(kaf, lemmatizer(language), language)

    kaf.to_string
  end

  protected

  ##
  # Creates and configures a new annotator instance. Timestamps are disabled
  # on the annotator unless the `:enable_time` option is set.
  #
  # @param [String] language
  # @return [Java::ehu.pos.Annotate]
  #
  def new_annotator(language)
    annotator = Java::ehu.pos.Annotate.new(language)

    annotator.disableTimestamp unless options[:enable_time]

    annotator
  end

  ##
  # Returns the lemmatizer to use.
  #
  # @param [String] language
  #
  def lemmatizer(language)
    Java::ehu.lemmatize.LemmatizerDispatcher.obtainMorfologikLemmatizer(language)
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of its `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end

Instance Method Details

#run(input) ⇒ String

Runs the command and returns the resulting KAF document.

Parameters:

  • input (String)

    The input to tag.

Returns:

  • (String)


44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/opener/pos_taggers/en_es/en_es.rb', line 44

##
# Tags the given KAF input and hands back the annotated document.
#
# The language is taken from the input via `language_from_kaf`; the input is
# then parsed into a KAF document, annotated, and serialized with
# `to_string`.
#
# @param [String] input The input to tag.
#
def run(input)
  lang   = language_from_kaf(input)
  stream = StringIO.new(input).to_inputstream

  document = Java::ixa.kaflib.KAFDocument.create_from_stream(
    Java::java.io.InputStreamReader.new(stream)
  )

  new_annotator(lang).annotatePOSToKAF(document, lemmatizer(lang), lang)

  document.to_string
end