Class: Opener::Tokenizer::CLI

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/tokenizer/cli.rb

Overview

CLI wrapper around Opener::Tokenizer using OptionParser.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ CLI

Returns a new instance of CLI.

Parameters:

  • options (Hash) (defaults to: {})


17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/opener/tokenizer/cli.rb', line 17

##
# Merges the given options over DEFAULT_OPTIONS and builds the OptionParser
# that defines the command-line switches and the trailing help text.
#
# @param [Hash] options Values merged on top of DEFAULT_OPTIONS.
#
def initialize(options = {})
  @options = DEFAULT_OPTIONS.merge(options)

  @option_parser = OptionParser.new do |parser|
    parser.program_name   = 'tokenizer'
    parser.summary_indent = '  '

    parser.on('-h', '--help', 'Shows this help message') { show_help }
    parser.on('-v', '--version', 'Shows the current version') { show_version }

    # The value is declared optional ([VALUE]); passing -l also switches the
    # input mode to plain text by clearing the :kaf flag.
    parser.on('-l', '--language [VALUE]', 'Uses this specific language') do |language|
      @options[:language] = language
      @options[:kaf]      = false
    end

    parser.on('-k', '--kaf', 'Treats the input as a KAF document') { @options[:kaf] = true }
    parser.on('-p', '--plain', 'Treats the input as plain text') { @options[:kaf] = false }

    # Free-form text appended after the switch summary in the help output.
    parser.separator <<-EOF

Examples:

  cat example.txt | #{parser.program_name} -l en # Manually specify the language
  cat example.kaf | #{parser.program_name}       # Uses the xml:lang attribute

Languages:

  * Dutch (nl)
  * English (en)
  * French (fr)
  * German (de)
  * Italian (it)
  * Spanish (es)

KAF Input:

  If you give a KAF file as an input (-k or --kaf) the language is taken from
  the xml:lang attribute inside the file. Else it expects that you give the
  language as an argument (-l or --language)

Sample KAF syntax:

  <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  <KAF version="v1.opener" xml:lang="en">
    <raw>This is some text.</raw>
  </KAF>
    EOF
  end
end

Instance Attribute Details

#option_parser ⇒ OptionParser (readonly)

Returns:

  • (OptionParser)


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/opener/tokenizer/cli.rb', line 11

##
# Command-line front end for the tokenizer. Parses the switches found in
# `options[:args]` with OptionParser, passes the resulting options to
# Tokenizer and prints the first element of the tokenizer's result.
#
class CLI
  attr_reader :options, :option_parser

  ##
  # Merges the given options over DEFAULT_OPTIONS and builds the OptionParser
  # that defines the command-line switches and the trailing help text.
  #
  # @param [Hash] options Values merged on top of DEFAULT_OPTIONS.
  #
  def initialize(options = {})
    @options = DEFAULT_OPTIONS.merge(options)

    @option_parser = OptionParser.new do |opts|
      opts.program_name   = 'tokenizer'
      opts.summary_indent = '  '

      opts.on('-h', '--help', 'Shows this help message') do
        show_help
      end

      opts.on('-v', '--version', 'Shows the current version') do
        show_version
      end

      # The value is declared optional ([VALUE]); passing -l also switches
      # the input mode to plain text by clearing the :kaf flag.
      opts.on(
        '-l',
        '--language [VALUE]',
        'Uses this specific language'
      ) do |value|
        @options[:language] = value
        @options[:kaf] = false
      end

      opts.on('-k', '--kaf', 'Treats the input as a KAF document') do
        @options[:kaf] = true
      end

      opts.on('-p', '--plain', 'Treats the input as plain text') do
        @options[:kaf] = false
      end

      # Free-form text appended after the switch summary in the help output.
      opts.separator <<-EOF

Examples:

  cat example.txt | #{opts.program_name} -l en # Manually specify the language
  cat example.kaf | #{opts.program_name}       # Uses the xml:lang attribute

Languages:

  * Dutch (nl)
  * English (en)
  * French (fr)
  * German (de)
  * Italian (it)
  * Spanish (es)

KAF Input:

  If you give a KAF file as an input (-k or --kaf) the language is taken from
  the xml:lang attribute inside the file. Else it expects that you give the
  language as an argument (-l or --language)

Sample KAF syntax:

  <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  <KAF version="v1.opener" xml:lang="en">
    <raw>This is some text.</raw>
  </KAF>
      EOF
    end
  end

  ##
  # Parses the command-line arguments (removing recognised switches from
  # `options[:args]`), runs the tokenizer on the input and prints its output.
  # Exits with a failure status when the tokenizer reports an unsuccessful
  # run.
  #
  # @param [String] input
  #
  def run(input)
    option_parser.parse!(options[:args])

    tokenizer = Tokenizer.new(options)

    stdout, stderr, process = tokenizer.run(input)

    puts stdout

    # NOTE(review): the names suggest `process` is a Process::Status-like
    # object (e.g. as returned by Open3.capture3) -- confirm against
    # Tokenizer#run. The respond_to? guard keeps the previous behaviour for
    # any other kind of value, so failures are reported only when the status
    # can actually be queried.
    abort stderr.to_s if process.respond_to?(:success?) && !process.success?
  end

  private

  ##
  # Shows the help message and exits the program.
  #
  # The help was explicitly requested, so it is written to STDOUT and the
  # program exits with a success status.
  #
  def show_help
    puts option_parser.to_s
    exit
  end

  ##
  # Shows the version and exits the program.
  #
  # The version was explicitly requested, so it is written to STDOUT and the
  # program exits with a success status.
  #
  def show_version
    puts "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
    exit
  end
end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/opener/tokenizer/cli.rb', line 11

##
# Command-line front end for the tokenizer. Parses the switches found in
# `options[:args]` with OptionParser, passes the resulting options to
# Tokenizer and prints the first element of the tokenizer's result.
#
class CLI
  attr_reader :options, :option_parser

  ##
  # Merges the given options over DEFAULT_OPTIONS and builds the OptionParser
  # that defines the command-line switches and the trailing help text.
  #
  # @param [Hash] options Values merged on top of DEFAULT_OPTIONS.
  #
  def initialize(options = {})
    @options = DEFAULT_OPTIONS.merge(options)

    @option_parser = OptionParser.new do |opts|
      opts.program_name   = 'tokenizer'
      opts.summary_indent = '  '

      opts.on('-h', '--help', 'Shows this help message') do
        show_help
      end

      opts.on('-v', '--version', 'Shows the current version') do
        show_version
      end

      # The value is declared optional ([VALUE]); passing -l also switches
      # the input mode to plain text by clearing the :kaf flag.
      opts.on(
        '-l',
        '--language [VALUE]',
        'Uses this specific language'
      ) do |value|
        @options[:language] = value
        @options[:kaf] = false
      end

      opts.on('-k', '--kaf', 'Treats the input as a KAF document') do
        @options[:kaf] = true
      end

      opts.on('-p', '--plain', 'Treats the input as plain text') do
        @options[:kaf] = false
      end

      # Free-form text appended after the switch summary in the help output.
      opts.separator <<-EOF

Examples:

  cat example.txt | #{opts.program_name} -l en # Manually specify the language
  cat example.kaf | #{opts.program_name}       # Uses the xml:lang attribute

Languages:

  * Dutch (nl)
  * English (en)
  * French (fr)
  * German (de)
  * Italian (it)
  * Spanish (es)

KAF Input:

  If you give a KAF file as an input (-k or --kaf) the language is taken from
  the xml:lang attribute inside the file. Else it expects that you give the
  language as an argument (-l or --language)

Sample KAF syntax:

  <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  <KAF version="v1.opener" xml:lang="en">
    <raw>This is some text.</raw>
  </KAF>
      EOF
    end
  end

  ##
  # Parses the command-line arguments (removing recognised switches from
  # `options[:args]`), runs the tokenizer on the input and prints its output.
  # Exits with a failure status when the tokenizer reports an unsuccessful
  # run.
  #
  # @param [String] input
  #
  def run(input)
    option_parser.parse!(options[:args])

    tokenizer = Tokenizer.new(options)

    stdout, stderr, process = tokenizer.run(input)

    puts stdout

    # NOTE(review): the names suggest `process` is a Process::Status-like
    # object (e.g. as returned by Open3.capture3) -- confirm against
    # Tokenizer#run. The respond_to? guard keeps the previous behaviour for
    # any other kind of value, so failures are reported only when the status
    # can actually be queried.
    abort stderr.to_s if process.respond_to?(:success?) && !process.success?
  end

  private

  ##
  # Shows the help message and exits the program.
  #
  # The help was explicitly requested, so it is written to STDOUT and the
  # program exits with a success status.
  #
  def show_help
    puts option_parser.to_s
    exit
  end

  ##
  # Shows the version and exits the program.
  #
  # The version was explicitly requested, so it is written to STDOUT and the
  # program exits with a success status.
  #
  def show_version
    puts "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
    exit
  end
end

Instance Method Details

#run(input) ⇒ Object

Parameters:

  • input (String)


84
85
86
87
88
89
90
91
92
# File 'lib/opener/tokenizer/cli.rb', line 84

##
# Parses the command-line arguments (removing recognised switches from
# `options[:args]`), runs the tokenizer on the input and prints its output.
# Exits with a failure status when the tokenizer reports an unsuccessful run.
#
# @param [String] input
#
def run(input)
  option_parser.parse!(options[:args])

  tokenizer = Tokenizer.new(options)

  stdout, stderr, process = tokenizer.run(input)

  puts stdout

  # NOTE(review): the names suggest `process` is a Process::Status-like
  # object (e.g. as returned by Open3.capture3) -- confirm against
  # Tokenizer#run. The respond_to? guard keeps the previous behaviour for any
  # other kind of value, so failures are reported only when the status can
  # actually be queried.
  abort stderr.to_s if process.respond_to?(:success?) && !process.success?
end