Class: PragmaticSegmenter::Segmenter

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_segmenter/segmenter.rb

Overview

This class segments a text into an array of sentences.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text:, language: 'en', doc_type: nil, clean: true) ⇒ Segmenter

Returns a new instance of Segmenter.



11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/pragmatic_segmenter/segmenter.rb', line 11

# Builds a segmenter for +text+.
#
# @param text [String, nil] text to segment; when nil, no text is stored
# @param language [String] language code, resolved through
#   Languages.get_language_by_code
# @param doc_type [Object, nil] document type forwarded to the cleaner
# @param clean [Boolean] when truthy, the cleaner's output is stored;
#   otherwise +text+ is stored exactly as given
def initialize(text:, language: 'en', doc_type: nil, clean: true)
  # Record the caller's options before the nil-text guard so the
  # language and doc_type readers report them even without text.
  @language = language
  @doc_type = doc_type
  return unless text

  @language_module = Languages.get_language_by_code(language)
  @text =
    if clean
      cleaner.new(text: text, doc_type: @doc_type, language: @language_module).clean
    else
      text
    end
end

Instance Attribute Details

#doc_type ⇒ Object (readonly)

Returns the value of attribute doc_type.



9
10
11
# File 'lib/pragmatic_segmenter/segmenter.rb', line 9

# Read-only accessor for the +@doc_type+ instance variable, which the
# constructor assigns from its +doc_type:+ argument.
#
# @return [Object, nil] the stored document type
def doc_type
  @doc_type
end

#language ⇒ Object (readonly)

Returns the value of attribute language.



9
10
11
# File 'lib/pragmatic_segmenter/segmenter.rb', line 9

# Read-only accessor for the +@language+ instance variable, which the
# constructor assigns from its +language:+ argument.
#
# @return [String, nil] the stored language code
def language
  @language
end

#text ⇒ Object (readonly)

Returns the value of attribute text.



9
10
11
# File 'lib/pragmatic_segmenter/segmenter.rb', line 9

# Read-only accessor for the +@text+ instance variable. The constructor
# stores either the cleaner's output or the text as given, depending on
# its +clean:+ argument.
#
# @return [Object, nil] the stored text; nil when none was stored
def text
  @text
end

Instance Method Details

#segment ⇒ Object



24
25
26
27
# File 'lib/pragmatic_segmenter/segmenter.rb', line 24

# Runs the processor over the stored text.
#
# @return [Object] whatever the processor's +process+ call returns, or an
#   empty array when no text is stored
def segment
  if @text
    sentence_processor = processor.new(language: @language_module)
    sentence_processor.process(text: @text)
  else
    []
  end
end