Module: ClassifierReborn::Summarizer
- Defined in:
- lib/classifier-reborn/lsi/summarizer.rb
Class Method Summary
- .paragraph_summary(str, count = 1, separator = ' [...] ') ⇒ Object
- .perform_lsi(chunks, count, separator) ⇒ Object
- .split_paragraphs(str) ⇒ Object
- .split_sentences(str) ⇒ Object
- .summary(str, count = 10, separator = ' [...] ') ⇒ Object
Class Method Details
.paragraph_summary(str, count = 1, separator = ' [...] ') ⇒ Object
15 16 17 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 15
#
# Summarizes +str+ at paragraph granularity: the text is cut into chunks by
# split_paragraphs and those chunks are handed to perform_lsi.
#
# str       - the text to summarize.
# count     - forwarded to perform_lsi (defaults to 1).
# separator - forwarded to perform_lsi; the string placed between the
#             selected chunks (defaults to ' [...] ').
#
# Returns whatever perform_lsi returns for those arguments.
def paragraph_summary(str, count = 1, separator = ' [...] ')
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end
.perform_lsi(chunks, count, separator) ⇒ Object
27 28 29 30 31 32 33 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 27
#
# Builds an LSI index from the usable chunks and joins the chunks the index
# hands back.
#
# chunks    - an Array of Strings (sentences or paragraphs).
# count     - passed to LSI#highest_relative_content; presumably the number
#             of chunks to keep (confirm against the LSI class).
# separator - the String inserted between the selected chunks.
#
# Returns a String: the selected chunks, each stripped of surrounding
# whitespace, joined with +separator+, in the order the index returned them.
def perform_lsi(chunks, count, separator)
  # auto_rebuild is disabled so the index is built once, explicitly, below.
  lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

  chunks.each do |chunk|
    # Skip blank chunks and one-word chunks (this also drops the bare
    # delimiters that the split_* helpers include in their output).
    lsi << chunk if chunk.strip.split.size > 1
  end
  lsi.build_index

  # The previous `summaries.select { |c| summaries.include? c }` compared the
  # array against itself and therefore always kept every element; it has been
  # removed without changing the result.
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end
.split_paragraphs(str) ⇒ Object
23 24 25 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 23
#
# Cuts +str+ into paragraph chunks at blank-line separators ("\n\n", "\r\r"
# or "\r\n\r\n").
#
# The separator pattern is wrapped in a capture group, so String#split keeps
# each matched separator as its own element between the paragraphs.
#
# Returns an Array of Strings.
def split_paragraphs(str)
  # TODO: make this less primitive
  paragraph_break = /(\n\n|\r\r|\r\n\r\n)/
  str.split(paragraph_break)
end
.split_sentences(str) ⇒ Object
19 20 21 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 19
#
# Cuts +str+ into sentence chunks at every ".", "!" or "?".
#
# The punctuation pattern is wrapped in a capture group, so String#split
# keeps each matched punctuation mark as its own element between the
# sentences.
#
# Returns an Array of Strings.
def split_sentences(str)
  # TODO: make this less primitive
  sentence_end = /(\.|\!|\?)/
  str.split(sentence_end)
end
.summary(str, count = 10, separator = ' [...] ') ⇒ Object
11 12 13 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 11
#
# Summarizes +str+ at sentence granularity: the text is cut into chunks by
# split_sentences and those chunks are handed to perform_lsi.
#
# str       - the text to summarize.
# count     - forwarded to perform_lsi (defaults to 10).
# separator - forwarded to perform_lsi; the string placed between the
#             selected chunks (defaults to ' [...] ').
#
# Returns whatever perform_lsi returns for those arguments.
def summary(str, count = 10, separator = ' [...] ')
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end