Module: ClassifierReborn::Summarizer

Defined in:
lib/classifier-reborn/lsi/summarizer.rb

Class Method Summary collapse

Class Method Details

.paragraph_summary(str, count = 1, separator = ' [...] ') ⇒ Object



15
16
17
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 15

# Summarizes +str+ at paragraph granularity.
#
# The text is cut into chunks by split_paragraphs and those chunks are
# handed to perform_lsi together with +count+ and +separator+.
#
# str       - the text to summarize
# count     - forwarded to perform_lsi as the number of chunks requested
#             (defaults to 1)
# separator - string placed between the selected chunks
#             (defaults to ' [...] ')
#
# Returns whatever perform_lsi returns for those arguments.
def paragraph_summary(str, count = 1, separator = ' [...] ')
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end

.perform_lsi(chunks, count, separator) ⇒ Object



27
28
29
30
31
32
33
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 27

# Feeds the usable +chunks+ into a fresh ClassifierReborn::LSI index and
# joins the entries it ranks highest into a single string.
#
# chunks    - enumerable of strings (each must respond to #strip)
# count     - passed to LSI#highest_relative_content as the number of
#             entries requested
# separator - string inserted between the selected entries
#
# Returns a String: the entries returned by highest_relative_content, each
# stripped of surrounding whitespace, joined with +separator+. The entries
# are used as-is, in the order the index returns them.
def perform_lsi(chunks, count, separator)
  # auto_rebuild is switched off so the index is built once, explicitly,
  # after every chunk has been added.
  lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

  chunks.each do |chunk|
    # Skip chunks that are blank or consist of a single word.
    next if chunk.strip.split.size <= 1

    lsi << chunk
  end

  lsi.build_index
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end

.split_paragraphs(str) ⇒ Object



23
24
25
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 23

# Cuts +str+ wherever a blank line occurs, i.e. at "\n\n", "\r\r" or
# "\r\n\r\n".
#
# The pattern wraps the separator in a capture group, so String#split
# keeps each matched separator as its own element between the paragraphs.
#
# str - the text to split
#
# Returns an Array of Strings alternating paragraph text and separators.
def split_paragraphs(str)
  # TODO: make this less primitive
  paragraph_break = /(\n\n|\r\r|\r\n\r\n)/
  str.split(paragraph_break)
end

.split_sentences(str) ⇒ Object



19
20
21
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 19

# Cuts +str+ at every ".", "!" or "?".
#
# The pattern wraps the punctuation in a capture group, so String#split
# keeps each matched punctuation mark as its own element between the
# pieces of text.
#
# str - the text to split
#
# Returns an Array of Strings alternating sentence text and punctuation.
def split_sentences(str)
  # TODO: make this less primitive
  sentence_end = /(\.|\!|\?)/
  str.split(sentence_end)
end

.summary(str, count = 10, separator = ' [...] ') ⇒ Object



11
12
13
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 11

# Summarizes +str+ at sentence granularity.
#
# The text is cut into chunks by split_sentences and those chunks are
# handed to perform_lsi together with +count+ and +separator+.
#
# str       - the text to summarize
# count     - forwarded to perform_lsi as the number of chunks requested
#             (defaults to 10)
# separator - string placed between the selected chunks
#             (defaults to ' [...] ')
#
# Returns whatever perform_lsi returns for those arguments.
def summary(str, count = 10, separator = ' [...] ')
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end