Module: Shiner

Defined in:
lib/shiner.rb,
lib/shiner/version.rb

Constant Summary collapse

VERSION =
"0.0.6"

Class Method Summary collapse

Class Method Details

.classifier ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/shiner.rb', line 42

# Lazily builds and caches the Bayes classifier used to score sentences.
#
# On the first call a Classifier::Bayes with the categories 'Interesting' and
# 'Uninteresting' is created and trained with every line of
# ../data/interesting.txt and ../data/uninteresting.txt (both resolved
# relative to this file's directory). Later calls return the cached instance
# without touching the data files again.
#
# @return [Classifier::Bayes] the trained, memoized classifier
def self.classifier
  return @classifier if @classifier

  bayes = Classifier::Bayes.new('Interesting', 'Uninteresting')

  interesting_lines = File.read(File.dirname(__FILE__) + '/../data/interesting.txt').split("\n")
  interesting_lines.each { |line| bayes.train_interesting(line) }

  uninteresting_lines = File.read(File.dirname(__FILE__) + '/../data/uninteresting.txt').split("\n")
  uninteresting_lines.each { |line| bayes.train_uninteresting(line) }

  @classifier = bayes
end

.shine(string, options = {:max_length => 188}) ⇒ Object



11
12
13
14
# File 'lib/shiner.rb', line 11

# Condenses +string+ to the text of its best-scoring run of sentences.
#
# The work of choosing the run is delegated to string_to_best_sentences,
# which receives +options+ unchanged; the chosen sentences are then joined
# with single spaces.
#
# @param string [String] the text to condense
# @param options [Hash] forwarded as-is; defaults to {:max_length => 188}.
#   Passing a hash replaces the default entirely (no merging is done here).
# @return [String, nil] the joined sentences, or nil when no batch was chosen
def self.shine(string, options={:max_length => 188})
  winner = string_to_best_sentences(string, options)
  return nil unless winner

  texts = winner[:sentences].map { |entry| entry[:sentence] }
  texts.join(' ')
end

.string_to_best_sentences(string, options = {}) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/shiner.rb', line 16

# Picks the run of consecutive sentences with the highest average score.
#
# Starting at every sentence position, a window of following sentences is
# taken (at most options[:max_sentences]; otherwise everything to the end).
# Once at least one candidate exists, windows whose joined text is shorter
# than options[:max_length], or that hold fewer than options[:max_sentences]
# sentences, are skipped. Remaining windows are trimmed from the end until
# their joined text fits options[:max_length]; windows trimmed to nothing are
# dropped.
#
# @param string [String] the text to analyse
# @param options [Hash] optional :max_length (of the space-joined sentences)
#   and :max_sentences limits
# @return [Hash, nil] {:sentences => [...], :score => Float} for the window
#   with the highest mean :score, or nil when no window qualifies
def self.string_to_best_sentences(string, options={})
  scored = string_to_scored_sentences(string)
  max_length = options[:max_length]
  max_sentences = options[:max_sentences]
  # Length of the window's sentences once joined with single spaces.
  joined_length = lambda { |items| items.map { |item| item[:sentence] }.join(' ').length }

  candidates = []
  scored.each_index do |start|
    # Array#[] with (start, count) returns a fresh array, so popping below
    # never mutates the scored list.
    window = scored[start, max_sentences || scored.size]
    have_candidate = !candidates.empty?

    # Trailing windows that cannot fill the limits are ignored, but only
    # after a first candidate has been recorded.
    next if max_length && joined_length.call(window) < max_length && have_candidate
    next if max_sentences && window.size < max_sentences && have_candidate

    window.pop while max_length && joined_length.call(window) > max_length
    next if window.empty?

    candidates << {
      :sentences => window,
      :score => window.map { |item| item[:score] }.sum.to_f / window.size
    }
  end

  candidates.sort_by { |candidate| candidate[:score] }.last
end

.string_to_scored_sentences(string) ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/shiner.rb', line 31

# Splits +string+ into sentences and attaches a classifier-based score to each.
#
# Every sentence is passed to classifier.classifications; the score is
# 1 - (Interesting / Uninteresting) computed from that result. Sentences are
# returned in their original order (no sorting is applied here).
#
# @param string [String] the text to split and score
# @return [Array<Hash>] one hash per sentence with the keys :sentence,
#   :classifications and :score
def self.string_to_scored_sentences(string)
  string_to_sentences(string).map do |text|
    result = classifier.classifications(text)
    ratio = result['Interesting'] / result['Uninteresting']
    {:sentence => text, :classifications => result, :score => 1 - ratio}
  end
end

.string_to_sentences(string) ⇒ Object



55
56
57
58
# File 'lib/shiner.rb', line 55

# Splits +string+ into sentences with a TactfulTokenizer model.
#
# The TactfulTokenizer::Model instance is created on first use and cached in
# @tactful_tokenizer for subsequent calls.
#
# @param string [String] the text to split
# @return [Object] whatever TactfulTokenizer::Model#tokenize_text returns
#   (presumably an array of sentence strings - confirm against the gem)
def self.string_to_sentences(string)
  @tactful_tokenizer = TactfulTokenizer::Model.new unless @tactful_tokenizer
  tokenizer = @tactful_tokenizer
  tokenizer.tokenize_text(string)
end