Module: Shiner
- Defined in:
- lib/shiner.rb,
lib/shiner/version.rb
Constant Summary collapse
- VERSION =
"0.0.6"
Class Method Summary collapse
- .classifier ⇒ Object
- .shine(string, options = {:max_length => 188}) ⇒ Object
- .string_to_best_sentences(string, options = {}) ⇒ Object
- .string_to_scored_sentences(string) ⇒ Object
- .string_to_sentences(string) ⇒ Object
Class Method Details
.classifier ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/shiner.rb', line 42
#
# Returns the module-wide Bayes classifier, building it on first use.
#
# The classifier is created with the two categories 'Interesting' and
# 'Uninteresting' and trained with one call per line of the bundled
# data/interesting.txt and data/uninteresting.txt files (paths are resolved
# relative to this source file). The trained instance is cached in
# @classifier, so the training files are only read once per process.
#
# @return [Classifier::Bayes] the trained, memoized classifier
def self.classifier
  return @classifier if @classifier

  bayes = Classifier::Bayes.new 'Interesting', 'Uninteresting'
  interesting_path = File.dirname(__FILE__) + '/../data/interesting.txt'
  uninteresting_path = File.dirname(__FILE__) + '/../data/uninteresting.txt'

  File.read(interesting_path).split("\n").each { |line| bayes.train_interesting line }
  File.read(uninteresting_path).split("\n").each { |line| bayes.train_uninteresting line }

  # Only cache once training has fully succeeded.
  @classifier = bayes
end
.shine(string, options = {:max_length => 188}) ⇒ Object
11 12 13 14 |
# File 'lib/shiner.rb', line 11
#
# Picks the best-scoring run of consecutive sentences from +string+ and
# returns them joined with single spaces.
#
# @param string [String] the text to summarize
# @param options [Hash] forwarded unchanged to string_to_best_sentences;
#   defaults to {:max_length => 188}
# @return [String, nil] the joined sentences, or nil when
#   string_to_best_sentences finds no batch
def self.shine(string, options = {:max_length => 188})
  best = string_to_best_sentences(string, options)
  best[:sentences].map { |sentence| sentence[:sentence] }.join(' ') if best
end
.string_to_best_sentences(string, options = {}) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/shiner.rb', line 16
#
# Finds the highest-scoring window of consecutive sentences in +string+.
#
# A candidate window is started at every sentence index. Each window is
# capped at options[:max_sentences] sentences (all remaining sentences when
# unset) and then trimmed from the end until its space-joined text fits in
# options[:max_length] characters. A window's score is the mean of its
# sentences' :score values.
#
# @param string [String] the text to analyse
# @param options [Hash] optional limits
# @option options [Integer] :max_length maximum length of the joined sentences
# @option options [Integer] :max_sentences maximum number of sentences
# @return [Hash, nil] {:sentences => [...], :score => Float} for the best
#   window (the first one on ties), or nil when no window qualifies
def self.string_to_best_sentences(string, options = {})
  max_length = options[:max_length]
  max_sentences = options[:max_sentences]
  sentences = string_to_scored_sentences(string)
  joined_length = lambda { |group| group.map { |sentence| sentence[:sentence] }.join(' ').length }

  batches = []
  sentences.each_index do |index|
    # Array#[start, length] returns a fresh array, so popping below never
    # mutates +sentences+ itself.
    group = sentences[index, max_sentences || sentences.size]

    # Once at least one batch exists, skip trailing windows that fall short
    # of the requested limits (they are just the tail end of the text).
    unless batches.empty?
      next if max_length && joined_length.call(group) < max_length
      next if max_sentences && group.size < max_sentences
    end

    # Trim from the end until the window fits. The emptiness guard keeps a
    # negative :max_length from looping forever on an empty window.
    group.pop while max_length && !group.empty? && joined_length.call(group) > max_length
    next if group.empty?

    score = group.map { |sentence| sentence[:score] }.sum.to_f / group.size
    batches << { :sentences => group, :score => score }
  end

  batches.max_by { |batch| batch[:score] }
end
.string_to_scored_sentences(string) ⇒ Object
31 32 33 34 35 36 37 38 39 40 |
# File 'lib/shiner.rb', line 31
#
# Splits +string+ into sentences and scores each one with the classifier.
#
# The score is 1 minus the ratio of the sentence's 'Interesting'
# classification value to its 'Uninteresting' one.
# NOTE(review): the classification values are presumably the log-scale
# scores produced by Classifier::Bayes — confirm against the classifier gem.
#
# @param string [String] the text to split and score
# @return [Array<Hash>] one hash per sentence, in original order, with the
#   keys :sentence, :classifications and :score
def self.string_to_scored_sentences(string)
  string_to_sentences(string).map do |sentence|
    classifications = classifier.classifications(sentence)
    {
      :sentence => sentence,
      :classifications => classifications,
      :score => 1 - classifications['Interesting'] / classifications['Uninteresting']
    }
  end
end
.string_to_sentences(string) ⇒ Object
55 56 57 58 |
# File 'lib/shiner.rb', line 55
#
# Splits +string+ into sentences using a TactfulTokenizer model.
#
# The model is created on first use and cached in @tactful_tokenizer so
# later calls reuse the same instance.
#
# @param string [String] the text to tokenize
# @return [Object] whatever TactfulTokenizer::Model#tokenize_text returns
#   (presumably an Array of sentence strings — verify against the gem)
def self.string_to_sentences(string)
  tokenizer = (@tactful_tokenizer ||= TactfulTokenizer::Model.new)
  tokenizer.tokenize_text(string)
end