Class: Fuzzy::Scorer
- Inherits: Object
- Inheritance chain: Object → Fuzzy::Scorer
- Defined in:
- lib/fuzzy.rb
Defined Under Namespace
Instance Method Summary
- #initialize(corpus) ⇒ Scorer (constructor): returns a new instance of Scorer.
- #normalized_tokens ⇒ Object
- #rank(query) ⇒ Object
- #tokenize ⇒ Object
- #tokens ⇒ Object
Constructor Details
#initialize(corpus) ⇒ Scorer
Returns a new instance of Scorer.
23 24 25 26 27 28 |
# File 'lib/fuzzy.rb', line 23

# Builds a scorer from a list of weighted term groups.
#
# Entries whose :weight or :terms is blank are discarded, and blank terms are
# removed from the entries that remain. Each surviving entry is wrapped in a
# TermSet; the summed TermSet weights are then passed to every TermSet#tokens
# call and the results are flattened into @weighted_tokens.
#
# NOTE(review): blank? is not core Ruby -- presumably ActiveSupport is loaded;
# confirm against the application's requires.
#
# @param corpus [Array<Hash>, nil] entries read via entry[:weight] and
#   entry[:terms]; nil is treated as an empty list
def initialize(corpus)
  usable_entries = (corpus || []).reject do |entry|
    entry[:weight].blank? || entry[:terms].blank?
  end

  @corpus = usable_entries.map do |entry|
    present_terms = entry[:terms].reject { |term| term.blank? }
    TermSet.new(entry[:weight], present_terms)
  end

  @total_weight = @corpus.sum(&:weight)
  @weighted_tokens = @corpus.flat_map { |term_set| term_set.tokens(@total_weight) }
end
Instance Method Details
#normalized_tokens ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/fuzzy.rb', line 50

# Rescales the weights returned by #tokens linearly so that the smallest
# weight maps to 0 and the largest maps to 1.
#
# Degenerate inputs are handled explicitly:
# * no tokens            -> an empty array (max_by/min_by would yield nil);
# * all weights identical -> every token gets weight 1.0, because each one is
#   the maximum (the unguarded transform divides by zero and produces NaN).
#
# @return [Array<Token>] new Token objects carrying the original token value
#   and the rescaled weight
def normalized_tokens
  basic_tokens = tokens
  return [] if basic_tokens.empty?

  min, max = basic_tokens.map(&:weight).minmax

  # A zero-width range has no slope; treat every token as the maximum.
  return basic_tokens.map { |t| Token.new(t.token, 1.0) } if max == min

  # Linear transform y = m*x + c with (min, 0) and (max, 1) on the line:
  #   m = (1 - 0) / (max - min)
  m = 1.fdiv(max - min)
  # Substituting the max point gives 1 = m*max + c.
  c = 1 - (m * max)

  basic_tokens.map { |t| Token.new(t.token, (t.weight * m) + c) }
end
#rank(query) ⇒ Object
30 31 32 33 34 35 36 37 38 |
# File 'lib/fuzzy.rb', line 30

# Scores how well +query+ matches the weighted tokens as a prefix.
#
# A token that starts with the query contributes
# (query length / token length) * token weight; every other token
# contributes 0. The result is the sum of all contributions divided by the
# number of strictly positive contributions.
#
# @param query [String, nil] the text to match against token prefixes
# @return [Numeric] 0 when the query is nil or empty, or when no token yields
#   a positive score; otherwise the averaged score as a Float
def rank(query)
  # An empty query scores 0 against every token, and nil cannot be matched.
  return 0 if query.nil? || query.empty?

  scores = @weighted_tokens.map do |wt|
    # Core String#start_with? rather than the ActiveSupport-only alias.
    next 0 unless wt.token.start_with?(query)

    query.length.fdiv(wt.token.length) * wt.weight
  end

  hits = scores.count(&:positive?)
  return 0 if hits.zero?

  scores.sum / hits
end
#tokenize ⇒ Object
40 41 42 43 44 |
# File 'lib/fuzzy.rb', line 40

# Collects every leading slice of every cleaned term in the corpus.
#
# For a term of length n this yields the slices of length 1 through n taken
# from index 0; converting to a Set removes duplicates across terms.
#
# NOTE(review): terms are presumably Strings, making the slices prefixes --
# confirm against TermSet#cleaned_terms.
#
# @return [Set] the distinct leading slices of all cleaned terms
def tokenize
  all_terms = @corpus.flat_map { |term_set| term_set.cleaned_terms.to_a }

  prefixes = all_terms.flat_map do |term|
    (1..term.length).map { |size| term.slice(0, size) }
  end

  prefixes.to_set
end