Module: FinModeling::HasStringClassifier

Included in:
AssetsItem, CashChangeItem, ComprehensiveIncomeStatementItem, EquityChangeItem, IncomeStatementItem, LiabsAndEquityItem
Defined in:
lib/finmodeling/has_string_classifer.rb

Defined Under Namespace

Modules: ClassMethods

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.included(base) ⇒ Object



54
55
56
# File 'lib/finmodeling/has_string_classifer.rb', line 54

# Hook invoked with the class or module (+base+) that mixes this module in.
# Extends +base+ with ClassMethods so those methods become available on
# +base+ itself.
#
# @param base [Module] the class or module doing the including
# @return [Module] +base+ (the value returned by +extend+)
def self.included(base)
  base.extend ClassMethods
end

Instance Method Details

#classification_estimates ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/finmodeling/has_string_classifer.rb', line 67

# Builds a signed confidence score for every class in self.class.klasses.
#
# Each class's classifier is asked to classify this item's tokens. The
# classifier result is read positionally: element 0 is the label, element 1
# the confidence. A :yes label keeps the confidence as-is; any other label
# negates it.
#
# @return [Hash] class => signed confidence
def classification_estimates
  tokens = tokenize

  self.class.klasses.each_with_object({}) do |cur_klass, estimates|
    outcome = self.class.classifiers[cur_klass].classify(*tokens)
    label = outcome[0]
    confidence = outcome[1]

    estimates[cur_klass] = label == :yes ? confidence : -confidence
  end
end

#classify ⇒ Object



80
81
82
83
84
# File 'lib/finmodeling/has_string_classifer.rb', line 80

# Picks the class whose entry in classification_estimates has the highest
# score.
#
# @return [Object, nil] the best-scoring class, or nil when there are no
#   estimates
def classify
  scores = classification_estimates
  # Rank the candidate classes from lowest to highest score; the winner is last.
  ranked = scores.keys.sort { |a, b| scores[a] <=> scores[b] }
  ranked.last
end

#tokenize ⇒ Object



86
87
88
89
90
91
92
93
# File 'lib/finmodeling/has_string_classifer.rb', line 86

# Splits the receiver into 1-, 2- and 3-word tokens.
#
# The receiver is downcased and split on whitespace, then bracketed with the
# sentinels "^" and "$" so tokens that touch the start or end of the text are
# distinguishable. All 1-word tokens come first, then 2-word, then 3-word;
# multi-word tokens are joined with a single space.
#
# @return [Array<String>] the flat list of tokens
def tokenize
  words = ["^"] + self.downcase.split(" ") + ["$"]

  [1, 2, 3].flat_map do |words_per_token|
    words.each_cons(words_per_token).map { |group| group.join(" ") }
  end
end

#train(expected_klass) ⇒ Object

Raises:

  • (TypeError)


58
59
60
61
62
63
64
65
# File 'lib/finmodeling/has_string_classifer.rb', line 58

# Trains every per-class classifier of the including class on this item's
# tokens.
#
# The classifier belonging to +expected_klass+ is trained with the label :yes;
# the classifiers of all other classes are trained with :no.
#
# @param expected_klass [Object] must be an element of self.class.klasses
# @raise [TypeError] if +expected_klass+ is not in self.class.klasses
# @return [Object] the value returned by self.class.klasses.each
def train(expected_klass)
  unless self.class.klasses.include?(expected_klass)
    raise TypeError, "#{expected_klass} is not in #{self.class.klasses}"
  end

  # Tokenize once up front: tokenize takes no arguments, so its result does
  # not vary with the classifier being trained.
  tokens = tokenize

  self.class.klasses.each do |cur_klass|
    label = expected_klass == cur_klass ? :yes : :no
    self.class.classifiers[cur_klass].train(label, *tokens)
  end
end