Module: FinModeling::HasStringClassifier
- Included in:
- AssetsItem, CashChangeItem, ComprehensiveIncomeStatementItem, EquityChangeItem, IncomeStatementItem, LiabsAndEquityItem
- Defined in:
- lib/finmodeling/has_string_classifer.rb
Defined Under Namespace
Modules: ClassMethods
Class Method Summary
- .included(base) ⇒ Object
Instance Method Summary
- #classification_estimates ⇒ Object
- #classify ⇒ Object
- #tokenize ⇒ Object
- #train(expected_klass) ⇒ Object
Class Method Details
.included(base) ⇒ Object
54 55 56 |
# File 'lib/finmodeling/has_string_classifer.rb', line 54
#
# Module inclusion hook: receives the class or module (+base+) that is
# including this module and extends it with ClassMethods, so the methods
# defined there become available on +base+ itself.
#
# @param base [Module] the class or module including this mixin
def self.included(base)
  base.extend ClassMethods
end
Instance Method Details
#classification_estimates ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/finmodeling/has_string_classifer.rb', line 67
#
# Scores this object against every class known to the including class.
#
# For each entry in +self.class.klasses+, the matching classifier from
# +self.class.classifiers+ is asked to classify this object's tokens. The
# result is read positionally: element 0 is the label, element 1 is the
# confidence (presumably a number; the classifier is defined elsewhere).
# A +:yes+ label keeps the confidence as-is; any other label negates it.
#
# @return [Hash] maps each class to its signed confidence
def classification_estimates
  # Tokenize once; the same tokens are fed to every per-class classifier.
  tokens = tokenize

  self.class.klasses.each_with_object({}) do |cur_klass, estimates|
    ret = self.class.classifiers[cur_klass].classify(*tokens)
    verdict = ret[0]
    confidence = ret[1]
    estimates[cur_klass] = verdict == :yes ? confidence : -confidence
  end
end
#classify ⇒ Object
80 81 82 83 84 |
# File 'lib/finmodeling/has_string_classifer.rb', line 80
#
# Picks the class with the highest score from #classification_estimates.
#
# Uses a single max_by pass instead of sorting every key just to take the
# last one. When several classes share the top score, the one that appears
# first in the estimates hash is returned.
#
# @return [Object, nil] the best-scoring class, or nil when there are no
#   estimates
def classify
  estimates = classification_estimates
  estimates.keys.max_by { |klass| estimates[klass] }
end
#tokenize ⇒ Object
86 87 88 89 90 91 92 93 |
# File 'lib/finmodeling/has_string_classifer.rb', line 86
#
# Breaks this object's text into word n-gram tokens.
#
# The receiver is downcased and split on whitespace, then wrapped in the
# sentinel words "^" (start) and "$" (end). Every run of 1, 2 and 3
# consecutive words is joined with a single space to form a token.
# Assumes the including object responds to #downcase like a String.
#
# @return [Array<String>] all 1-word tokens, then 2-word, then 3-word
def tokenize
  words = ["^"] + downcase.split(" ") + ["$"]

  [1, 2, 3].flat_map do |words_per_token|
    words.each_cons(words_per_token).map { |gram| gram.join(" ") }
  end
end
#train(expected_klass) ⇒ Object
58 59 60 61 62 63 64 65 |
# File 'lib/finmodeling/has_string_classifer.rb', line 58
#
# Trains every per-class classifier with this object's tokens.
#
# The classifier for +expected_klass+ is trained with the label +:yes+;
# the classifiers for all other known classes are trained with +:no+.
#
# @param expected_klass [Object] the correct class for this object; must be
#   one of +self.class.klasses+
# @raise [TypeError] if +expected_klass+ is not in +self.class.klasses+
# @return [Object] the result of iterating +self.class.klasses+
def train(expected_klass)
  klasses = self.class.klasses
  unless klasses.include?(expected_klass)
    raise TypeError, "#{expected_klass} is not in #{klasses}"
  end

  # Tokens do not depend on the class being trained, so compute them once
  # rather than once per class.
  tokens = tokenize

  klasses.each do |cur_klass|
    label = expected_klass == cur_klass ? :yes : :no
    self.class.classifiers[cur_klass].train(label, *tokens)
  end
end