Class: Tomoto::LDA
- Inherits:
-
Object
- Object
- Tomoto::LDA
- Defined in:
- lib/tomoto/lda.rb
Class Method Summary collapse
- .load(filename) ⇒ Object
- .new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil) ⇒ Object
Instance Method Summary collapse
- #add_doc(doc) ⇒ Object
- #count_by_topics ⇒ Object
-
#infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0) ⇒ Object
TODO support multiple docs.
- #make_doc(doc) ⇒ Object
- #removed_top_words ⇒ Object
- #save(filename, full: true) ⇒ Object
-
#summary(initial_hp: true, params: true, topic_word_top_n: 5) ⇒ Object
returns string instead of printing.
- #topic_words(topic_id = nil, top_n: 10) ⇒ Object
-
#train(iterations = 10, workers: 0, parallel: :default) ⇒ Object
TODO raise error if iterations < 1.
- #tw ⇒ Object
Class Method Details
.load(filename) ⇒ Object
11 12 13 14 15 |
# File 'lib/tomoto/lda.rb', line 11 def self.load(filename) model = new model._load(filename) model end |
.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil) ⇒ Object
3 4 5 6 7 8 9 |
# File 'lib/tomoto/lda.rb', line 3 def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil) model = _new(to_tw(tw), k, alpha, eta, seed || -1) model.instance_variable_set(:@min_cf, min_cf) model.instance_variable_set(:@min_df, min_df) model.instance_variable_set(:@rm_top, rm_top) init_params(model, binding) end |
Instance Method Details
#add_doc(doc) ⇒ Object
17 18 19 |
# File 'lib/tomoto/lda.rb', line 17 def add_doc(doc) _add_doc(prepare_doc(doc)) end |
#count_by_topics ⇒ Object
31 32 33 34 |
# File 'lib/tomoto/lda.rb', line 31 def count_by_topics prepare _count_by_topics end |
#infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0) ⇒ Object
TODO support multiple docs
26 27 28 29 |
# File 'lib/tomoto/lda.rb', line 26 def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0) raise "cannot infer with untrained model" unless trained? _infer(doc, iter, tolerance, workers, to_ps(parallel), together) end |
#make_doc(doc) ⇒ Object
21 22 23 |
# File 'lib/tomoto/lda.rb', line 21 def make_doc(doc) _make_doc(tokenize_doc(doc)) end |
#removed_top_words ⇒ Object
36 37 38 39 |
# File 'lib/tomoto/lda.rb', line 36 def removed_top_words prepare _removed_top_words(@rm_top) end |
#save(filename, full: true) ⇒ Object
41 42 43 |
# File 'lib/tomoto/lda.rb', line 41 def save(filename, full: true) _save(filename, full) end |
#summary(initial_hp: true, params: true, topic_word_top_n: 5) ⇒ Object
returns string instead of printing
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/tomoto/lda.rb', line 46 def summary(initial_hp: true, params: true, topic_word_top_n: 5) summary = [] summary << "<Basic Info>" basic_info(summary) summary << "|" summary << "<Training Info>" training_info(summary) summary << "|" if initial_hp summary << "<Initial Parameters>" initial_params_info(summary) summary << "|" end if params summary << "<Parameters>" params_info(summary) summary << "|" end if topic_word_top_n > 0 summary << "<Topics>" topics_info(summary, topic_word_top_n: topic_word_top_n) summary << "|" end # skip ending | summary.pop summary.join("\n") end |
#topic_words(topic_id = nil, top_n: 10) ⇒ Object
81 82 83 84 85 86 87 |
# File 'lib/tomoto/lda.rb', line 81 def topic_words(topic_id = nil, top_n: 10) if topic_id _topic_words(topic_id, top_n) else k.times.map { |i| _topic_words(i, top_n) } end end |
#train(iterations = 10, workers: 0, parallel: :default) ⇒ Object
TODO raise error if iterations < 1
90 91 92 93 |
# File 'lib/tomoto/lda.rb', line 90 def train(iterations = 10, workers: 0, parallel: :default) prepare _train(iterations, workers, to_ps(parallel)) end |
#tw ⇒ Object
95 96 97 |
# File 'lib/tomoto/lda.rb', line 95 def tw TERM_WEIGHT[_tw] end |