Class: Lda::Backends::PureRuby
- Defined in:
- lib/lda-ruby/backends/pure_ruby.rb
Constant Summary collapse
- MIN_PROBABILITY =
1e-12
Instance Attribute Summary collapse
-
#corpus_iteration_kernel ⇒ Object
writeonly
Sets the attribute corpus_iteration_kernel.
-
#document_inference_kernel ⇒ Object
writeonly
Sets the attribute document_inference_kernel.
-
#gamma_shift_kernel ⇒ Object
writeonly
Sets the attribute gamma_shift_kernel.
-
#topic_document_probability_kernel ⇒ Object
writeonly
Sets the attribute topic_document_probability_kernel.
-
#topic_term_accumulator_kernel ⇒ Object
writeonly
Sets the attribute topic_term_accumulator_kernel.
-
#topic_term_finalizer_kernel ⇒ Object
writeonly
Sets the attribute topic_term_finalizer_kernel.
-
#topic_term_seed_kernel ⇒ Object
writeonly
Sets the attribute topic_term_seed_kernel.
-
#topic_weights_kernel ⇒ Object
writeonly
Sets the attribute topic_weights_kernel.
-
#trusted_kernel_outputs ⇒ Object
writeonly
Sets the attribute trusted_kernel_outputs.
Attributes inherited from Base
#convergence, #corpus, #em_convergence, #em_max_iter, #est_alpha, #init_alpha, #max_iter, #num_topics, #verbose
Instance Method Summary collapse
- #beta ⇒ Object
- #compute_phi ⇒ Object
- #corpus=(corpus) ⇒ Object
- #em(start) ⇒ Object
- #gamma ⇒ Object
-
#initialize(random_seed: nil) ⇒ PureRuby
constructor
A new instance of PureRuby.
- #model ⇒ Object
- #name ⇒ Object
- #topic_document_probability(phi_matrix, document_counts) ⇒ Object
Methods inherited from Base
#fast_load_corpus_from_file, #load_settings, #set_config
Constructor Details
#initialize(random_seed: nil) ⇒ PureRuby
Returns a new instance of PureRuby.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 8
# Creates a backend with no fitted state and no kernels attached.
#
# Every model-state and kernel instance variable starts as nil, and kernel
# outputs are untrusted (false) until #trusted_kernel_outputs= says otherwise.
#
# @param random_seed [Object, nil] forwarded unchanged to Base#initialize
def initialize(random_seed: nil)
  super(random_seed: random_seed)

  # Same variables, in the same order, as one explicit nil assignment each.
  %i[
    @beta_probabilities
    @beta_log
    @gamma
    @phi
    @topic_weights_kernel
    @topic_term_accumulator_kernel
    @document_inference_kernel
    @corpus_iteration_kernel
    @topic_term_finalizer_kernel
    @gamma_shift_kernel
    @topic_document_probability_kernel
    @topic_term_seed_kernel
  ].each { |ivar| instance_variable_set(ivar, nil) }

  @trusted_kernel_outputs = false
end
Instance Attribute Details
#corpus_iteration_kernel=(value) ⇒ Object (writeonly)
Sets the attribute corpus_iteration_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def corpus_iteration_kernel=(value) @corpus_iteration_kernel = value end |
#document_inference_kernel=(value) ⇒ Object (writeonly)
Sets the attribute document_inference_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def document_inference_kernel=(value) @document_inference_kernel = value end |
#gamma_shift_kernel=(value) ⇒ Object (writeonly)
Sets the attribute gamma_shift_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def gamma_shift_kernel=(value) @gamma_shift_kernel = value end |
#topic_document_probability_kernel=(value) ⇒ Object (writeonly)
Sets the attribute topic_document_probability_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def topic_document_probability_kernel=(value) @topic_document_probability_kernel = value end |
#topic_term_accumulator_kernel=(value) ⇒ Object (writeonly)
Sets the attribute topic_term_accumulator_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def topic_term_accumulator_kernel=(value) @topic_term_accumulator_kernel = value end |
#topic_term_finalizer_kernel=(value) ⇒ Object (writeonly)
Sets the attribute topic_term_finalizer_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def topic_term_finalizer_kernel=(value) @topic_term_finalizer_kernel = value end |
#topic_term_seed_kernel=(value) ⇒ Object (writeonly)
Sets the attribute topic_term_seed_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def topic_term_seed_kernel=(value) @topic_term_seed_kernel = value end |
#topic_weights_kernel=(value) ⇒ Object (writeonly)
Sets the attribute topic_weights_kernel
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def topic_weights_kernel=(value) @topic_weights_kernel = value end |
#trusted_kernel_outputs=(value) ⇒ Object (writeonly)
Sets the attribute trusted_kernel_outputs
25 26 27 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25 def trusted_kernel_outputs=(value) @trusted_kernel_outputs = value end |
Instance Method Details
#beta ⇒ Object
108 109 110 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 108
# Returns the stored @beta_log value, which #em assigns from the second
# element of finalize_topic_term_counts.
#
# @return [Object] @beta_log itself (not a copy), or a new empty Array while
#   it is nil or false
def beta
  return [] unless @beta_log

  @beta_log
end
#compute_phi ⇒ Object
116 117 118 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 116
# Passes the stored @phi value (or an empty Array while it is unset) through
# clone_matrix and returns the result.
#
# @return [Object] whatever clone_matrix returns for the stored value
def compute_phi
  stored = @phi || []
  clone_matrix(stored)
end
#corpus=(corpus) ⇒ Object
39 40 41 42 43 44 45 46 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 39
# Hands the corpus to Base#corpus= and discards any previously fitted state,
# so #beta and #gamma report empty arrays until #em runs again.
#
# @param corpus [Object] forwarded to the inherited writer
# @return [true]
def corpus=(corpus)
  super
  @beta_probabilities = @beta_log = @gamma = @phi = nil
  true
end
#em(start) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 48
# Runs the EM loop over the current corpus and stores the fitted state in
# @beta_probabilities, @beta_log, @gamma and @phi.
#
# The loop runs at most Integer(em_max_iter) times and stops early once
# average_gamma_shift between two consecutive iterations is at or below
# Float(em_convergence).
#
# @param start [#to_s] initialisation mode. "seeded" or "deterministic"
#   (ASCII case-insensitive, surrounding whitespace ignored) selects
#   seeded_topic_term_probabilities; every other value, including nil, selects
#   initial_topic_term_probabilities.
# @return [nil] always; returns immediately when there is no corpus or the
#   corpus has no documents
# @raise [ArgumentError] if num_topics is not positive or the corpus has no terms
def em(start)
  return nil if @corpus.nil? || @corpus.num_docs.zero?

  topics = Integer(num_topics)
  raise ArgumentError, "num_topics must be greater than zero" if topics <= 0

  terms = max_term_index + 1
  raise ArgumentError, "corpus must contain terms" if terms <= 0

  document_words = @corpus.documents.map { |document| document.words.map(&:to_i) }
  document_counts = @corpus.documents.map { |document| document.counts.map(&:to_f) }

  # Normalise once. String#casecmp returns nil for incomparable encodings,
  # which made the former `casecmp(...).zero?` raise NoMethodError; a plain
  # equality test treats such a value as "not a seeded mode" instead.
  start_mode = start.to_s.strip.downcase(:ascii)
  @beta_probabilities =
    if %w[seeded deterministic].include?(start_mode)
      seeded_topic_term_probabilities(topics, terms, document_words, document_counts)
    else
      initial_topic_term_probabilities(topics, terms)
    end

  document_totals = document_counts.map { |counts| counts.sum.to_f }
  document_lengths = document_words.map(&:length)

  previous_gamma = nil
  Integer(em_max_iter).times do
    # With a trusted corpus-iteration kernel no accumulator is allocated and
    # nil is passed; otherwise each iteration starts from a fresh
    # topics x terms matrix filled with MIN_PROBABILITY.
    seed_counts =
      if @trusted_kernel_outputs && @corpus_iteration_kernel
        nil
      else
        Array.new(topics) { Array.new(terms, MIN_PROBABILITY) }
      end

    current_gamma, current_phi, topic_term_counts = infer_corpus_iteration(
      seed_counts,
      document_words,
      document_counts,
      document_totals,
      document_lengths,
      topics,
      terms
    )

    @beta_probabilities, @beta_log = finalize_topic_term_counts(topic_term_counts)
    @gamma = current_gamma
    @phi = current_phi

    break if previous_gamma && average_gamma_shift(previous_gamma, current_gamma) <= Float(em_convergence)

    previous_gamma = current_gamma
  end

  nil
end
#gamma ⇒ Object
112 113 114 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 112
# Returns the gamma value stored by the most recent #em iteration.
#
# @return [Object] @gamma itself (not a copy), or a new empty Array while it
#   is nil or false
def gamma
  return @gamma if @gamma

  []
end
#model ⇒ Object
120 121 122 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 120
# Summarises the model dimensions as a three-element array.
#
# @return [Array(Integer, Integer, Float)] Integer(num_topics),
#   max_term_index + 1, and Float(init_alpha), in that order
# @raise [ArgumentError, TypeError] if num_topics or init_alpha cannot be
#   converted by Integer()/Float()
def model
  topic_count = Integer(num_topics)
  term_count = max_term_index + 1
  alpha = Float(init_alpha)

  [topic_count, term_count, alpha]
end
#name ⇒ Object
35 36 37 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 35
# Identifier of this backend.
#
# @return [String] always "pure_ruby"
def name
  'pure_ruby'
end
#topic_document_probability(phi_matrix, document_counts) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 124
# Computes topic/document probabilities, preferring the installed kernel and
# falling back to default_topic_document_probability.
#
# The kernel result is used only when a kernel is installed, it does not
# raise, and valid_topic_document_probability_output? accepts its output for
# document_counts.size documents and Integer(num_topics) topics. An accepted
# output is returned as-is when @trusted_kernel_outputs is truthy, otherwise
# every entry is converted with to_f.
#
# Only the kernel path is rescued. The fallback runs outside the rescue, so
# an error raised by default_topic_document_probability propagates after a
# single attempt instead of triggering a second run of the same fallback.
#
# @param phi_matrix [Object] passed through to the kernel and the fallback
# @param document_counts [#size] passed through to the kernel and the fallback
# @return [Object] the kernel result (rows of Floats when untrusted), or
#   whatever default_topic_document_probability returns
def topic_document_probability(phi_matrix, document_counts)
  kernel_result =
    begin
      kernel = @topic_document_probability_kernel
      if kernel
        topics = Integer(num_topics)
        output = kernel.call(phi_matrix, document_counts, topics, MIN_PROBABILITY)
        if valid_topic_document_probability_output?(output, document_counts.size, topics)
          @trusted_kernel_outputs ? output : output.map { |row| row.map(&:to_f) }
        end
      end
    rescue StandardError
      # Any kernel, validation or conversion failure means "use the fallback".
      nil
    end

  return kernel_result unless kernel_result.nil?

  default_topic_document_probability(phi_matrix, document_counts)
end