Class: Lda::Backends::PureRuby

Inherits:
Base
  • Object
show all
Defined in:
lib/lda-ruby/backends/pure_ruby.rb

Constant Summary collapse

MIN_PROBABILITY =
1e-12

Instance Attribute Summary collapse

Attributes inherited from Base

#convergence, #corpus, #em_convergence, #em_max_iter, #est_alpha, #init_alpha, #max_iter, #num_topics, #verbose

Instance Method Summary collapse

Methods inherited from Base

#fast_load_corpus_from_file, #load_settings, #set_config

Constructor Details

#initialize(random_seed: nil) ⇒ PureRuby

Returns a new instance of PureRuby.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 8

# Builds a backend with no fitted model state and no kernels installed.
#
# After the superclass initializer has run, every model cache and every kernel
# slot is set to nil and kernel outputs are marked as untrusted.
#
# @param random_seed [Object, nil] forwarded unchanged to the superclass initializer
def initialize(random_seed: nil)
  super(random_seed: random_seed)

  # Assigned one at a time, in this order, so the instance ends up with the
  # same instance variables in the same sequence as explicit assignments.
  %i[
    @beta_probabilities
    @beta_log
    @gamma
    @phi
    @topic_weights_kernel
    @topic_term_accumulator_kernel
    @document_inference_kernel
    @corpus_iteration_kernel
    @topic_term_finalizer_kernel
    @gamma_shift_kernel
    @topic_document_probability_kernel
    @topic_term_seed_kernel
  ].each { |ivar| instance_variable_set(ivar, nil) }

  @trusted_kernel_outputs = false
end

Instance Attribute Details

#corpus_iteration_kernel=(value) ⇒ Object (writeonly)

Sets the attribute corpus_iteration_kernel

Parameters:

  • value

    the value to set the attribute corpus_iteration_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @corpus_iteration_kernel (write-only; nil after #initialize).
#
# #em only tests this slot for truthiness: when it and @trusted_kernel_outputs
# are both set, #em passes nil to infer_corpus_iteration instead of a
# pre-filled topic-by-term count matrix.
# NOTE(review): presumably a callable invoked inside infer_corpus_iteration;
# that call site is not visible here, so confirm the expected signature there.
def corpus_iteration_kernel=(value)
  @corpus_iteration_kernel = value
end

#document_inference_kernel=(value) ⇒ Object (writeonly)

Sets the attribute document_inference_kernel

Parameters:

  • value

    the value to set the attribute document_inference_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @document_inference_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable that overrides per-document inference;
# confirm the expected signature at the call site.
def document_inference_kernel=(value)
  @document_inference_kernel = value
end

#gamma_shift_kernel=(value) ⇒ Object (writeonly)

Sets the attribute gamma_shift_kernel

Parameters:

  • value

    the value to set the attribute gamma_shift_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @gamma_shift_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable used when measuring the gamma shift between
# iterations; confirm the expected signature at the call site.
def gamma_shift_kernel=(value)
  @gamma_shift_kernel = value
end

#topic_document_probability_kernel=(value) ⇒ Object (writeonly)

Sets the attribute topic_document_probability_kernel

Parameters:

  • value

    the value to set the attribute topic_document_probability_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @topic_document_probability_kernel (write-only; nil after #initialize).
#
# When set, #topic_document_probability invokes it as
#   value.call(phi_matrix, document_counts, Integer(num_topics), MIN_PROBABILITY)
# and uses the result only if valid_topic_document_probability_output? accepts
# it. A rejected result, or any StandardError, falls back to
# default_topic_document_probability.
def topic_document_probability_kernel=(value)
  @topic_document_probability_kernel = value
end

#topic_term_accumulator_kernel=(value) ⇒ Object (writeonly)

Sets the attribute topic_term_accumulator_kernel

Parameters:

  • value

    the value to set the attribute topic_term_accumulator_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @topic_term_accumulator_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable that accumulates topic-term counts; confirm
# the expected signature at the call site.
def topic_term_accumulator_kernel=(value)
  @topic_term_accumulator_kernel = value
end

#topic_term_finalizer_kernel=(value) ⇒ Object (writeonly)

Sets the attribute topic_term_finalizer_kernel

Parameters:

  • value

    the value to set the attribute topic_term_finalizer_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @topic_term_finalizer_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable used by finalize_topic_term_counts; confirm
# the expected signature at the call site.
def topic_term_finalizer_kernel=(value)
  @topic_term_finalizer_kernel = value
end

#topic_term_seed_kernel=(value) ⇒ Object (writeonly)

Sets the attribute topic_term_seed_kernel

Parameters:

  • value

    the value to set the attribute topic_term_seed_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @topic_term_seed_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable used by seeded_topic_term_probabilities;
# confirm the expected signature at the call site.
def topic_term_seed_kernel=(value)
  @topic_term_seed_kernel = value
end

#topic_weights_kernel=(value) ⇒ Object (writeonly)

Sets the attribute topic_weights_kernel

Parameters:

  • value

    the value to set the attribute topic_weights_kernel to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @topic_weights_kernel (write-only; nil after #initialize).
#
# NOTE(review): the code that reads this slot is not visible in this section.
# Presumably an optional callable that computes per-word topic weights; confirm
# the expected signature at the call site.
def topic_weights_kernel=(value)
  @topic_weights_kernel = value
end

#trusted_kernel_outputs=(value) ⇒ Object (writeonly)

Sets the attribute trusted_kernel_outputs

Parameters:

  • value

    the value to set the attribute trusted_kernel_outputs to.



25
26
27
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 25

# Stores +value+ in @trusted_kernel_outputs (write-only; false after #initialize).
#
# When truthy:
# * #topic_document_probability returns an accepted kernel result as-is
#   instead of converting every entry with to_f;
# * #em, if @corpus_iteration_kernel is also set, passes nil rather than a
#   pre-filled count matrix to infer_corpus_iteration.
def trusted_kernel_outputs=(value)
  @trusted_kernel_outputs = value
end

Instance Method Details

#beta ⇒ Object



108
109
110
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 108

# Returns whatever is stored in @beta_log, or a fresh empty Array while that
# slot is still unset (nil or false). The stored object is returned itself,
# not a copy.
def beta
  return [] unless @beta_log

  @beta_log
end

#compute_phi ⇒ Object



116
117
118
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 116

# Returns the result of clone_matrix applied to @phi, or to an empty Array
# when @phi is unset.
def compute_phi
  source = @phi || []
  clone_matrix(source)
end

#corpus=(corpus) ⇒ Object



39
40
41
42
43
44
45
46
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 39

# Replaces the corpus and clears the cached model state.
#
# The argument is handed to the inherited writer through bare +super+;
# afterwards @beta_probabilities, @beta_log, @gamma and @phi are reset to nil.
#
# @param corpus [Object] passed through to the inherited writer
# @return [true] only observable when the writer is invoked via send/public_send
def corpus=(corpus)
  super

  %i[@beta_probabilities @beta_log @gamma @phi].each do |ivar|
    instance_variable_set(ivar, nil)
  end

  true
end

#em(start) ⇒ Object

Raises:

  • (ArgumentError)


48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 48

# Fits the model to the loaded corpus by repeatedly calling
# infer_corpus_iteration and finalize_topic_term_counts.
#
# Does nothing (returns nil) when no corpus is set or it has zero documents.
# Otherwise the topic-term probabilities are initialised, then up to
# Integer(em_max_iter) iterations are run. Each iteration overwrites
# @beta_probabilities, @beta_log, @gamma and @phi. From the second iteration
# on, the loop stops as soon as average_gamma_shift between the previous and
# current gamma is <= Float(em_convergence).
#
# @param start [#to_s] initialisation mode; "seeded" or "deterministic"
#   (case-insensitive, surrounding whitespace ignored) selects
#   seeded_topic_term_probabilities, anything else selects
#   initial_topic_term_probabilities
# @return [nil]
# @raise [ArgumentError] if num_topics is not positive or the corpus has no terms
def em(start)
  return nil if @corpus.nil? || @corpus.num_docs.zero?

  topic_count = Integer(num_topics)
  raise ArgumentError, "num_topics must be greater than zero" if topic_count <= 0

  vocabulary_size = max_term_index + 1
  raise ArgumentError, "corpus must contain terms" if vocabulary_size <= 0

  words_by_document = @corpus.documents.map { |doc| doc.words.map(&:to_i) }
  counts_by_document = @corpus.documents.map { |doc| doc.counts.map(&:to_f) }

  mode = start.to_s.strip
  deterministic_start = %w[seeded deterministic].any? { |label| mode.casecmp(label).zero? }
  @beta_probabilities =
    if deterministic_start
      seeded_topic_term_probabilities(topic_count, vocabulary_size, words_by_document, counts_by_document)
    else
      initial_topic_term_probabilities(topic_count, vocabulary_size)
    end

  totals_by_document = counts_by_document.map { |row| row.sum.to_f }
  lengths_by_document = words_by_document.map(&:length)

  last_gamma = nil

  Integer(em_max_iter).times do
    # With trusted outputs and a corpus iteration kernel installed, no count
    # matrix is pre-allocated; nil is passed in its place.
    skip_seed_counts = @trusted_kernel_outputs && @corpus_iteration_kernel
    seed_counts =
      if skip_seed_counts
        nil
      else
        Array.new(topic_count) { Array.new(vocabulary_size, MIN_PROBABILITY) }
      end

    current_gamma, current_phi, topic_term_counts = infer_corpus_iteration(
      seed_counts,
      words_by_document,
      counts_by_document,
      totals_by_document,
      lengths_by_document,
      topic_count,
      vocabulary_size
    )

    @beta_probabilities, @beta_log = finalize_topic_term_counts(topic_term_counts)
    @gamma = current_gamma
    @phi = current_phi

    # The first iteration has nothing to compare against, so it never stops here.
    converged = last_gamma && average_gamma_shift(last_gamma, current_gamma) <= Float(em_convergence)
    break if converged

    last_gamma = current_gamma
  end

  nil
end

#gamma ⇒ Object



112
113
114
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 112

# Returns whatever is stored in @gamma, or a fresh empty Array while that slot
# is still unset (nil or false). The stored object is returned itself, not a
# copy.
def gamma
  return [] unless @gamma

  @gamma
end

#model ⇒ Object



120
121
122
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 120

# Returns a three-element Array: Integer(num_topics), max_term_index + 1, and
# Float(init_alpha), in that order.
#
# @raise [ArgumentError, TypeError] if num_topics or init_alpha cannot be
#   converted by Integer() / Float()
def model
  topic_count = Integer(num_topics)
  vocabulary_size = max_term_index + 1
  alpha = Float(init_alpha)

  [topic_count, vocabulary_size, alpha]
end

#name ⇒ Object



35
36
37
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 35

# Returns the String "pure_ruby".
# NOTE(review): presumably the identifier callers use to recognise this
# backend; confirm against the code that reads it.
def name
  "pure_ruby"
end

#topic_document_probability(phi_matrix, document_counts) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/lda-ruby/backends/pure_ruby.rb', line 124

# Computes the topic/document probability result, preferring the installed
# kernel and falling back to default_topic_document_probability.
#
# If @topic_document_probability_kernel is set it is called with
# (phi_matrix, document_counts, Integer(num_topics), MIN_PROBABILITY). Its
# result is used only when valid_topic_document_probability_output? accepts
# it for document_counts.size rows and Integer(num_topics) topics. An accepted
# result is returned untouched when @trusted_kernel_outputs is set; otherwise
# every entry is converted with to_f. Any StandardError raised along the way
# also leads to the default implementation.
#
# @param phi_matrix [Object] passed through to the kernel / default implementation
# @param document_counts [#size] passed through; its size is the expected row count
# @return [Object] the kernel result (possibly coerced) or the default result
def topic_document_probability(phi_matrix, document_counts)
  kernel = @topic_document_probability_kernel
  candidate = (kernel.call(phi_matrix, document_counts, Integer(num_topics), MIN_PROBABILITY) if kernel)

  unless valid_topic_document_probability_output?(candidate, document_counts.size, Integer(num_topics))
    return default_topic_document_probability(phi_matrix, document_counts)
  end

  return candidate if @trusted_kernel_outputs

  candidate.map { |row| row.map(&:to_f) }
rescue StandardError
  default_topic_document_probability(phi_matrix, document_counts)
end