Class: Eps::TextEncoder
- Inherits: Object
  - Object
  - Eps::TextEncoder
- Defined in:
- lib/eps/text_encoder.rb
Instance Attribute Summary collapse
- #options ⇒ Object (readonly)
  Returns the value of attribute options.
- #vocabulary ⇒ Object (readonly)
  Returns the value of attribute vocabulary.
Instance Method Summary collapse
- #fit(arr) ⇒ Object
- #initialize(**options) ⇒ TextEncoder (constructor)
  A new instance of TextEncoder.
- #transform(arr) ⇒ Object
Constructor Details
#initialize(**options) ⇒ TextEncoder
Returns a new instance of TextEncoder.
5 6 7 8 |
# File 'lib/eps/text_encoder.rb', line 5
#
# Returns a new instance of TextEncoder.
#
# The keyword options are stored untouched in @options. A vocabulary may be
# supplied up front through the +:vocabulary+ key; when it is absent (or nil)
# the vocabulary starts out as an empty array.
#
# @param options [Hash] keyword options; only +:vocabulary+ is read here
def initialize(**options)
  @options = options
  @vocabulary = options[:vocabulary] || []
end
Instance Attribute Details
#options ⇒ Object (readonly)
Returns the value of attribute options.
3 4 5 |
# File 'lib/eps/text_encoder.rb', line 3
#
# Returns the value of attribute options.
#
# @return [Object] whatever is currently held in @options
def options
  @options
end
#vocabulary ⇒ Object (readonly)
Returns the value of attribute vocabulary.
3 4 5 |
# File 'lib/eps/text_encoder.rb', line 3
#
# Returns the value of attribute vocabulary.
#
# @return [Object] whatever is currently held in @vocabulary
def vocabulary
  @vocabulary
end
Instance Method Details
#fit(arr) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/eps/text_encoder.rb', line 10
#
# Builds the vocabulary from +arr+ and returns the fitted result.
#
# +count_and_fit+ (defined elsewhere in this class) supplies two values: a
# collection of counts and the fitted output. The counts are presumably a Hash
# of token => occurrence count -- confirm against count_and_fit. They are
# narrowed in this order, each step applying only when its option is set:
#
# * +:min_length+      - keep keys whose +length+ is at least this value
# * +:min_occurrences+ - keep entries whose count is at least this value
# * +:max_occurrences+ - drop entries whose count exceeds this value
# * +:max_features+    - keep only this many entries, highest counts first
#
# The surviving keys become @vocabulary. The fitted output from
# +count_and_fit+ is returned as-is; it is not re-filtered here.
#
# @param arr [Object] input handed straight to +count_and_fit+
# @return [Object] the second value returned by +count_and_fit+
def fit(arr)
  counts, fitted = count_and_fit(arr)

  min_length = options[:min_length]
  counts.select! { |token, _| token.length >= min_length } if min_length

  min_occurrences = options[:min_occurrences]
  counts.select! { |_, count| count >= min_occurrences } if min_occurrences

  max_occurrences = options[:max_occurrences]
  counts.reject! { |_, count| count > max_occurrences } if max_occurrences

  max_features = options[:max_features]
  # Sort descending by count, then truncate to the requested number of entries.
  counts = counts.sort_by { |_, count| -count }[0...max_features].to_h if max_features

  @vocabulary = counts.keys

  fitted
end
#transform(arr) ⇒ Object
38 39 40 41 |
# File 'lib/eps/text_encoder.rb', line 38
#
# Encodes +arr+ without touching the stored vocabulary.
#
# Delegates to +count_and_fit+ (defined elsewhere in this class) and returns
# only its second value; the counts it also produces are discarded.
#
# @param arr [Object] input handed straight to +count_and_fit+
# @return [Object] the second value returned by +count_and_fit+
def transform(arr)
  _counts, encoded = count_and_fit(arr)
  encoded
end