Class: String
- Inherits: Object
- Inheritance hierarchy: Object → String
- Defined in:
- lib/text_nlp/string.rb
Class Attribute Summary
-
.normalizer ⇒ Object
Returns the value of attribute normalizer.
-
.tokenizer ⇒ Object
Returns the value of attribute tokenizer.
Instance Attribute Summary
-
#normalized ⇒ Object
Returns the value of attribute normalized.
Instance Method Summary
- #normalize ⇒ Object
- #normalize! ⇒ Object
- #similarity(text) ⇒ Object
- #tokenize ⇒ Object
- #transform(*transformers) ⇒ Object
Class Attribute Details
.normalizer ⇒ Object
Returns the value of attribute normalizer.
8 9 10 |
# Source listing of the reader for the class-level `normalizer` attribute:
# it returns the @normalizer instance variable held on String itself.
# NOTE(review): only the reader is rendered on this page; presumably it is
# generated by an accessor macro with a matching writer — confirm in the file.
# File 'lib/text_nlp/string.rb', line 8 def normalizer @normalizer end |
.tokenizer ⇒ Object
Returns the value of attribute tokenizer.
9 10 11 |
# Source listing of the reader for the class-level `tokenizer` attribute:
# it returns the @tokenizer instance variable held on String itself.
# NOTE(review): only the reader is rendered on this page; presumably it is
# generated by an accessor macro with a matching writer — confirm in the file.
# File 'lib/text_nlp/string.rb', line 9 def tokenizer @tokenizer end |
Instance Attribute Details
#normalized ⇒ Object
Returns the value of attribute normalized.
5 6 7 |
# Source listing of the reader for the per-string `normalized` flag: it
# returns the @normalized instance variable of the receiver.
# `normalize` and `normalize!` assign this flag through `normalized = true`,
# so a writer exists as well even though it is not rendered on this page.
# File 'lib/text_nlp/string.rb', line 5 def normalized @normalized end |
Instance Method Details
#normalize ⇒ Object
12 13 14 15 16 17 18 19 |
# File 'lib/text_nlp/string.rb', line 12
#
# Returns a normalized version of this string without modifying the receiver.
#
# When the receiver is already flagged as normalized it is returned as-is.
# Otherwise the text is passed to String.normalizer if one is configured, or
# to a freshly built TextNlp::Normalizer, and the result is flagged as
# normalized before being handed back.
#
# @return [Object] the receiver, or the normalizer's result flagged normalized
def normalize
  return self if normalized

  engine = String.normalizer || TextNlp::Normalizer.new
  engine.normalize(self).tap { |result| result.normalized = true }
end
#normalize! ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/text_nlp/string.rb', line 21
#
# Normalizes this string in place.
#
# Does nothing when the receiver is already flagged as normalized; otherwise
# the receiver's contents are replaced with the output of #normalize and the
# receiver is flagged as normalized.
#
# @return [String] the receiver
def normalize!
  return self if normalized

  replace(normalize)
  self.normalized = true
  self
end
#similarity(text) ⇒ Object
33 34 35 36 37 38 39 40 41 |
# File 'lib/text_nlp/string.rb', line 33
#
# Scores how much of the vocabulary of this string and +text+ is shared.
#
# Both strings are normalized and tokenized, and each token list is reduced
# to its distinct tokens. The score is the mean of two ratios: shared tokens
# over the receiver's distinct tokens, and shared tokens over +text+'s
# distinct tokens.
#
# @param text [String] the text to compare against; must respond to
#   #normalize, whose result must respond to #tokenize
# @return [Float] 0.0 when either side has no tokens, otherwise a value in
#   (0.0..1.0]; 1.0 means both sides have the same set of tokens
def similarity(text)
  # Array#& already drops duplicates from the intersection, so the
  # denominators must count distinct tokens too. Otherwise a text containing
  # a repeated token scores below 1.0 even against itself.
  own_tokens = normalize.tokenize.uniq
  other_tokens = text.normalize.tokenize.uniq
  return 0.0 if own_tokens.empty? || other_tokens.empty?

  shared = (own_tokens & other_tokens).size.to_f
  ((shared / own_tokens.size) + (shared / other_tokens.size)) / 2
end
#tokenize ⇒ Object
29 30 31 |
# File 'lib/text_nlp/string.rb', line 29
#
# Splits this string into tokens.
#
# Delegates to String.tokenizer when one is configured, otherwise to a
# freshly built TextNlp::Tokenizer.
#
# @return [Object] whatever the tokenizer's #tokenize returns for this string
def tokenize
  engine = String.tokenizer || TextNlp::Tokenizer.new
  engine.tokenize(self)
end
#transform(*transformers) ⇒ Object
43 44 45 46 |
# File 'lib/text_nlp/string.rb', line 43
#
# Pipes this string through a chain of transformers, in order.
#
# Each transformer's #transform is called with the output of the previous
# one; the first receives this string. Arrays of transformers, including
# nested ones, are flattened, so `transform(a, b)` and `transform([a, [b]])`
# are equivalent.
#
# @param transformers [Array<#transform>] transformers, or arrays of them
# @return [Object] the last transformer's output, or the receiver itself
#   when no transformers are given
def transform(*transformers)
  # The splat always yields an Array, so no "wrap unless enumerable" step
  # is needed before flattening.
  transformers.flatten.inject(self) { |text, transformer| transformer.transform(text) }
end