Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/text_nlp/string.rb

Class Attribute Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Class Attribute Details

.normalizer ⇒ Object

Returns the value of attribute normalizer.



8
9
10
# File 'lib/text_nlp/string.rb', line 8

# Class-level attribute reader (listed under "Class Attribute Details").
# Returns whatever is currently stored in @normalizer; nil when nothing
# has been assigned. #normalize falls back to TextNlp::Normalizer.new
# when this value is nil or false.
def normalizer
  @normalizer
end

.tokenizer ⇒ Object

Returns the value of attribute tokenizer.



9
10
11
# File 'lib/text_nlp/string.rb', line 9

# Class-level attribute reader (listed under "Class Attribute Details").
# Returns whatever is currently stored in @tokenizer; nil when nothing
# has been assigned. #tokenize falls back to TextNlp::Tokenizer.new
# when this value is nil or false.
def tokenizer
  @tokenizer
end

Instance Attribute Details

#normalized ⇒ Object

Returns the value of attribute normalized.



5
6
7
# File 'lib/text_nlp/string.rb', line 5

# Per-string attribute reader. Returns the value of @normalized, which is
# nil until assigned. #normalize and #normalize! set it to true and use a
# truthy value as the signal that no further normalization is needed.
def normalized
  @normalized
end

Instance Method Details

#normalize ⇒ Object



12
13
14
15
16
17
18
19
# File 'lib/text_nlp/string.rb', line 12

# Returns the normalized form of this string.
#
# When the string is already flagged as normalized, the receiver itself is
# returned. Otherwise the string is passed to String.normalizer (or to a new
# TextNlp::Normalizer when none is configured), and the object that comes
# back is flagged as normalized before being returned.
def normalize
  return self if normalized

  engine = String.normalizer || TextNlp::Normalizer.new
  engine.normalize(self).tap { |result| result.normalized = true }
end

#normalize! ⇒ Object



21
22
23
24
25
26
27
# File 'lib/text_nlp/string.rb', line 21

# In-place counterpart of #normalize.
#
# Unless the string is already flagged as normalized, its contents are
# replaced with the result of #normalize and the flag is set to true.
# Always returns the receiver.
def normalize!
  return self if normalized

  replace(normalize)
  self.normalized = true
  self
end

#similarity(text) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/text_nlp/string.rb', line 33

# Scores how similar this string is to +text+ based on shared tokens.
#
# Both strings are normalized and tokenized, and each token list is reduced
# to its distinct tokens. The score is the mean of two ratios: shared tokens
# over this string's distinct tokens, and shared tokens over +text+'s
# distinct tokens.
#
# Deduplicating first matters because Array#& already returns distinct
# elements; dividing that count by a list that still contains repeats made a
# text with repeated words score below 1.0 against itself.
#
# NOTE(review): assumes #tokenize returns an Array (or another Enumerable
# whose #uniq yields an Array) - confirm against the tokenizer.
#
# @param text [String] the string to compare against; must respond to
#   #normalize
# @return [Float] 0.0 when either side has no tokens, otherwise a value in
#   0.0..1.0 (1.0 when both sides have the same set of tokens)
def similarity(text)
  own_tokens = normalize.tokenize.uniq
  other_tokens = text.normalize.tokenize.uniq
  return 0.0 if own_tokens.empty? || other_tokens.empty?

  shared = (own_tokens & other_tokens).size.to_f
  ((shared / own_tokens.size) + (shared / other_tokens.size)) / 2
end

#tokenize ⇒ Object



29
30
31
# File 'lib/text_nlp/string.rb', line 29

# Splits this string into tokens.
#
# Delegates to String.tokenizer when one is configured, otherwise to a new
# TextNlp::Tokenizer, and returns whatever that object's #tokenize returns.
def tokenize
  engine = String.tokenizer || TextNlp::Tokenizer.new
  engine.tokenize(self)
end

#transform(*transformers) ⇒ Object



43
44
45
46
# File 'lib/text_nlp/string.rb', line 43

# Pipes this string through a chain of transformers, left to right.
#
# Transformers may be passed individually or as (nested) arrays; the
# arguments are flattened before use. Each transformer's #transform is
# called with the output of the previous one, starting with the receiver.
#
# The former `respond_to?(:each)` guard was removed: a splat parameter is
# always an Array, so that branch could never run.
#
# @param transformers [Array<#transform>] objects responding to
#   #transform(text)
# @return [Object] the result of the last transformer, or the receiver
#   itself when no transformers are given
def transform(*transformers)
  transformers.flatten.inject(self) { |text, transformer| transformer.transform(text) }
end