Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/rlid/common.rb

Overview

Adds methods to String for extracting character n-grams from text.

Instance Method Summary collapse

Instance Method Details

#each_ngram(n = 3) ⇒ Object



115
116
117
118
119
120
# File 'lib/rlid/common.rb', line 115

# Iterates over every character n-gram of the receiver.
#
# The text is first normalised and padded by #preprocess; each window of
# +n+ consecutive characters of that result is then joined into a String
# and yielded.
#
# @param n [Integer] n-gram length in characters (defaults to 3)
# @yieldparam ngram [String] one n-gram of length +n+
# @return [Enumerator] when no block is given, so the method can be
#   chained (e.g. <tt>each_ngram(2).to_a</tt>); otherwise whatever
#   Array#each_cons returns
def each_ngram(n = 3)
  # Without a block `yield` would raise LocalJumpError; hand back a lazy
  # enumerator instead, like the core each_* iterators do.
  return enum_for(:each_ngram, n) unless block_given?

  preprocess(n).chars.each_cons(n) { |window| yield window.join }
end

#preprocess(n) ⇒ Object

private



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/rlid/common.rb', line 123

# Normalises the receiver for n-gram extraction and returns a new String
# (the receiver itself is not modified).
#
# Steps, in order:
# 1. every character that is not alphabetic, an apostrophe or a newline is
#    replaced by a space;
# 2. leading and trailing whitespace (including blank or non-alphabetic
#    lines) is removed;
# 3. each run of newlines, together with surrounding spaces, becomes a line
#    boundary marker made of <tt>n - 1</tt> pipe characters;
# 4. runs of spaces collapse to a single space;
# 5. the text is lower-cased;
# 6. the text is prefixed with <tt>n - 1</tt> pipes and suffixed with either
#    the same marker or, when the text is shorter than <tt>n - 1</tt>
#    characters, a single space.
#
# @param n [Integer] n-gram length the text is being prepared for
# @return [String] the cleaned, lower-cased and padded text
# @raise [ArgumentError] if +n+ is less than 1 (String#* rejects a
#   negative repeat count)
def preprocess(n)
  boundary = '|' * (n - 1)

  # Blank out everything that is not a letter, apostrophe or newline FIRST,
  # and only then strip. Stripping before this step (or after newlines have
  # become boundary markers) lets a leading/trailing line of digits or
  # punctuation leak an extra marker run onto the edge of the text.
  text = gsub(/[^[:alpha:]'\n]/, ' ').strip

  # Only spaces and newlines remain as whitespace at this point.
  text = text.gsub(/\s*\n\s*/, boundary).squeeze(' ').downcase

  # NOTE(review): very short texts get a single trailing space instead of the
  # closing boundary marker; behaviour kept from the original — intent
  # presumably to limit all-padding n-grams, confirm with the author.
  if text.size < n - 1
    "#{boundary}#{text} "
  else
    "#{boundary}#{text}#{boundary}"
  end
end