Module: PristineText

Defined in:
lib/pristine_text.rb,
lib/pristine_text/version.rb

Constant Summary

VERSION =
"0.0.1"

Class Method Summary

Class Method Details

.clean(text, locale = :en, stem = true) ⇒ Object



25
26
27
28
29
30
31
32
33
34
# File 'lib/pristine_text.rb', line 25

# Normalizes +text+ into lowercase words separated by single spaces.
#
# The text is lowercased with UnicodeUtils.downcase for the given locale,
# every character that is neither a Unicode letter nor whitespace is removed,
# and the remainder is split into words on whitespace. The words are then
# optionally stemmed and joined back together with a single space.
#
# @param text [String] the text to clean
# @param locale [Symbol] handed to UnicodeUtils.downcase and to {.stem}
# @param stem [Boolean] when truthy, the words are passed through {.stem}
# @return [String] the cleaned (and optionally stemmed) words joined by " "
def self.clean(text, locale = :en, stem = true)
  # Splitting on whitespace (instead of the former bare `squeeze`, which
  # collapsed *every* repeated character and turned "hello" into "helo")
  # drops leading/trailing whitespace and collapses whitespace runs only.
  words = UnicodeUtils.downcase(text, locale).
    gsub(/[^\p{Letter}\s]+/, "").
    split

  # The `stem` parameter shadows the method of the same name; the call with
  # parentheses and arguments is still dispatched to the method.
  words = stem(words, locale) if stem
  words.join(" ")
end

.pipe(text, locale) ⇒ Object



6
7
8
9
10
11
12
13
14
15
# File 'lib/pristine_text.rb', line 6

# Runs +text+ through the external `stemwords` program and returns its output.
#
# @param text [String] data written to the program's standard input
# @param locale [#to_s] value passed as the argument of the `-l` option
# @return [String] the program's standard output with surrounding whitespace
#   stripped
# @raise [LoadError] when the `stemwords` executable cannot be found
def self.pipe(text, locale)
  # Argument-vector form: no shell is involved, so +locale+ cannot inject
  # additional shell commands. capture3 feeds stdin while draining stdout and
  # stderr concurrently, which avoids blocking on full pipe buffers for large
  # inputs. stderr and the exit status are captured and ignored, matching the
  # previous best-effort behavior.
  output, _errors, _status =
    Open3.capture3("stemwords", "-l", locale.to_s, stdin_data: text)
  output.strip
rescue Errno::ENOENT
  # Spawning fails with ENOENT when the executable is not on the PATH.
  raise LoadError, "cannot find stemwords, install libstemmer-tools"
end

.stem(text, locale) ⇒ Object



17
18
19
20
21
22
23
# File 'lib/pristine_text.rb', line 17

# Stems +text+ by delegating to {.pipe}.
#
# An Array is joined with newlines, sent through {.pipe} in one call, and the
# result is split on newlines again. A String is passed to {.pipe} unchanged.
#
# @param text [Array<String>, String] the word list or string to stem
# @param locale [Object] forwarded to {.pipe} as is
# @return [Array<String>, String, nil] an Array for Array input, the result of
#   {.pipe} for String input, and nil for any other kind of input
def self.stem(text, locale)
  case text
  when Array
    joined = text.join("\n")
    pipe(joined, locale).split("\n")
  when String
    pipe(text, locale)
  end
end