Module: TextHelper

Included in:
HTTPHelper, TitleGrabber::MultiThreadedGrabber
Defined in:
lib/text_helper.rb

Constant Summary collapse

SINGLE_SPACE =
-" "

Instance Method Summary collapse

Instance Method Details

#clean_up_whitespace(text) ⇒ Object

document.querySelector('title').textContent.trim().replace(/\n/g, ' ').replace(/\s{2,}/g, ' ')



19
20
21
22
23
24
# File 'lib/text_helper.rb', line 19

# Trims +text+ and normalizes its inner whitespace.
#
# Leading and trailing whitespace is stripped, every run of two or more
# whitespace characters is collapsed into SINGLE_SPACE, and any newline that
# is still left is replaced with SINGLE_SPACE. A lone tab or carriage return
# between words is left untouched.
#
# A mutable String argument is cleaned in place and returned. Frozen input
# (and anything that is not a String, which is coerced with Kernel#String)
# is never mutated; a cleaned copy is returned instead.
#
# @param text [String, #to_s, nil] the text to clean up
# @return [String] the cleaned text
def clean_up_whitespace(text)
  cleaned = String(text)
  # The bang methods below raise FrozenError on a frozen receiver (for
  # example a deduplicated -"" literal or nil.to_s), so work on a copy then.
  cleaned = cleaned.dup if cleaned.frozen?

  cleaned.strip!
  cleaned.gsub!(/\s{2,}/, SINGLE_SPACE)
  cleaned.gsub!("\n", SINGLE_SPACE)
  cleaned
end

#utf8_encode(text = nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/text_helper.rb', line 4

# Coerces +text+ to a String, converts it to UTF-8 and removes NUL characters.
#
# Byte sequences that are invalid in the source encoding, or that have no
# UTF-8 equivalent, are dropped (replaced with an empty string) instead of
# raising.
#
# A mutable String argument is converted in place and returned. Frozen input
# is copied first and left untouched; this includes the default +nil+,
# because nil.to_s is a frozen String on Ruby 2.7 and later.
#
# @param text [String, #to_s, nil] the text to encode
# @return [String] the UTF-8 text without NUL characters, or a frozen empty
#   String when the conversion raises an EncodingError
def utf8_encode(text = nil)
  txt = String(text)
  # encode! and delete! raise FrozenError on a frozen receiver, and that
  # error is not an EncodingError, so it would escape the rescue below.
  txt = txt.dup if txt.frozen?

  begin
    txt.encode!(-"UTF-8", invalid: :replace, undef: :replace,
                          replace: -"")
  rescue EncodingError
    -""
  else
    txt.delete!(-"\u0000") # get rid of nasty null bytes
    txt
  end
end