Module: TextHelper
- Included in:
- HTTPHelper, TitleGrabber::MultiThreadedGrabber
- Defined in:
- lib/text_helper.rb
Constant Summary collapse
- SINGLE_SPACE =
-" "
Instance Method Summary collapse
-
#clean_up_whitespace(text) ⇒ Object
document.querySelector('title').textContent.trim().replace(/\n/g, ' ').replace(/\s{2,}/g, ' ').
- #utf8_encode(text = nil) ⇒ Object
Instance Method Details
#clean_up_whitespace(text) ⇒ Object
document.querySelector('title').textContent.trim().replace(/\n/g, ' ').replace(/\s{2,}/g, ' ')
19 20 21 22 23 24 |
# File 'lib/text_helper.rb', line 19
#
# Trims +text+ and normalizes its inner whitespace: every run of two or more
# whitespace characters, and every remaining newline, becomes SINGLE_SPACE.
# A lone tab or carriage return is left untouched.
#
# An unfrozen String is normalized in place and returned; a frozen String is
# copied first so the call does not raise FrozenError. Non-String input is
# coerced with Kernel#String, so +nil+ yields an empty string.
#
# @param text [String, #to_s, nil] the text to normalize
# @return [String] the normalized text
def clean_up_whitespace(text)
  # Unary plus returns the receiver itself unless it is frozen, in which
  # case it returns a mutable duplicate.
  cleaned = +String(text)
  cleaned.strip!
  # Collapse multi-character runs first so that any newline still present
  # afterwards is isolated and maps to exactly one space.
  cleaned.gsub!(/\s{2,}/, SINGLE_SPACE)
  cleaned.gsub!("\n", SINGLE_SPACE)
  cleaned
end
#utf8_encode(text = nil) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/text_helper.rb', line 4
#
# Converts +text+ to UTF-8, dropping invalid or unconvertible byte sequences
# and removing NUL characters.
#
# An unfrozen String is converted in place and returned. Frozen input is
# copied first; this includes the default +nil+, because NilClass#to_s
# returns a frozen string on Ruby 2.7+ and encode! would otherwise raise
# FrozenError, which the EncodingError rescue does not cover.
#
# @param text [String, #to_s, nil] the text to convert
# @return [String] the UTF-8 text, or an empty string when the conversion
#   raises an EncodingError
def utf8_encode(text = nil)
  # Unary plus returns the receiver itself unless it is frozen, in which
  # case it returns a mutable duplicate.
  txt = +String(text)

  begin
    txt.encode!(-"UTF-8", invalid: :replace, undef: :replace, replace: -"")
  rescue EncodingError
    -""
  else
    txt.delete!(-"\u0000") # get rid of nasty null bytes
    txt
  end
end