Module: Jkl::Text

Defined in:
lib/jkl/text_client.rb

Class Method Summary

Class Method Details

.plain_text(document, words_on_line = 5) ⇒ Object



5
6
7
# File 'lib/jkl/text_client.rb', line 5

# Reduces a document to its longer runs of plain text.
#
# The input passes through three stages, in this order:
# +remove_script_tags+, +strip_all_tags+ and +remove_short_lines+.
#
# @param document [String] markup to convert
# @param words_on_line [Integer] threshold handed unchanged to
#   +remove_short_lines+
# @return [String] the result of +remove_short_lines+
def plain_text(document, words_on_line = 5)
  without_scripts = remove_script_tags(document)
  without_markup = strip_all_tags(without_scripts)
  remove_short_lines(without_markup, words_on_line)
end

.remove_blank_lines(text) ⇒ Object



13
14
15
# File 'lib/jkl/text_client.rb', line 13

# Deletes every line-break character from +text+.
#
# Note that, despite the method name, all carriage returns and newlines are
# removed (not only those that form blank lines), so the result is one line.
#
# @param text [String] input text
# @return [String] a new string containing no "\r" or "\n" characters
def remove_blank_lines(text)
  line_breaks = /\n\r|\r\n|\n|\r/
  text.gsub(line_breaks, "")
end

.remove_html_comments(text) ⇒ Object



17
18
19
# File 'lib/jkl/text_client.rb', line 17

# Removes HTML comments (<tt><!-- ... --></tt>) from +text+, including
# comments that span several lines.
#
# Each comment ends at the first <tt>--></tt> that follows its opening
# <tt><!--</tt>; an opening marker with no terminator is left untouched.
#
# @param text [String] markup that may contain HTML comments
# @return [String] a new string with every complete comment deleted
def remove_html_comments(text)
  # A single non-greedy dot-all match is used instead of an alternation such
  # as (.|\s): the branches of that alternation overlap on whitespace and can
  # backtrack exponentially when a comment is never closed.
  text.gsub(/<!--.*?-->/m, "")
end

.remove_script_tags(text) ⇒ Object



21
22
23
24
# File 'lib/jkl/text_client.rb', line 21

# Removes <tt><script>...</script></tt> elements, together with their
# contents, from +text+.
#
# HTML comments are stripped first via +remove_html_comments+. Matching is
# case-insensitive and spans line breaks. Each element ends at the first
# closing script tag after its opening tag. An opening tag with no closing
# tag is left in place.
#
# @param text [String] markup that may contain script elements
# @return [String] a new string without comments or script elements
def remove_script_tags(text)
  without_comments = remove_html_comments(text)
  # The script body is matched with a non-greedy dot-all pattern so that
  # scripts containing ">" (comparisons, "=>", embedded markup) are removed
  # too; excluding ">" from the body would leave such scripts in the output.
  without_comments.gsub(/<\s*script\b[^>]*>.*?<\/script\s*>/mi, "")
end

.remove_short_lines(text, words_on_line = 5) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/jkl/text_client.rb', line 26

# Drops short lines from +text+ and returns what is left as one string.
#
# Every pair of consecutive whitespace characters is first turned into a
# line break, then the text is split on "\n". A line is kept only when it
# contains at least +words_on_line+ space characters. Note that spaces are
# counted, not words.
#
# The kept lines are concatenated with no separator between them.
#
# @param text [String] text to filter
# @param words_on_line [Integer] minimum number of spaces a line must
#   contain to be kept
# @return [String] the surviving lines joined together
def remove_short_lines(text, words_on_line = 5)
  lines = text.gsub(/\s\s/, "\n").split("\n")
  # Short lines are usually just navigation, so they are discarded.
  kept = lines.reject { |line| line.count(" ") < words_on_line }
  kept.join("")
end

.strip_all_tags(text) ⇒ Object



9
10
11
# File 'lib/jkl/text_client.rb', line 9

# Deletes everything that looks like a markup tag from +text+.
#
# A tag is any run that starts with "<" and extends to the next ">". Each
# one is replaced by nothing, so the text on either side becomes adjacent.
# A "<" that is never followed by ">" is left in place.
#
# @param text [String] markup to strip
# @return [String] a new string with the tags removed
def strip_all_tags(text)
  tag = /<\/?[^>]*>/
  text.gsub(tag, "")
end