Module: HtmlToPlainText
- Included in:
- Premailer
- Defined in:
- lib/premailer/html_to_plain_text.rb
Overview
Support functions for Premailer
Instance Method Summary
-
#convert_to_text(html, line_length = 65, from_charset = 'UTF-8') ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
Instance Method Details
#convert_to_text(html, line_length = 65, from_charset = 'UTF-8') ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
TODO:
- add support for DL, OL
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/premailer/html_to_plain_text.rb', line 11

# Converts an HTML fragment into wrapped plain text.
#
# Headings (H1-H6) become titles decorated with rules, links become
# "text ( url )", list items become "* item" lines, </p> and <br> become
# newlines, and every remaining tag is removed. HTML entities are decoded
# only after the tags are gone, so escaped text such as "&lt;b&gt;" survives
# as the literal characters "<b>" instead of being stripped as markup.
#
# TODO: add support for DL, OL
#
# @param html [String] HTML source; it is copied and never modified in place
# @param line_length [Integer] width used for wrapping and as the cap for
#   heading rules
# @param from_charset [String] kept for interface compatibility; this method
#   does not read it
# @return [String] the plain-text rendering with surrounding whitespace removed
def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
  wrapper = Text::Reform.new(:trim => true, :squeeze => false, :break => Text::Reform.break_wrap)

  # Work on a private copy: the substitutions below are destructive and must
  # not leak into the caller's string.
  txt = html.to_s.dup

  # handle headings (H1-H6); the lazy (.*?) keeps two headings that share a
  # line from being merged into one
  txt.gsub!(/[ \t]*<h([0-9]+)[^>]*>(.*?)<\/h[0-9]+>/i) do
    hlevel = Regexp.last_match(1).to_i
    # cleanup text inside of headings
    htext = Regexp.last_match(2).gsub(/<\/?[^>]*>/i, '').strip
    # rules are never wider than the requested line length
    hlength = [htext.length, line_length].min

    case hlevel
    when 1 # H1, asterisks above and below
      rule = '*' * hlength
      "#{rule}\n#{htext}\n#{rule}\n"
    when 2 # H2, dashes above and below
      rule = '-' * hlength
      "#{rule}\n#{htext}\n#{rule}\n"
    else # H3-H6, dashes below
      "#{htext}\n#{'-' * hlength}\n"
    end
  end

  # links; every part of the pattern is bounded or lazy so that each <a>
  # element is rewritten on its own, even when several share a line
  txt.gsub!(/<a\b[^>]*?\shref=\"([^\"]*)\"[^>]*>(.*?)<\/a>/i) do
    url = Regexp.last_match(1)
    label = Regexp.last_match(2)
    "#{label.strip} ( #{url.strip} )"
  end

  # lists -- TODO: should handle ordered lists
  txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
  # list not followed by a newline
  txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

  # paragraphs and line breaks
  txt.gsub!(/<\/p>/i, "\n\n")
  txt.gsub!(/<br[\/ ]*>/i, "\n")

  # strip remaining tags
  txt.gsub!(/<\/?[^>]*>/, '')

  # decode HTML entities -- done after tag stripping so that decoded "<" and
  # ">" characters are treated as text, not as markup
  txt = HTMLEntities.new.decode(txt)

  # wrap text
  txt = wrapper.format(('[' * line_length), txt)

  # remove linefeeds (\r\n and \r -> \n)
  txt.gsub!(/\r\n?/, "\n")

  # strip extra spaces
  txt.gsub!(/\302\240+/, " ") # non-breaking spaces (UTF-8 bytes of U+00A0) -> spaces
  txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
  txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

  # no more than two consecutive newlines
  txt.gsub!(/[\n]{3,}/, "\n\n")

  txt.strip
end