Module: Premailer::Adapter::Hpricot

Defined in:
lib/premailer/adapter/hpricot.rb

Overview

Hpricot adapter

Instance Method Summary collapse

Instance Method Details

#load_html(input) ⇒ ::Hpricot

Load the HTML file and convert it into an Hpricot document.

Returns:

  • (::Hpricot)

    a document.



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/premailer/adapter/hpricot.rb', line 177

# Load HTML from +input+ and parse it into an Hpricot document.
#
# +input+ is parsed as-is when @options[:with_html_string] or
# @options[:inline] is set, or when it responds to +read+. Otherwise it is
# opened here: with File.open when @is_local_file is set (in which case
# @base_dir is also set to the file's directory), or with +open+.
#
# Handles opened by this method are closed once parsing has finished; an IO
# supplied by the caller is left open for the caller to manage.
#
# Returns the value of Hpricot(...), or nil when the as-is input is nil/false.
def load_html(input) # :nodoc:
  # TODO: duplicate options
  if @options[:with_html_string] || @options[:inline] || input.respond_to?(:read)
    # TODO: deal with Hpricot seg faults on empty input
    input ? Hpricot(input) : nil
  elsif @is_local_file
    @base_dir = File.dirname(input)
    # Block form closes the file even if parsing raises.
    # NOTE(review): relies on Hpricot() consuming the stream while parsing.
    File.open(input, 'r') { |file| Hpricot(file) }
  else
    open(input) { |io| Hpricot(io) }
  end
end

#to_inline_css ⇒ String

Merge CSS into the HTML document.

Returns:

  • (String)

    HTML.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/premailer/adapter/hpricot.rb', line 10

# Merge the rules held by @css_parser into inline style attributes on
# @processed_doc and return the resulting HTML.
#
# Steps, in the order the code performs them:
# 1. Wrap each pre-existing style attribute in a [SPEC=1000[...]] marker.
# 2. For every selector from @css_parser, either store the rule in
#    @unmergable_rules or append a [SPEC=<specificity>[...]] block to the
#    style attribute of each matching element.
# 3. Remove script elements when @options[:remove_scripts] is set.
# 4. Parse the marker blocks on each element, fold them with CssParser.merge
#    and write the merged declarations back as the style attribute.
# 5. Write @unmergable_rules into the document via write_unmergable_css_rules.
# 6. Apply the :remove_classes, :remove_comments and :remove_ids options.
#
# Side effects: reassigns @unmergable_rules and @processed_doc, and edits the
# selector strings yielded by @css_parser in place (gsub!).
#
# Returns the result of @processed_doc.to_original_html.
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity, media_types|
    # Strip the :link pseudo-class, replacing it with whatever the
    # whitespace group after it captured
    selector.gsub!(/:link([\s]*)+/i) {|m| $1 }

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    # Save un-mergable rules (media queries and selectors matching
    # RE_UNMERGABLE_SELECTORS) separately instead of inlining them;
    # they are dropped entirely when :preserve_styles is set
    if Premailer.is_media_query?(media_types) || selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration), media_types) unless @options[:preserve_styles]
    else
      begin
        if selector =~ Premailer::RE_RESET_SELECTORS
          # this is in place to preserve the MailChimp CSS reset: http://github.com/mailchimp/Email-Blueprints/
          # NOTE(review): the original comment was truncated here ("however,
          # this doesn't mean for testing pur"); its intent is unknown.
          # The rule is kept only when :preserve_reset is set (`unless !x`
          # is `if x`), and it is still merged inline below as well.
          @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration))  unless !@options[:preserve_reset]
        end

        # Change single ID CSS selectors into xpath so that we can match more
        # than one element.  Added to work around dodgy generated code.
        selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')

        # convert attribute selectors to hpricot's format
        selector.gsub!(/\[([\w]+)\]/, '[@\1]')
        selector.gsub!(/\[([\w]+)([\=\~\^\$\*]+)([\w\s]+)\]/, '[@\1\2\'\3\']')

        doc.search(selector).each do |el|
          # Skip non-element nodes, the head element and its direct children
          if el.elem? and (el.name != 'head' and el.parent.name != 'head')
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            # to_s always yields a String (truthy), so the `||= ''` fallback
            # never takes effect
            el['style'] = (el.attributes['style'].to_s ||= '') + ' ' + block
          end
        end
      rescue ::Hpricot::Error, RuntimeError, ArgumentError
        # Report the selector when :verbose is set, then move on to the
        # next selector
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  if @options[:remove_scripts]
    doc.search("script").remove
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    declarations = []

    # Each match is [specificity, declarations] taken from one
    # [SPEC=n[...]] marker written above
    style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
      rs = CssParser::RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
      declarations << rs
    end
    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!
    merged.create_shorthand!

    # Duplicate CSS attributes as HTML attributes
    if Premailer::RELATED_ATTRIBUTES.has_key?(el.name)
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # Only when the HTML attribute is unset and the merged CSS value is
        # non-empty; the url('...') wrapper and a trailing ';' are stripped
        el[html_att] = merged[css_att].gsub(/url\('(.*)'\)/,'\1').gsub(/;$/, '').strip if el[html_att].nil? and not merged[css_att].empty?
      end
    end

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s)
  end

  # Write the rules collected in @unmergable_rules into the document
  doc = write_unmergable_css_rules(doc, @unmergable_rules)

  if @options[:remove_classes] or @options[:remove_comments]
    doc.search('*').each do |el|
      if el.comment? and @options[:remove_comments]
        # Detach the comment node from its parent's child list
        lst = el.parent.children
        el.parent = nil
        lst.delete(el)
      elsif el.elem?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    targets = []
    doc.search("a[@href^='#']").each do |el|
      # Drop the leading '#' to get the target id
      target = el.get_attribute('href')[1..-1]
      targets << target
      el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.include?(id)
        el.set_attribute('id', Digest::MD5.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  @processed_doc = doc

  @processed_doc.to_original_html
end

#to_plain_text ⇒ String

Converts the HTML document to a format suitable for plain-text e-mail.

If present, uses the <body> element as its base; otherwise uses the whole document.

Returns:

  • (String)

    Plain text.



157
158
159
160
161
162
163
164
165
# File 'lib/premailer/adapter/hpricot.rb', line 157

# Render the document as plain text.
#
# Uses the inner HTML of the body element when it can be read and is
# non-empty; otherwise falls back to the HTML of the whole document. Any
# error raised while reading the body is deliberately ignored so the
# fallback applies.
#
# Returns the result of convert_to_text for the chosen markup, using
# @options[:line_length] and @html_encoding.
def to_plain_text
  body_markup =
    begin
      @doc.search("body").inner_html
    rescue
      ''
    end

  usable = body_markup && !body_markup.empty?
  markup = usable ? body_markup : @doc.to_html

  convert_to_text(markup, @options[:line_length], @html_encoding)
end

#to_s ⇒ String

Gets the original HTML as a string.

Returns:

  • (String)

    HTML.



170
171
172
# File 'lib/premailer/adapter/hpricot.rb', line 170

# String form of this object: whatever @doc reports as its original HTML.
#
# Returns the result of @doc.to_original_html.
def to_s
  source_document = @doc
  source_document.to_original_html
end

#write_unmergable_css_rules(doc, unmergable_rules) ⇒ ::Hpricot

Create a style element with un-mergable rules (e.g. :hover) and write it into the document; the code targets the <head> element first.

doc is an Hpricot document and unmergable_rules is the collection of un-mergable rules (to_inline_css passes a CssParser::Parser); only its to_s output is used.

Returns:

  • (::Hpricot)

    a document.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/premailer/adapter/hpricot.rb', line 135

# Write the un-mergable rules into +doc+ inside a <style> element.
#
# doc              - document responding to +search+, +inner_html+ and
#                    +inner_html=+.
# unmergable_rules - object whose +to_s+ yields the CSS text to embed.
#
# The style element is appended to the head element when one is found,
# otherwise to the body element, otherwise to the end of the document's
# inner HTML. Nothing is written when the CSS text is empty.
#
# Returns +doc+.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  styles = unmergable_rules.to_s
  return doc if styles.empty?

  style_tag = "\n<style type=\"text/css\">\n#{styles}</style>\n"

  # search returns a collection that is truthy even when it has no matches,
  # so emptiness must be tested explicitly for the fallbacks to be reachable.
  head = doc.search('head')
  body = doc.search('body') if head.empty?

  if !head.empty?
    head.append(style_tag)
  elsif !body.empty?
    body.append(style_tag)
  else
    doc.inner_html = doc.inner_html + style_tag
  end

  doc
end