Module: Premailer::Adapter::Nokogiri

Defined in:
lib/premailer/adapter/nokogiri.rb

Overview

Nokogiri adapter

Instance Method Summary collapse

Instance Method Details

#load_html(input) ⇒ ::Nokogiri::XML

Load the HTML file and convert it into a Nokogiri document.

Returns:

  • (::Nokogiri::XML)

    a document.



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/premailer/adapter/nokogiri.rb', line 175

# Load HTML from +input+ and parse it with Nokogiri.
#
# +input+ is used as-is when <tt>@options[:with_html_string]</tt> or
# <tt>@options[:inline]</tt> is set, or when it responds to +read+ (in which
# case it is read but NOT closed; the caller owns it). Otherwise it is
# treated as a local path when <tt>@is_local_file</tt> is set (this also
# records <tt>@base_dir</tt>), and is handed to Kernel#open in every other
# case. Handles opened here are closed before returning.
#
# Returns whatever <tt>::Nokogiri::HTML</tt> returns, or +nil+ when there is
# nothing to parse.
def load_html(input) # :nodoc:
  # TODO: duplicate options
  thing =
    if @options[:with_html_string] || @options[:inline] || input.respond_to?(:read)
      input
    elsif @is_local_file
      @base_dir = File.dirname(input)
      # Block form so the file handle is closed as soon as it has been read.
      File.open(input, 'r') { |file| file.read }
    else
      # Block form closes the handle once it has been read.
      # NOTE(review): Kernel#open runs a command when given a string starting
      # with "|"; confirm that untrusted input can never reach this branch.
      open(input) { |io| io.respond_to?(:read) ? io.read : io }
    end

  # Caller-supplied IO-like objects are read here and left open.
  thing = thing.read if thing.respond_to?(:read)

  return nil unless thing

  # Handle HTML entities
  if @options[:replace_html_entities] == true && thing.is_a?(String)
    # NOTE(review): these patterns are unanchored and the dot is unescaped, so
    # a version such as "2.1.9" also matches /1.9/ -- confirm that is intended.
    html_entity_ruby_version =
      if RUBY_VERSION =~ /1.9/
        "1.9"
      elsif RUBY_VERSION =~ /1.8/
        "1.8"
      end

    if html_entity_ruby_version
      HTML_ENTITIES[html_entity_ruby_version].each do |entity, replacement|
        thing.gsub! entity, replacement
      end
    end
  end

  # Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
  # However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
  if thing.is_a?(String) && RUBY_VERSION =~ /1.9/
    thing = thing.force_encoding(@options[:input_encoding]).encode!
    ::Nokogiri::HTML(thing, nil, @options[:input_encoding]) { |c| c.recover }
  else
    default_encoding = RUBY_PLATFORM == 'java' ? nil : 'BINARY'
    ::Nokogiri::HTML(thing, nil, @options[:input_encoding] || default_encoding) { |c| c.recover }
  end
end

#to_inline_css ⇒ String

Merge CSS into the HTML document.

Returns:

  • (String)

    an HTML.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/premailer/adapter/nokogiri.rb', line 11

# Merge the rules held by <tt>@css_parser</tt> into <tt>@processed_doc</tt>
# as inline +style+ attributes and return the serialized result.
#
# Outline:
# 1. Existing inline styles and every matching rule are written into the
#    +style+ attribute as <tt>[SPEC=n[declarations]]</tt> markers.
# 2. The markers on each element are parsed back into rule sets and folded
#    with <tt>CssParser.merge</tt>.
# 3. Selectors matching <tt>Premailer::RE_UNMERGABLE_SELECTORS</tt> are
#    collected in <tt>@unmergable_rules</tt> (skipped when
#    <tt>:preserve_styles</tt> is set) and handed to
#    +write_unmergable_css_rules+.
# 4. Optional clean-up driven by <tt>@options</tt>: <tt>:remove_scripts</tt>,
#    <tt>:remove_classes</tt>, <tt>:remove_comments</tt>, <tt>:remove_ids</tt>.
#
# Side effects: reassigns <tt>@unmergable_rules</tt> and
# <tt>@processed_doc</tt>, and mutates the selector strings yielded by
# <tt>@css_parser</tt> (via +gsub!+).
#
# Returns a String: XHTML when +is_xhtml?+ is true, HTML otherwise.
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Strip the :link pseudo-class from the selector (in place)
    selector.gsub!(/:link([\s]*)+/i) {|m| $1 }

    # Convert element names to lower case: any run of word characters at the
    # start of the selector or directly after whitespace is downcased
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately; they are written out as a <style>
      # element further down instead of being inlined
      @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless @options[:preserve_styles]
    else
      begin
        # Change single ID CSS selectors into xpath so that we can match more
        # than one element.  Added to work around dodgy generated code.
        selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')

        doc.search(selector).each do |el|
          # Skip <head> itself and its direct children
          if el.elem? and (el.name != 'head' and el.parent.name != 'head')
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            # to_s already yields '' when the attribute is missing, so the
            # `||= ''` looks like a no-op safeguard
            el['style'] = (el.attributes['style'].to_s ||= '') + ' ' + block
          end
        end
      rescue  ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
        # A selector that cannot be searched is reported (only in verbose
        # mode) and skipped; the remaining rules are still processed
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  if @options[:remove_scripts]
    doc.search("script").remove
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    # Parse every [SPEC=n[...]] marker back into a rule set carrying its
    # specificity (capture 0) and declarations (capture 1)
    declarations = []
    style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
      rs = CssParser::RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
      declarations << rs
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    # Duplicate CSS attributes as HTML attributes
    if Premailer::RELATED_ATTRIBUTES.has_key?(el.name)
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # Only fills HTML attributes that are not already set and whose CSS
        # value is non-empty; a url('...') wrapper and a trailing semicolon
        # are stripped from the copied value
        el[html_att] = merged[css_att].gsub(/url\('(.*)'\)/,'\1').gsub(/;$/, '').strip if el[html_att].nil? and not merged[css_att].empty?
      end
    end

    # write the inline STYLE attribute: escaped, split on ';', each
    # declaration stripped, sorted, then re-joined with '; '
    el['style'] = Premailer.escape_string(merged.declarations_to_s).split(';').map(&:strip).sort.join('; ')
  end

  # Emit the rules that could not be inlined as a <style> element
  doc = write_unmergable_css_rules(doc, @unmergable_rules)

  if @options[:remove_classes] or @options[:remove_comments]
    doc.traverse do |el|
      if el.comment? and @options[:remove_comments]
        el.remove
      elsif el.element?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    targets = []
    doc.search("a[@href^='#']").each do |el|
      # Drop the leading '#' to get the bare target id
      target = el.get_attribute('href')[1..-1]
      targets << target
      el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.include?(id)
        # Same MD5 digest as applied to the href above, so the link still
        # points at this element
        el.set_attribute('id', Digest::MD5.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  @processed_doc = doc
  if is_xhtml?
    # we don't want to encode carriage returns, so the &#xD; entity in the
    # serialized output is turned back into a literal "\r"
    @processed_doc.to_xhtml(:encoding => nil).gsub(/&\#xD;/i, "\r")
  else
    @processed_doc.to_html
  end
end

#to_plain_text ⇒ String

Converts the HTML document to a format suitable for plain-text e-mail.

If present, uses the <body> element as its base; otherwise uses the whole document.

Returns:

  • (String)

    a plain text.



152
153
154
155
156
157
158
159
160
# File 'lib/premailer/adapter/nokogiri.rb', line 152

# Render the document as plain text by passing markup to +convert_to_text+
# together with <tt>@options[:line_length]</tt> and <tt>@html_encoding</tt>.
#
# The inner HTML of the node found by <tt>@doc.at("body")</tt> is used when it
# can be read and is non-empty; in every other case the whole document's HTML
# is used instead.
def to_plain_text
  body_markup =
    begin
      @doc.at("body").inner_html
    rescue
      # Best effort: any StandardError while reading the body simply means
      # the whole-document fallback below is used.
      ''
    end

  source =
    if body_markup && !body_markup.empty?
      body_markup
    else
      @doc.to_html
    end

  convert_to_text(source, @options[:line_length], @html_encoding)
end

#to_s ⇒ String

Gets the original HTML as a string.

Returns:

  • (String)

    HTML.



164
165
166
167
168
169
170
# File 'lib/premailer/adapter/nokogiri.rb', line 164

# Serialize <tt>@doc</tt> to a string: via +to_xhtml+ when +is_xhtml?+ is
# true, via +to_html+ otherwise. Both are called with <tt>:encoding => nil</tt>.
def to_s
  return @doc.to_xhtml(:encoding => nil) if is_xhtml?

  @doc.to_html(:encoding => nil)
end

#write_unmergable_css_rules(doc, unmergable_rules) ⇒ ::Nokogiri::XML

Create a style element with un-mergable rules (e.g. :hover) and write it into the document's head, falling back to the body.

doc is a Nokogiri document and unmergable_rules is a CssParser::Parser holding the un-mergable rules.

Returns:

  • (::Nokogiri::XML)

    a document.



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/premailer/adapter/nokogiri.rb', line 127

# Build a <style> element from the un-mergable rules (e.g. :hover) and insert
# it into +doc+.
#
# doc::              the document to modify; it must respond to +at_css+ and
#                    to +inner_html+ / +inner_html=+.
# unmergable_rules:: an object whose +each_selector+ yields
#                    selector, declarations and specificity.
#
# The element is placed before the first child of <head>; when there is no
# <head>, before the first child of <body>; when there is neither, it is
# prepended to the document's inner HTML. Nothing is written when there are
# no rules.
#
# Returns +doc+.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  rule_lines = []
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, _specificity|
    rule_lines << "#{selector} { #{declarations} }\n"
  end
  return doc if rule_lines.empty?

  style_tag = "<style type=\"text/css\">\n#{rule_lines.join}</style>"

  # at_css yields nil when nothing matches, so a missing <head> really does
  # fall through to <body> (a search result set is truthy even when empty).
  container = doc.at_css('head') || doc.at_css('body')

  if container
    fragment = ::Nokogiri::XML.fragment(style_tag)
    existing_children = container.children
    if existing_children.empty?
      # There is no first child to insert before; append instead.
      container.add_child(fragment)
    else
      existing_children.before(fragment)
    end
  else
    doc.inner_html = style_tag + doc.inner_html
  end

  doc
end