Module: Premailer::Adapter::NokogiriFast

Includes:
AdapterHelper::RgbToHex
Defined in:
lib/premailer/adapter/nokogiri_fast.rb

Overview

NokogiriFast adapter

Constant Summary collapse

WIDTH_AND_HEIGHT =
['width', 'height'].freeze

Instance Method Summary collapse

Methods included from AdapterHelper::RgbToHex

#ensure_hex, #is_rgb?, #to_hex

Instance Method Details

#load_html(input) ⇒ ::Nokogiri::XML

Load the HTML input (a string, an IO-like object, a local file path or a URL) and convert it into a Nokogiri document.

Returns:

  • (::Nokogiri::XML)

    a document.



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/premailer/adapter/nokogiri_fast.rb', line 224

# Load the HTML input and parse it into a Nokogiri document.
#
# +input+ is taken as-is when the :with_html_string or :inline option is set
# or when it responds to +read+ (an IO-like object is read but left open for
# the caller). Otherwise it is opened as a local file when @is_local_file is
# set (also recording @base_dir), or through URI.open.
#
# Returns the parsed document (a fragment when :html_fragment is set), or
# nil when there is nothing to parse.
def load_html(input) # :nodoc:
  # TODO: duplicate options
  thing =
    if @options[:with_html_string] || @options[:inline] || input.respond_to?(:read)
      input
    elsif @is_local_file
      @base_dir = File.dirname(input)
      # Block form: the file handle is closed as soon as it has been read.
      File.open(input, 'r', &:read)
    else
      # Block form: whatever URI.open opened is closed after reading.
      URI.open(input, &:read)
    end

  # Caller-supplied IO objects are read here; closing them is the caller's job.
  thing = thing.read if thing.respond_to?(:read)

  return nil unless thing

  # Handle HTML entities
  if @options[:replace_html_entities] == true && thing.is_a?(String)
    # Substitute on a copy so the caller's string is never mutated and a
    # frozen input does not raise.
    thing = thing.dup
    HTML_ENTITIES.each do |entity, replacement|
      thing.gsub!(entity, replacement)
    end
  end

  encoding = @options[:input_encoding] || (RUBY_PLATFORM == 'java' ? nil : 'BINARY')
  doc = if @options[:html_fragment]
    ::Nokogiri::HTML.fragment(thing, encoding)
  else
    ::Nokogiri::HTML(thing, nil, encoding) { |c| c.recover }
  end

  # Fix for removing any CDATA tags from both style and script tags inserted per
  # https://github.com/sparklemotion/nokogiri/issues/311 and
  # https://github.com/premailer/premailer/issues/199
  %w[style script].each do |tag|
    doc.search(tag).children.each do |child|
      child.swap(child.text) if child.cdata?
    end
  end

  doc
end

#to_inline_css ⇒ String

Merge CSS into the HTML document.

Returns:

  • (String)

    the HTML document with the CSS merged in, serialized as a string.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/premailer/adapter/nokogiri_fast.rb', line 14

# Merge the parsed CSS rules into the HTML document as inline +style+
# attributes and return the serialized result.
#
# Works on @processed_doc in passes:
# 1. existing inline styles are wrapped in a <tt>[SPEC=1000[...]]</tt> marker;
# 2. every rule from @css_parser is either appended (tagged with its
#    specificity) to the matching elements, or collected in
#    @unmergable_rules when it sits under a media query or matches
#    Premailer::RE_UNMERGABLE_SELECTORS;
# 3. the markers on each element are folded into one declaration list via
#    CssParser.merge, optionally mirroring values into HTML attributes;
# 4. un-mergable rules are written back and the optional clean-ups
#    (:remove_scripts, :remove_classes, :remove_comments, :remove_ids,
#    :reset_contenteditable) are applied.
#
# Returns a String: XHTML when is_xhtml? is true, HTML otherwise.
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Create an index for nodes by tag name/id/class
  # Also precompute the map of nodes to descendants
  index, all_nodes, descendants = make_index(doc)

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity, media_types|
    # Drop the :link pseudo-class, replacing it with the captured whitespace group
    selector.gsub!(/:link([\s]*)+/i) { $1 }

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) { $1.to_s + $2.to_s.downcase }

    if Premailer.is_media_query?(media_types) || selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      unless @options[:preserve_styles]
        @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selectors: selector, block: declaration), media_types)
      end
    else
      begin
        if Premailer::RE_RESET_SELECTORS.match?(selector) && @options[:preserve_reset]
          # Keep reset-style rules (e.g. the MailChimp CSS reset:
          # http://github.com/mailchimp/Email-Blueprints/) in the un-mergable
          # set as well; they are still merged inline below.
          @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selectors: selector, block: declaration))
        end

        # Try the new index based technique. If not supported, fall back to the old brute force one.
        nodes = match_selector(index, all_nodes, descendants, selector) || doc.search(selector)
        nodes.each do |el|
          next unless el.elem? && el.name != 'head' && el.parent.name != 'head'

          # Add a style attribute or append to the existing one
          block = "[SPEC=#{specificity}[#{declaration}]]"
          el['style'] = el.attributes['style'].to_s + ' ' + block
        end
      rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  doc.search("script").remove if @options[:remove_scripts]

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    # One rule set per [SPEC=n[...]] marker found in the attribute
    declarations = []
    style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/m).each do |spec, css_block|
      begin
        declarations << CssParser::RuleSet.new(block: css_block.to_s, specificity: spec.to_i)
      rescue ArgumentError => e
        # Unparsable blocks are skipped unless the caller asked for exceptions
        raise e if @options[:rule_set_exceptions]
      end
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    begin
      merged.expand_shorthand!
    rescue ArgumentError => e
      raise e if @options[:rule_set_exceptions]
    end

    # Duplicate CSS attributes as HTML attributes
    if Premailer::RELATED_ATTRIBUTES.has_key?(el.name) && @options[:css_to_attributes]
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_attr, html_attr|
        if el[html_attr].nil? && !merged[css_attr].empty?
          new_val = merged[css_attr].dup

          # Remove url() function wrapper
          new_val.gsub!(/url\((['"])(.*?)\1\)/, '\2')

          # Remove !important, trailing semi-colon, and leading/trailing whitespace.
          # Non-bang calls: gsub!/strip! return nil when nothing changes, so
          # chaining them could raise NoMethodError.
          new_val = new_val.gsub(/;$|\s*!important/, '').strip

          # For width and height tags, remove px units
          new_val.gsub!(/(\d+)px/, '\1') if WIDTH_AND_HEIGHT.include?(html_attr)

          # For color-related tags, convert RGB to hex if specified by options
          new_val = ensure_hex(new_val) if css_attr.end_with?('color') && @options[:rgb_to_hex_attributes]

          el[html_attr] = new_val
        end

        unless @options[:preserve_style_attribute]
          # Drop the declaration from the merged rule set by deleting it from
          # the rule set's @declarations instance variable directly.
          merged.instance_variable_get(:@declarations).delete(css_attr)
        end
      end
    end

    # Collapse multiple rules into one as much as possible.
    merged.create_shorthand! if @options[:create_shorthands]

    # write the inline STYLE attribute
    el['style'] = merged.declarations_to_s
  end

  doc = write_unmergable_css_rules(doc, @unmergable_rules) unless @options[:drop_unmergeable_css_rules]

  if @options[:remove_classes] || @options[:remove_comments]
    doc.traverse do |el|
      if el.comment? && @options[:remove_comments]
        el.remove
      elsif el.element?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    # (kept in a Hash so the id lookup below is constant-time)
    targets = {}
    doc.search("a[@href^='#']").each do |el|
      target = el.get_attribute('href')[1..-1]
      targets[target] = true
      el.set_attribute('href', "#" + Digest::SHA256.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.key?(id)
        el.set_attribute('id', Digest::SHA256.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  if @options[:reset_contenteditable]
    doc.search('*[@contenteditable]').each do |el|
      el.remove_attribute('contenteditable')
    end
  end

  @processed_doc = doc
  if is_xhtml?
    # we don't want to encode carriage returns
    @processed_doc.to_xhtml(:encoding => @options[:output_encoding]).gsub(/&\#(xD|13);/i, "\r")
  else
    @processed_doc.to_html(:encoding => @options[:output_encoding])
  end
end

#to_plain_text ⇒ String

Converts the HTML document to a format suitable for plain-text e-mail.

If present, uses the <body> element as its base; otherwise uses the whole document.

Returns:

  • (String)

    the plain-text version of the document.



200
201
202
203
204
205
206
207
208
209
# File 'lib/premailer/adapter/nokogiri_fast.rb', line 200

# Render the document as text suitable for a plain-text e-mail.
#
# The inner HTML of <body> is used when it can be read and is non-empty;
# otherwise the whole document is serialized and converted instead.
def to_plain_text
  body_html =
    begin
      @doc.at("body").inner_html
    rescue StandardError
      # Best effort: any failure reading <body> (e.g. no such element)
      # falls through to the whole-document branch below.
      ''
    end

  source = (!body_html || body_html.empty?) ? @doc.to_html : body_html
  convert_to_text(source, @options[:line_length], @html_encoding)
end

#to_s ⇒ String

Gets the original HTML as a string.

Returns:

  • (String)

    HTML.



213
214
215
216
217
218
219
# File 'lib/premailer/adapter/nokogiri_fast.rb', line 213

# Serialize the original document (@doc) to a string: as XHTML when
# is_xhtml? is true, as HTML otherwise. The encoding option is passed as nil.
def to_s
  return @doc.to_xhtml(encoding: nil) if is_xhtml?

  @doc.to_html(encoding: nil)
end

#write_unmergable_css_rules(doc, unmergable_rules) ⇒ ::Nokogiri::XML

Create a style element with un-mergable rules (e.g. :hover) and write it into the head.

doc is a Nokogiri document and unmergable_rules holds the un-mergable CSS rules (#to_inline_css passes a CssParser::Parser).

Returns:

  • (::Nokogiri::XML)

    a document.



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/premailer/adapter/nokogiri_fast.rb', line 176

# Write the un-mergable rules (e.g. :hover) into the document as a <style>
# element and return the document.
#
# Nothing is written when +unmergable_rules+ serializes to an empty string.
# With the :html_fragment option the <style> node is appended to +doc+
# itself; otherwise it is appended to <head>, which is created first if the
# document has none.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  css = unmergable_rules.to_s
  return doc if css.empty?

  if @options[:html_fragment]
    style_node = ::Nokogiri::XML::Node.new("style", doc)
    style_node.content = css
    doc.add_child(style_node)
    return doc
  end

  style_node = doc.create_element("style", css)
  head_node = doc.at_css('head')

  # No <head>: prefer inserting one before the root's first element child,
  # and only fall back to adding it straight to the document.
  first_child = doc.root && doc.root.first_element_child
  head_node ||= first_child.add_previous_sibling(doc.create_element("head")) if first_child
  head_node ||= doc.add_child(doc.create_element("head"))
  head_node << style_node

  doc
end