Class: Whitewash

Inherits:
Object
  • Object
show all
Defined in:
lib/whitewash.rb

Constant Summary collapse

# Pattern for one CSS declaration of the form "property: value ...".
# Capture group 1 holds the property name. The value is one or more tokens
# (runs of letters, digits, '-', '.', '/'; '#' followed by hex digits; or a
# percentage), each optionally followed by whitespace. Matching is
# case-insensitive and anchored to the whole string.
CSS = %r{
  \A\s*
  ([-a-z0-9]+) : \s*
  (?: (?: [-./a-z0-9]+ | \#[0-9a-f]+ | [0-9]+% ) \s* ) +
  \s*\z
}xi.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(whitelist = Whitewash.default_whitelist) ⇒ Whitewash

whitelist is expected to be loaded from xhtml.yaml.



48
49
50
# File 'lib/whitewash.rb', line 48

# Build a sanitizer around the given whitelist.
#
# whitelist:: stored unchanged in @whitelist; when omitted, the result of
#             Whitewash.default_whitelist is used.
def initialize(whitelist = Whitewash.default_whitelist)
  @whitelist = whitelist
end

Instance Attribute Details

#xhtml ⇒ Object (readonly)

Returns the value of attribute xhtml.



52
53
54
# File 'lib/whitewash.rb', line 52

# Reader for the @xhtml instance variable; returns whatever is currently
# stored there.
def xhtml
  @xhtml
end

Class Method Details

.default_whitelist ⇒ Object



39
40
41
42
43
44
# File 'lib/whitewash.rb', line 39

# Locate and load the default whitelist.
#
# Looks through the directories in PATH for the first one that contains a
# readable file named WHITELIST, reads that file and parses it with
# Whitewash.load.
#
# Returns the value produced by Whitewash.load.
# Raises RuntimeError when no directory in PATH holds a readable whitelist.
def Whitewash.default_whitelist
  found = PATH.find {|dir| File.readable?(File.join(dir, WHITELIST)) }
  raise RuntimeError, "Can't find default whitelist" unless found

  File.open(File.join(found, WHITELIST)) do |f|
    content = f.read
    # String#untaint no longer exists on Ruby 3.2+, so only call it on
    # interpreters that still provide it.
    content.untaint if content.respond_to?(:untaint)
    Whitewash.load(content)
  end
end

.load(string) ⇒ Object

Use Syck to parse the whitelist until Psych issue #36 is fixed.



28
29
30
# File 'lib/whitewash.rb', line 28

# Parse whitelist data from YAML text.
#
# string:: the YAML source to parse.
#
# Returns whatever YAML.load produces for the given text.
#
# NOTE(review): YAML.load may instantiate arbitrary objects depending on the
# YAML library version in use; presumably only trusted whitelist files are
# passed here — confirm against callers.
def Whitewash.load(string)
  YAML.load(string)
end

Instance Method Details

#check_style(whitelist, style) ⇒ Object



61
62
63
64
65
66
67
68
# File 'lib/whitewash.rb', line 61

# Check every declaration of a CSS string against the whitelist.
#
# whitelist:: hash whose '_css' entry lists the permitted property names.
# style::     CSS text consisting of ';'-separated declarations.
#
# Returns true when the whitelist has no '_css' entry, or when every
# declaration matches the CSS pattern and its property name is included in
# the '_css' entry; returns false otherwise.
def check_style(whitelist, style)
  allowed = whitelist['_css']
  return true unless allowed

  style.split(';').all? do |declaration|
    declaration =~ CSS && allowed.include?(Regexp.last_match(1))
  end
end

#sanitize(html, whitelist = @whitelist, &p) ⇒ Object

Return sanitized HTML.

If block is supplied, it will be invoked for each Nokogiri::XML::Element in the sanitized HTML.



112
113
114
115
116
117
118
119
# File 'lib/whitewash.rb', line 112

# Return sanitized HTML.
#
# The input is parsed with Nokogiri::HTML (with the noblanks option), the
# first //html/body node is passed through sanitize_element, and the
# children of that node are serialized with to_xhtml.
#
# html::      markup to sanitize.
# whitelist:: whitelist handed to sanitize_element; defaults to @whitelist.
# p::         optional block, forwarded to sanitize_element.
#
# Returns '' when the parsed document has no body element.
def sanitize(html, whitelist = @whitelist, &p)
  document = Nokogiri::HTML(html) {|config| config.noblanks }
  body = document.xpath('//html/body').first
  return '' unless body

  sanitize_element(body, whitelist, &p)
  body.children.to_xhtml
end

#sanitize_element(xml, whitelist = @whitelist, &p) ⇒ Object

Compare elements and attributes with the whitelist.



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/whitewash.rb', line 72

# Compare an element, its attributes and its descendants with the whitelist,
# modifying the tree in place.
#
# xml::       the element to check.
# whitelist:: hash keyed by element name; the '_common' entry is merged with
#             the element's own entry to get the allowed attributes, and
#             '_css' is consulted through check_style. Defaults to @whitelist.
# p::         optional block, yielded each element that is kept.
#
# Elements whose name starts with '_' or is not a whitelist key are replaced
# by their (already sanitized) children. A <style> element whose content
# fails check_style is removed. Attributes are removed when the whitelist
# entry for their name does not match the value via ===, or when a style
# attribute fails check_style.
def sanitize_element(xml, whitelist = @whitelist, &p)
  if xml.name =~ /^_/ || !whitelist.key?(xml.name)
    xml.element_children.each {|e| sanitize_element(e, whitelist, &p) }
    xml.replace(xml.children)
    return
  end

  # sanitize CSS in <style> elements
  if xml.name == 'style' && !check_style(whitelist, xml.content)
    xml.remove
    return
  end

  # Declared outside the block so the merged attribute whitelist is built at
  # most once per element, and only when the element has attributes.
  allowed = nil
  xml.attribute_nodes.each do |a|
    allowed ||= whitelist['_common'].merge(whitelist[xml.name] || {})
    unless allowed[a.name] === a.to_s
      xml.remove_attribute(a.name)
      next
    end

    # sanitize CSS in style="" attributes
    if a.name == 'style' && !check_style(whitelist, a.value)
      xml.remove_attribute(a.name)
    end
  end

  # recurse
  xml.element_children.each {|e| sanitize_element(e, whitelist, &p) }

  yield xml if block_given?
end