Module: Loofah::HTML5::Scrub
- Defined in:
- lib/loofah/html5/scrub.rb
Constant Summary
# Pattern for the characters that scrub_attributes deletes from an
# attribute value before looking for a URI scheme: the backtick, the
# low control/whitespace range, DEL, and a block of code points that
# starts at U+0080.
CONTROL_CHARACTERS =
  if RUBY_VERSION =~ /\A1\.8/
    # Byte-oriented pattern for Ruby 1.8. It is compiled at runtime from
    # the same pattern text (encoding option 'n') rather than written as
    # a literal: as a literal, the bare high-byte escapes are not accepted
    # when the source encoding is UTF-8, and that would stop the file from
    # parsing even though this branch never runs on newer interpreters.
    Regexp.new('`|[\000-\040\177\s]+|\302[\200-\240]', nil, 'n')
  else
    /[`\u0000-\u0020\u007F\s\u0080-\u0101]/
  end
Class Method Summary (collapse)
- + (Boolean) allowed_element?(element_name)
-
+ (Object) scrub_attributes(node)
alternative implementation of the html5lib attribute scrubbing algorithm.
-
+ (Object) scrub_css(style)
lifted nearly verbatim from html5lib.
Class Method Details
+ (Boolean) allowed_element?(element_name)
17 18 19 |
# File 'lib/loofah/html5/scrub.rb', line 17
#
# Reports whether +element_name+ is a member of
# WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.
#
# @param element_name [Object] the name to look up
# @return [Boolean] result of the collection's include? check
def allowed_element?(element_name)
  permitted = ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2
  permitted.include?(element_name)
end
+ (Object) scrub_attributes(node)
alternative implementation of the html5lib attribute scrubbing algorithm
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/loofah/html5/scrub.rb', line 22
#
# Walks every attribute of +node+ and removes the ones that fail the
# whitelist checks below, then rewrites a surviving +style+ attribute
# through scrub_css.
#
# @param node [Object] must respond to attribute_nodes, attributes, name
#   and []= (presumably a Nokogiri element -- confirm against callers)
def scrub_attributes(node)
  node.attribute_nodes.each do |attribute|
    # Namespaced attributes are looked up as "prefix:name".
    ns = attribute.namespace
    qualified = ns ? "#{ns.prefix}:#{attribute.node_name}" : attribute.node_name

    if !WhiteList::ALLOWED_ATTRIBUTES.include?(qualified)
      attribute.remove
      next
    end

    if WhiteList::ATTR_VAL_IS_URI.include?(qualified)
      # this block lifted nearly verbatim from HTML5 sanitization
      # Normalise first (unescape entities, drop control characters,
      # lowercase) so the scheme cannot be disguised.
      normalized = CGI.unescapeHTML(attribute.value).gsub(CONTROL_CHARACTERS, '').downcase
      has_scheme = normalized =~ /^[a-z0-9][-+.a-z0-9]*:/
      if has_scheme && !WhiteList::ALLOWED_PROTOCOLS.include?(normalized.split(':')[0])
        attribute.remove
        next
      end
    end

    # Blank out url(...) references whose target does not start with '#'.
    if WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(qualified) && attribute.value
      attribute.value = attribute.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ')
    end

    # On the listed elements, xlink:href is kept only when its value
    # starts with '#'.
    non_local_href = WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
                     qualified == 'xlink:href' &&
                     attribute.value =~ /^\s*[^#\s].*/m
    attribute.remove if non_local_href
  end

  style = node.attributes['style']
  node['style'] = scrub_css(style) if style
end
+ (Object) scrub_css(style)
lifted nearly verbatim from html5lib
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/loofah/html5/scrub.rb', line 55
#
# Sanitizes the text of a CSS declaration list (lifted nearly verbatim
# from html5lib). url(...) references are blanked, the whole string must
# pass two shape checks, and only declarations whose property is
# whitelisted are kept.
#
# @param style [#to_s] raw declaration list; nil is treated as ''
# @return [String] surviving declarations, each rendered as
#   "prop: value;" and joined with single spaces; '' when the input is
#   rejected or nothing survives
def scrub_css(style)
  # disallow urls
  style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

  # gauntlet -- anchored with \A/\z so the ENTIRE string is validated.
  # Ruby's ^ and $ match at every line break, which would let any text
  # after a newline skip both checks.
  return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
  return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|\z))*\z/

  clean = []
  style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
    next if val.empty?

    prop = prop.downcase
    if WhiteList::ALLOWED_CSS_PROPERTIES.include?(prop)
      clean << "#{prop}: #{val};"
    elsif %w[background border margin padding].include?(prop.split('-')[0])
      # Shorthand families: keep the declaration only when EVERY token is
      # either a whitelisted keyword or a colour / short length literal.
      acceptable = val.split.all? do |keyword|
        WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
          keyword =~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
      end
      clean << "#{prop}: #{val};" if acceptable
    elsif WhiteList::ALLOWED_SVG_PROPERTIES.include?(prop)
      clean << "#{prop}: #{val};"
    end
  end

  clean.join(' ')
end