Class: TextUtils::HtmlSanitizer
- Defined in:
- lib/text_utils/html_sanitizer.rb
Constant Summary collapse
# Sanitize configuration used by HtmlSanitizer#call: the elements that may
# appear in the output, the attributes permitted on them, and the URL schemes
# accepted for link-like attributes.
#
# The top-level hash is frozen; derive variations with RELAXED.merge(...)
# instead of modifying it in place.
RELAXED = {
  elements: %w[
    a b blockquote br caption cite code col colgroup dd dl dt em
    h1 h2 h3 h4 h5 h6 i img li ol p pre q small strike strong sub sup
    table tbody td tfoot th thead tr u ul div font span iframe
  ],

  attributes: {
    all: %w[class style],
    'a' => %w[href title rel],
    'blockquote' => %w[cite],
    'col' => %w[span width],
    'colgroup' => %w[span width],
    'img' => %w[align alt height src title width],
    'ol' => %w[start type],
    'q' => %w[cite],
    'table' => %w[summary width],
    'td' => %w[abbr axis colspan rowspan width],
    'th' => %w[abbr axis colspan rowspan scope width],
    'ul' => %w[type],
    'code' => %w[lang language],
    'iframe' => %w[height scrolling src width]
  },

  protocols: {
    'a' => { 'href' => ['ftp', 'http', 'https', 'mailto', :relative] },
    'blockquote' => { 'cite' => ['http', 'https', :relative] },
    'img' => { 'src' => ['http', 'https', :relative] },
    'q' => { 'cite' => ['http', 'https', :relative] },
    # <iframe src> is allowed above, so it gets the same scheme restriction as
    # <img src>; without an entry here its URL scheme would be unconstrained
    # (e.g. javascript: or data:).
    'iframe' => { 'src' => ['http', 'https', :relative] }
  }
}.freeze
# Patterns a video URL must match before EMBEDDED_VIDEO accepts an embed.
#
# Anchored with \A rather than ^: ^ also matches right after a newline, so a
# value such as "javascript:...\nhttp://youtube.com/v/x" would otherwise pass.
VIDEO_URLS = [
  %r{\Ahttp://(?:www\.)?youtube\.com/v/}
].freeze
# Sanitize transformer that lets an <object>/<embed>/<param> video player
# through when its URL matches one of VIDEO_URLS.
#
# env - Hash handed to the transformer; only env[:node] is read here.
#
# Returns nil when the node is not part of a recognised video embed, otherwise
# a Hash whose :whitelist_nodes entry lists the node and its parent.
EMBEDDED_VIDEO = lambda do |env|
  element   = env[:node]
  tag       = element.name.to_s.downcase
  container = element.parent

  # The transformer receives the deepest nodes first, so the nodes of interest
  # are a <param> or an <embed> whose parent is an <object>.
  return nil unless %w[param embed].include?(tag)
  return nil unless container.name.to_s.downcase == 'object'

  url =
    if tag == 'param'
      # The video URL lives on the sibling <param name="movie"> element.
      movie_param = container.search('param[@name="movie"]')[0]
      return nil unless movie_param

      movie_param['value']
    else
      # For an <embed> the video URL is simply its "src" attribute.
      element['src']
    end

  # Only continue for URLs that match a known video URL pattern.
  return nil unless VIDEO_URLS.any? { |pattern| url =~ pattern }

  # The URL is acceptable, but the embed markup itself still goes through a
  # dedicated Sanitize pass so that no other elements or attributes can ride
  # along inside it.
  Sanitize.clean_node!(container, {
    elements: %w[embed object param],
    attributes: {
      'embed' => %w[allowfullscreen allowscriptaccess height src type width],
      'object' => %w[height width],
      'param' => %w[name value]
    }
  })

  # Tell Sanitize to whitelist the current node (<param> or <embed>) and its
  # parent (<object>).
  { whitelist_nodes: [element, container] }
end
Instance Method Summary collapse
Methods inherited from Processor
Constructor Details
This class inherits a constructor from TextUtils::Processor
Instance Method Details
#call(data, env) ⇒ Object
81 82 83 84 85 86 87 88 89 90 |
# File 'lib/text_utils/html_sanitizer.rb', line 81

# Passes the data to call_next first, then sanitizes whatever comes back using
# the RELAXED configuration extended with the EMBEDDED_VIDEO transformer.
#
# data - input forwarded to call_next (presumably an HTML string; confirm
#        against Processor).
# env  - forwarded to call_next untouched.
#
# Returns the result of Sanitize.clean.
def call(data, env)
  processed = call_next(data, env)

  # NOTE(review): `all: [:class]` does not look like the usual
  # element => { attribute => value } shape for :add_attributes; kept as-is,
  # confirm the intent.
  config = RELAXED.merge(
    transformers: [EMBEDDED_VIDEO],
    add_attributes: { all: [:class] }
  )

  Sanitize.clean(processed, config)
end