Class: Zumobi::RemovingSanitize
- Inherits: Object
- Hierarchy:
  - Object
  - Zumobi::RemovingSanitize
- Defined in:
- lib/zumobi/removing_sanitize.rb
Defined Under Namespace
Modules: Config
Class Method Summary collapse
Class Method Details
.clean(html, config = RemovingSanitize::Config::ZUMOBI) ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/zumobi/removing_sanitize.rb', line 126

# Cleans feed HTML: strips CDATA markers and simple HTML comments, decodes
# entities and escaped markup, removes every element matched by the
# selectors in config[:removals], and finally runs the result through
# Sanitize.clean.
#
# The caller's string is never modified; frozen strings are accepted.
#
# @param html [String, nil] raw markup; blank input is returned unchanged
# @param config [Hash] configuration handed to Sanitize.clean; the optional
#   :removals entry is an enumerable of selectors passed to Nokogiri's #search
# @return [String, nil] the cleaned markup, or the blank value as-is
def self.clean(html, config = RemovingSanitize::Config::ZUMOBI)
  return html if html.blank?

  # Non-mutating gsub on purpose: the original gsub! calls altered the
  # caller's string and raised FrozenError on frozen input.
  #
  # CDATA markers are dropped because Sanitize would otherwise leave them
  # visible to the user as text.
  #
  # In one feed Nokogiri eats too much HTML when parsing because of a comment
  # sequence, so comments are stripped with a regular expression first.
  # NOTE(review): [^-]* skips comments whose body contains a hyphen --
  # confirm whether that restriction is intentional before widening it.
  cleaned = html
            .gsub(/<!\[CDATA\[/, '')
            .gsub(/\]\]>/, '')
            .gsub(/<!--[^-]*-->/, "")

  # Decode HTML entities.
  cleaned = HTMLEntities.new.decode(cleaned)

  # Decode HTML that is still escaped, e.g. "&lt;div&gt;test&lt;/div&gt;".
  cleaned = CGI.unescapeHTML(cleaned)
  return cleaned if cleaned.blank?

  removals = config[:removals]
  unless removals.nil?
    # Wrap in a div so plain text and multiple top-level nodes parse under a
    # single root element.
    doc = Nokogiri::HTML.fragment "<div>#{cleaned}</div>"
    removals.each do |removal|
      doc.search(removal).each(&:remove)
    end

    # Serialize only the wrapper's children so callers (e.g. plain-text
    # helpers) never see the wrapping div itself.
    cleaned = doc.children[0].children.map(&:to_html).join('')
  end

  Sanitize.clean(cleaned, config)
end