Class: Govspeak::HtmlSanitizer
- Inherits: Object
- Hierarchy: Object → Govspeak::HtmlSanitizer
- Defined in:
- lib/govspeak/html_sanitizer.rb
Defined Under Namespace
Classes: ImageSourceWhitelister
Instance Method Summary
-
#initialize(dirty_html, options = {}) ⇒ HtmlSanitizer
constructor
A new instance of HtmlSanitizer.
- #sanitize(allowed_elements: []) ⇒ Object
- #sanitize_config(allowed_elements: []) ⇒ Object
Constructor Details
#initialize(dirty_html, options = {}) ⇒ HtmlSanitizer
Returns a new instance of HtmlSanitizer.
20 21 22 23 |
# File 'lib/govspeak/html_sanitizer.rb', line 20
# Stores the HTML to sanitise together with an optional list of allowed image hosts.
#
# @param dirty_html [Object] the HTML to sanitise; stored unchanged in @dirty_html
# @param options [Hash] optional settings; only the :allowed_image_hosts key is read
#   (nil when the key is absent)
def initialize(dirty_html, options = {})
  @dirty_html = dirty_html
  @allowed_image_hosts = options[:allowed_image_hosts]
end
Instance Method Details
#sanitize(allowed_elements: []) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/govspeak/html_sanitizer.rb', line 25
# Cleans @dirty_html with Sanitize, using the config from #sanitize_config plus an
# ImageSourceWhitelister transformer when a non-empty list of image hosts was supplied.
#
# @param allowed_elements [Array] extra element names, forwarded to #sanitize_config
# @return [Object] whatever Sanitize.clean returns for the stored HTML
def sanitize(allowed_elements: [])
  restrict_image_hosts = @allowed_image_hosts && @allowed_image_hosts.any?
  transformers = restrict_image_hosts ? [ImageSourceWhitelister.new(@allowed_image_hosts)] : []

  # It would be cleaner to set the `transformers` key inside `sanitize_config` instead of
  # calling Sanitize::Config.merge() twice in a row. However, `sanitize_config` is a public
  # method and other projects appear to rely on it returning Sanitize config without any
  # transformers.
  # e.g. https://github.com/alphagov/hmrc-manuals-api/blob/4a83f78d0bb839520155623fd9b63b3b12a3b13a/app/validators/no_dangerous_html_in_text_fields_validator.rb#L44
  base_config = sanitize_config(allowed_elements: allowed_elements)
  config_with_transformers = Sanitize::Config.merge(base_config, transformers: transformers)

  Sanitize.clean(@dirty_html, config_with_transformers)
end
#sanitize_config(allowed_elements: []) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/govspeak/html_sanitizer.rb', line 44
# Builds the Sanitize configuration: Sanitize::Config::RELAXED with style elements and
# the global style attribute removed, and govspeak/SVG elements and attributes added.
# The returned config contains no transformers.
#
# @param allowed_elements [Array] extra element names to permit on top of the defaults
# @return [Object] the result of Sanitize::Config.merge
def sanitize_config(allowed_elements: [])
  relaxed = Sanitize::Config::RELAXED
  relaxed_attributes = relaxed[:attributes]

  # Style elements are deliberately dropped even though Sanitize::Config::RELAXED allows them.
  elements_without_style = relaxed[:elements] - %w[style]
  extra_elements = %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path].concat(allowed_elements)
  elements = elements_without_style + extra_elements

  attributes = {
    # Style attributes are deliberately dropped even though Sanitize::Config::RELAXED allows them.
    :all => relaxed_attributes[:all] + %w[role aria-label] - %w[style],
    "a" => relaxed_attributes["a"] + [:data] + %w[draggable],
    "svg" => %w[xmlns width height viewbox focusable],
    "path" => %w[fill d],
    "div" => [:data],
    "span" => [:data],
    # Style attributes are allowed on table cells only, for the table alignment
    # Kramdown generates; they are replaced in a post processor.
    "th" => relaxed_attributes["th"] + %w[style],
    "td" => relaxed_attributes["td"] + %w[style],
    "govspeak-embed-attachment" => %w[content-id],
  }

  Sanitize::Config.merge(
    relaxed,
    elements: elements,
    attributes: attributes,
    # text-align on table cells (the CSS kramdown generates) is the only styling
    # permitted, so it is the only CSS property allowed.
    css: { properties: %w[text-align] },
  )
end