Class: Govspeak::HtmlSanitizer

Inherits:
Object
  • Object
show all
Defined in:
lib/govspeak/html_sanitizer.rb

Defined Under Namespace

Classes: ImageSourceWhitelister

Instance Method Summary collapse

Constructor Details

#initialize(dirty_html, options = {}) ⇒ HtmlSanitizer

Returns a new instance of HtmlSanitizer.



20
21
22
23
# File 'lib/govspeak/html_sanitizer.rb', line 20

# Stores the HTML to be sanitised along with the optional image host list.
#
# @param dirty_html the HTML that will later be passed to Sanitize.clean
# @param options [Hash] only the :allowed_image_hosts key is read; when it is
#   absent the stored host list is nil
def initialize(dirty_html, options = {})
  image_hosts = options[:allowed_image_hosts]

  @dirty_html = dirty_html
  @allowed_image_hosts = image_hosts
end

Instance Method Details

#sanitize(allowed_elements: []) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/govspeak/html_sanitizer.rb', line 25

# Cleans the stored HTML with Sanitize, using the config from `sanitize_config`
# plus an image-source transformer when allowed image hosts were supplied.
#
# @param allowed_elements [Array] forwarded unchanged to `sanitize_config`
# @return the result of Sanitize.clean
def sanitize(allowed_elements: [])
  hosts = @allowed_image_hosts
  # Only restrict image sources when a non-empty host list was configured.
  transformers = (hosts && hosts.any?) ? [ImageSourceWhitelister.new(hosts)] : []

  # The `transformers` key is merged in here instead of inside `sanitize_config`, even though
  # that costs a second Sanitize::Config.merge(). `sanitize_config` is public and other projects
  # appear to rely on it returning Sanitize config without any transformers, e.g.
  # https://github.com/alphagov/hmrc-manuals-api/blob/4a83f78d0bb839520155623fd9b63b3b12a3b13a/app/validators/no_dangerous_html_in_text_fields_validator.rb#L44
  base_config = sanitize_config(allowed_elements:)
  full_config = Sanitize::Config.merge(base_config, transformers:)

  Sanitize.clean(@dirty_html, full_config)
end

#sanitize_config(allowed_elements: []) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/govspeak/html_sanitizer.rb', line 44

# Builds the Sanitize config for Govspeak HTML, starting from Sanitize::Config::RELAXED.
# No transformers are added here; `elements`, `attributes` and `css` are the only keys overridden.
#
# @param allowed_elements [Array] extra element names appended to the permitted elements
# @return the result of Sanitize::Config.merge
def sanitize_config(allowed_elements: [])
  relaxed = Sanitize::Config::RELAXED
  relaxed_attributes = relaxed[:attributes]

  # RELAXED permits <style> elements; we deliberately remove them.
  permitted_elements = relaxed[:elements] - %w[style] +
    %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path] +
    allowed_elements

  permitted_attributes = {
    # RELAXED permits the style attribute on every element; we deliberately remove it.
    :all => relaxed_attributes[:all] + %w[role aria-label] - %w[style],
    "a" => relaxed_attributes["a"] + [:data] + %w[draggable],
    "svg" => %w[xmlns width height viewbox focusable],
    "path" => %w[fill d],
    "div" => [:data],
    # Style attributes are permitted on table cells only because Kramdown generates them
    # for table alignment; we replace them in a post processor.
    "th" => relaxed_attributes["th"] + %w[style],
    "td" => relaxed_attributes["td"] + %w[style],
    "govspeak-embed-attachment" => %w[content-id],
  }

  # text-align on table cells (the CSS Kramdown generates) is the only styling we permit,
  # so it is the only CSS property allowed.
  permitted_css = { properties: %w[text-align] }

  Sanitize::Config.merge(
    relaxed,
    elements: permitted_elements,
    attributes: permitted_attributes,
    css: permitted_css,
  )
end