Class: Govspeak::HtmlSanitizer

Inherits:
Object
  • Object
show all
Defined in:
lib/govspeak/html_sanitizer.rb

Defined Under Namespace

Classes: ImageSourceWhitelister, TableCellTextAlignWhitelister

Instance Method Summary

Constructor Details

#initialize(dirty_html, options = {}) ⇒ HtmlSanitizer

Returns a new instance of HtmlSanitizer.



38
39
40
41
# File 'lib/govspeak/html_sanitizer.rb', line 38

# Stores the HTML to be cleaned together with the optional image-host setting.
#
# @param dirty_html [Object] the HTML to sanitise; stored untouched
#   (presumably a String - confirm against callers)
# @param options [Hash] optional settings; only +:allowed_image_hosts+ is read
def initialize(dirty_html, options = {})
  image_hosts = options[:allowed_image_hosts]

  @dirty_html = dirty_html
  # Left as nil when the option is absent; #sanitize checks for that.
  @allowed_image_hosts = image_hosts
end

Instance Method Details

#sanitize(allowed_elements: []) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/govspeak/html_sanitizer.rb', line 43

# Cleans the HTML passed to the constructor using the configuration from
# #sanitize_config plus this class's transformers.
#
# @param allowed_elements [Array] extra element names, forwarded unchanged to
#   #sanitize_config
# @return [Object] whatever Sanitize.fragment returns for the stored HTML
def sanitize(allowed_elements: [])
  transformers = [TableCellTextAlignWhitelister.new]
  # The image-source transformer is only added when at least one allowed host
  # was configured; a nil or empty host list leaves it out entirely.
  if @allowed_image_hosts && @allowed_image_hosts.any?
    transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
  end

  # It would be cleaner to move this `transformers` key into the `sanitize_config` method rather
  # than having to use Sanitize::Config.merge() twice in succession. However, `sanitize_config`
  # is a public method and it looks like other projects depend on it behaving the way it
  # currently does – i.e. to return Sanitize config without any transformers.
  # e.g. https://github.com/alphagov/hmrc-manuals-api/blob/4a83f78d0bb839520155623fd9b63b3b12a3b13a/app/validators/no_dangerous_html_in_text_fields_validator.rb#L44
  config_with_transformers = Sanitize::Config.merge(
    sanitize_config(allowed_elements: allowed_elements),
    transformers: transformers,
  )

  # `Sanitize.clean` is a deprecated backwards-compatibility alias of
  # `Sanitize.fragment` (Sanitize 3.0+), so call the real method directly.
  Sanitize.fragment(@dirty_html, config_with_transformers)
end

#sanitize_config(allowed_elements: []) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/govspeak/html_sanitizer.rb', line 62

# Builds the Sanitize configuration used by #sanitize, without any transformers.
#
# The result is Sanitize::Config::RELAXED merged with the extra elements and
# per-element attributes listed below.
#
# @param allowed_elements [Array] additional element names appended after the
#   built-in extras
# @return [Object] the value returned by Sanitize::Config.merge
def sanitize_config(allowed_elements: [])
  relaxed = Sanitize::Config::RELAXED
  relaxed_attributes = relaxed[:attributes]
  global_attributes = relaxed_attributes[:all]

  extra_elements = %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path]
  permitted_elements = relaxed[:elements] + extra_elements + allowed_elements

  permitted_attributes = {
    :all => global_attributes + %w[role aria-label],
    "a" => relaxed_attributes["a"] + [:data] + %w[draggable],
    # svg and path start from the global attribute list, not an element-specific one.
    "svg" => global_attributes + %w[xmlns width height viewbox focusable],
    "path" => global_attributes + %w[fill d],
    "div" => [:data],
    "th" => relaxed_attributes["th"] + %w[style],
    "td" => relaxed_attributes["td"] + %w[style],
    "govspeak-embed-attachment" => %w[content-id],
  }

  Sanitize::Config.merge(
    relaxed,
    elements: permitted_elements,
    attributes: permitted_attributes,
  )
end