Class: HTMLPipeline

Inherits:
Object
  • Object
show all
Defined in:
lib/html_pipeline.rb,
lib/html_pipeline/filter.rb,
lib/html_pipeline/version.rb,
lib/html_pipeline/node_filter.rb,
lib/html_pipeline/text_filter.rb,
lib/html_pipeline/convert_filter.rb,
lib/html_pipeline/sanitization_filter.rb,
lib/html_pipeline/node_filter/emoji_filter.rb,
lib/html_pipeline/node_filter/https_filter.rb,
lib/html_pipeline/text_filter/image_filter.rb,
lib/html_pipeline/node_filter/mention_filter.rb,
lib/html_pipeline/convert_filter/markdown_filter.rb,
lib/html_pipeline/node_filter/asset_proxy_filter.rb,
lib/html_pipeline/node_filter/team_mention_filter.rb,
lib/html_pipeline/node_filter/absolute_source_filter.rb,
lib/html_pipeline/node_filter/image_max_width_filter.rb,
lib/html_pipeline/node_filter/syntax_highlight_filter.rb,
lib/html_pipeline/text_filter/plain_text_input_filter.rb,
lib/html_pipeline/node_filter/table_of_contents_filter.rb

Defined Under Namespace

Classes: ConvertFilter, Filter, InvalidFilterError, MissingDependencyError, NodeFilter, SanitizationFilter, TextFilter

Constant Summary collapse

# Version string of this library.
VERSION =
"3.2.2"

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash) ⇒ HTMLPipeline

Returns a new instance of HTMLPipeline.

Raises:

  • (ArgumentError)


114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/html_pipeline.rb', line 114

# Public: Build a pipeline from the given filters.
#
# text_filters        - Array of HTMLPipeline::TextFilter objects. Nested
#                       arrays are flattened; nil is treated as empty.
# convert_filter      - HTMLPipeline::ConvertFilter object, or nil.
# sanitization_config - Configuration handed to Selma::Sanitizer.new. Pass
#                       nil to skip building a sanitizer.
# node_filters        - Array of HTMLPipeline::NodeFilter objects. Nested
#                       arrays are flattened; nil is treated as empty.
# default_context     - Hash stored (as a frozen copy) as the pipeline's
#                       default context. Must not be nil.
# result_class        - Accepted for interface compatibility; not used by
#                       this method.
#
# Raises ArgumentError if default_context is nil.
# Raises InvalidFilterError if text and node filters are both supplied
# without a convert_filter.
def initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash)
  raise ArgumentError, "default_context cannot be nil" if default_context.nil?

  # Array() makes a nil list behave like an empty one; `flatten` always
  # returns a fresh array, so freezing never touches the caller's object.
  @text_filters = Array(text_filters).flatten.freeze
  validate_filters(@text_filters, HTMLPipeline::TextFilter)

  @node_filters = Array(node_filters).flatten.freeze
  validate_filters(@node_filters, HTMLPipeline::NodeFilter)

  @convert_filter = convert_filter

  if @convert_filter.nil?
    if !@text_filters.empty? && !@node_filters.empty?
      raise InvalidFilterError, "Must provide `convert_filter` if `text_filters` and `node_filters` are also provided"
    end
  else
    validate_filter(@convert_filter, HTMLPipeline::ConvertFilter)
  end

  @sanitization_config = sanitization_config.nil? ? nil : Selma::Sanitizer.new(sanitization_config)

  # Freeze a copy so the hash the caller passed in stays mutable.
  @default_context = default_context.dup.freeze
  @instrumentation_service = self.class.default_instrumentation_service
end

Class Attribute Details

.default_instrumentation_service ⇒ Object

Public: Default instrumentation service for new pipeline objects.



111
112
113
# File 'lib/html_pipeline.rb', line 111

# Public: Default instrumentation service for new pipeline objects.
#
# Returns the value held in @default_instrumentation_service (nil if unset).
def default_instrumentation_service = @default_instrumentation_service

Instance Attribute Details

#instrumentation_name ⇒ Object



103
104
105
106
107
# File 'lib/html_pipeline.rb', line 103

# Public: Name reported for this pipeline in instrumentation payloads.
#
# Once @instrumentation_name has been assigned (even to nil) that value is
# returned as-is; otherwise it is initialised to the class name.
#
# Returns the stored name.
def instrumentation_name
  if instance_variable_defined?(:@instrumentation_name)
    @instrumentation_name
  else
    @instrumentation_name = self.class.name
  end
end

#instrumentation_service ⇒ Object

Public: Instrumentation service for the pipeline. Set an ActiveSupport::Notifications compatible object to enable.



98
99
100
# File 'lib/html_pipeline.rb', line 98

# Public: Instrumentation service for the pipeline. Set an
# ActiveSupport::Notifications compatible object to enable.
#
# Returns the value held in @instrumentation_service (nil if unset).
def instrumentation_service = @instrumentation_service

#node_filters ⇒ Object (readonly)

Public: Returns an Array of Filter objects for this Pipeline.



91
92
93
# File 'lib/html_pipeline.rb', line 91

# Public: Returns the node filters stored in @node_filters for this Pipeline.
def node_filters = @node_filters

#sanitization_config ⇒ Object (readonly)

Public: The Selma::Sanitizer built from the sanitization configuration settings, or nil when no configuration was given.



94
95
96
# File 'lib/html_pipeline.rb', line 94

# Public: The sanitization settings in use by this Pipeline.
#
# Returns the value held in @sanitization_config. The constructor stores a
# Selma::Sanitizer there, or nil when it was given a nil configuration.
def sanitization_config = @sanitization_config

#text_filters ⇒ Object (readonly)

Public: Returns an Array of Filter objects for this Pipeline.



91
92
93
# File 'lib/html_pipeline.rb', line 91

# Public: Returns the text filters stored in @text_filters for this Pipeline.
def text_filters = @text_filters

Class Method Details

.define_dependency_loaded_method(name, value) ⇒ Object



86
87
88
# File 'lib/html_pipeline.rb', line 86

# Internal: Define a `<name>_loaded?` predicate that always returns `value`.
#
# name  - Dependency name used as the predicate's prefix.
# value - Object the generated predicate returns.
#
# NOTE(review): the method is defined on `self.class`, i.e. on the class of
# the receiver rather than on the receiver itself - confirm that is intended.
#
# Returns the result of define_method.
def define_dependency_loaded_method(name, value)
  predicate = "#{name}_loaded?".to_sym
  self.class.define_method(predicate) { value }
end

.optional_dependency(name, requirer) ⇒ Object



45
46
47
48
# File 'lib/html_pipeline.rb', line 45

# Internal: Try to require `name`, ignoring the failure if it cannot be loaded.
#
# name     - Library name passed to `require`.
# requirer - Not used by this method.
#
# Returns the result of `require`, or nil when a LoadError was raised.
def optional_dependency(name, requirer)
  begin
    require(name)
  rescue LoadError
    # Best effort by design: a missing optional library is not an error.
    nil
  end
end

.require_dependencies(names, requirer) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/html_pipeline.rb', line 57

# Internal: Require the first loadable dependency out of several alternatives.
#
# Each candidate is tried in order and loading stops at the first success.
# A `<name>_loaded?` predicate is defined (via
# define_dependency_loaded_method) for every candidate that was attempted,
# except for a final candidate that fails to load.
#
# names    - Array of library names (a single name is also accepted). The
#            array is not modified.
# requirer - Name of the component that needs the dependency; used in the
#            error message.
#
# Raises ArgumentError if no names are given.
# Raises MissingDependencyError if none of the candidates can be loaded.
def require_dependencies(names, requirer)
  candidates = Array(names)
  raise ArgumentError, "names must contain at least one dependency" if candidates.empty?

  # Destructure instead of `shift`-ing so the caller's array is left intact.
  *alternatives, last_resort = candidates

  alternatives.each do |name|
    begin
      require_dependency(name, requirer)
    rescue MissingDependencyError
      # try the next dependency
      define_dependency_loaded_method(name, false)
      next
    end

    define_dependency_loaded_method(name, true)
    return # we got a dependency
  end

  # The final candidate is required directly so a failure can report the
  # whole list rather than just the last name.
  begin
    require last_resort
  rescue LoadError => e
    raise MissingDependencyError,
      "Missing all dependencies '#{candidates.join(", ")}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}"
  end

  define_dependency_loaded_method(last_resort, true)
end

.require_dependency(name, requirer) ⇒ Object



50
51
52
53
54
55
# File 'lib/html_pipeline.rb', line 50

# Internal: Require a mandatory dependency.
#
# name     - Library name passed to `require`.
# requirer - Name of the component that needs the dependency; used in the
#            error message.
#
# Returns the result of `require`.
# Raises MissingDependencyError (wrapping the LoadError text) if the library
# cannot be loaded.
def require_dependency(name, requirer)
  begin
    require(name)
  rescue LoadError => load_error
    raise MissingDependencyError,
      "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{load_error.class.name}: #{load_error}"
  end
end

Instance Method Details

#call(text, context: {}, result: {}) ⇒ Object

Apply all filters in the pipeline to the given HTML.

text - A UTF-8 String containing the input text or HTML.

context - The context hash passed to each filter. See the Filter docs for more info on possible values. This object MUST NOT be modified in place by filters. Use the Result for passing state back.

result - The result Hash passed to each filter for modification. This is where Filters store extracted information from the content.

Returns the result Hash after being filtered by this Pipeline. Contains an :output key with the String HTML markup based on the output of the last filter in the pipeline.



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/html_pipeline.rb', line 149

# Public: Apply all filters in the pipeline to the given text.
#
# Stages, in order: text filters, the optional convert filter, then a
# Selma rewrite that applies the sanitizer together with any node filters.
#
# text    - String to process. Superseded by result[:output] when that key
#           is already set or is produced by the text filters.
# context - Hash merged over the pipeline's default context and frozen
#           before being handed to the filters (default: {}; nil is
#           treated as empty).
# result  - Hash the filters write into; :output is stored here
#           (default: {}; nil is treated as empty).
#
# Returns a new Hash: `result` merged with the result of every node filter.
def call(text, context: {}, result: {})
  context = @default_context.merge(context || {}).freeze
  result ||= {}

  # Baseline payload, so every instrumented stage reports the context and
  # result even when no text filters run.
  payload = default_payload({ context: context, result: result })

  if @text_filters.any?
    payload = default_payload({
      text_filters: @text_filters.map { |f| f.class.name },
      context: context,
      result: result,
    })
    instrument("call_text_filters.html_pipeline", payload) do
      result[:output] =
        @text_filters.inject(text) do |doc, filter|
          perform_filter(filter, doc, context: (filter.context || {}).merge(context), result: result)
        end
    end
  end

  text = result[:output] || text

  html =
    if @convert_filter.nil?
      text
    else
      instrument("call_convert_filter.html_pipeline", payload) do
        @convert_filter.call(text, context: (@convert_filter.context || {}).merge(context))
      end
    end

  rewriter_options = {
    memory: {
      max_allowed_memory_usage: 5 * 1024 * 1024, # arbitrary limit of 5MB
    },
  }

  if @node_filters.empty?
    instrument("sanitization.html_pipeline", payload) do
      result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, options: rewriter_options).rewrite(html)
    end
  else
    # Build the payload *before* instrumenting; assigning it inside the
    # block would be too late for subscribers to ever receive it.
    payload = default_payload({
      node_filters: @node_filters.map { |f| f.class.name },
      context: context,
      result: result,
    })
    instrument("call_node_filters.html_pipeline", payload) do
      @node_filters.each { |filter| filter.context = (filter.context || {}).merge(context) }
      result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
    end
  end

  # Fold what each node filter collected into the returned hash, then clear
  # the filters' per-run state.
  result = result.merge(@node_filters.map(&:result).reduce({}, :merge))
  @node_filters.each(&:reset!)

  result
end

#default_payload(payload = {}) ⇒ Object

Internal: Default payload for instrumentation.

Accepts a Hash of additional payload data to be merged.

Returns a Hash.



259
260
261
# File 'lib/html_pipeline.rb', line 259

# Internal: Default payload for instrumentation.
#
# payload - Hash of additional payload data merged over the defaults
#           (default: {}).
#
# Returns a new Hash whose :pipeline entry is the instrumentation name unless
# `payload` overrides it.
def default_payload(payload = {})
  defaults = { pipeline: instrumentation_name }
  defaults.merge(payload)
end

#instrument(event, payload = {}, &block) ⇒ Object

Internal: If the `instrumentation_service` object is set, instruments the block; otherwise the block is run without instrumentation.

Returns the result of the provided block.



247
248
249
250
251
252
# File 'lib/html_pipeline.rb', line 247

# Internal: Run the block, instrumenting it when a service is configured.
#
# event   - Event name passed to the instrumentation service.
# payload - Hash passed to the service, or yielded to the block when there is
#           no service. A nil/false payload is replaced by default_payload.
# block   - The work to run.
#
# Returns the block's result when no service is set; otherwise whatever the
# service's `instrument` returns.
def instrument(event, payload = {}, &block)
  effective_payload = payload || default_payload
  service = instrumentation_service

  if service
    service.instrument(event, effective_payload, &block)
  else
    yield(effective_payload)
  end
end

#perform_filter(filter, doc, context: {}, result: {}) ⇒ Object

Internal: Applies a specific filter to the supplied doc.

The filter is instrumented.

Returns the result of the filter.



211
212
213
214
215
216
217
218
219
220
221
# File 'lib/html_pipeline.rb', line 211

# Internal: Apply one filter to the supplied doc inside a
# "call_filter.html_pipeline" instrumentation event.
#
# filter  - Object responding to call(doc, context:, result:).
# doc     - Input handed to the filter.
# context - Hash forwarded to the filter (default: {}).
# result  - Hash forwarded to the filter (default: {}).
#
# Returns whatever `instrument` returns for the filter call.
def perform_filter(filter, doc, context: {}, result: {})
  event_payload = default_payload({ filter: filter.class.name, context: context, result: result })

  instrument("call_filter.html_pipeline", event_payload) { filter.call(doc, context: context, result: result) }
end

#setup_instrumentation(name, service: nil) ⇒ Object

Public: setup instrumentation for this pipeline.

Returns nothing.



237
238
239
240
241
# File 'lib/html_pipeline.rb', line 237

# Public: Set up instrumentation for this pipeline.
#
# name    - Value assigned to instrumentation_name.
# service - Instrumentation service to use; when nil/false the class-level
#           default_instrumentation_service is used instead (default: nil).
#
# Callers should not rely on the return value.
def setup_instrumentation(name, service: nil)
  self.instrumentation_name = name

  chosen_service = service || self.class.default_instrumentation_service
  self.instrumentation_service = chosen_service
end

#to_html(input, context: {}, result: {}) ⇒ Object

Like call, but guarantees that the value returned is a String of HTML markup.



224
225
226
227
228
229
230
231
232
# File 'lib/html_pipeline.rb', line 224

# Public: Like `call`, but returns only the output as a String.
#
# input   - Passed to `call` as the text to process.
# context - Hash forwarded to `call` (default: {}).
# result  - Hash forwarded to `call` (default: {}).
#
# Returns the :output entry of the call result, converted with `to_html`
# when the output responds to it and with `to_s` otherwise.
def to_html(input, context: {}, result: {})
  output = call(input, context: context, result: result)[:output]

  output.respond_to?(:to_html) ? output.to_html : output.to_s
end