Class: HTMLPipeline
- Inherits:
-
Object
- Object
- HTMLPipeline
- Defined in:
- lib/html_pipeline.rb,
lib/html_pipeline/filter.rb,
lib/html_pipeline/version.rb,
lib/html_pipeline/node_filter.rb,
lib/html_pipeline/text_filter.rb,
lib/html_pipeline/convert_filter.rb,
lib/html_pipeline/sanitization_filter.rb,
lib/html_pipeline/node_filter/emoji_filter.rb,
lib/html_pipeline/node_filter/https_filter.rb,
lib/html_pipeline/text_filter/image_filter.rb,
lib/html_pipeline/node_filter/mention_filter.rb,
lib/html_pipeline/convert_filter/markdown_filter.rb,
lib/html_pipeline/node_filter/asset_proxy_filter.rb,
lib/html_pipeline/node_filter/team_mention_filter.rb,
lib/html_pipeline/node_filter/absolute_source_filter.rb,
lib/html_pipeline/node_filter/image_max_width_filter.rb,
lib/html_pipeline/node_filter/syntax_highlight_filter.rb,
lib/html_pipeline/text_filter/plain_text_input_filter.rb,
lib/html_pipeline/node_filter/table_of_contents_filter.rb
Defined Under Namespace
Classes: ConvertFilter, Filter, InvalidFilterError, MissingDependencyError, NodeFilter, SanitizationFilter, TextFilter
Constant Summary collapse
- VERSION =
"3.2.1"
Class Attribute Summary collapse
-
.default_instrumentation_service ⇒ Object
Public: Default instrumentation service for new pipeline objects.
Instance Attribute Summary collapse
- #instrumentation_name ⇒ Object
-
#instrumentation_service ⇒ Object
Public: Instrumentation service for the pipeline.
-
#node_filters ⇒ Object
readonly
Public: Returns an Array of Filter objects for this Pipeline.
-
#sanitization_config ⇒ Object
readonly
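Public: The sanitization settings for this pipeline: a Selma::Sanitizer built from the configuration hash given to #initialize, or nil when `sanitization_config: nil` was passed.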
-
#text_filters ⇒ Object
readonly
Public: Returns an Array of Filter objects for this Pipeline.
Class Method Summary collapse
- .define_dependency_loaded_method(name, value) ⇒ Object
- .optional_dependency(name, requirer) ⇒ Object
- .require_dependencies(names, requirer) ⇒ Object
- .require_dependency(name, requirer) ⇒ Object
Instance Method Summary collapse
-
#call(text, context: {}, result: {}) ⇒ Object
Apply all filters in the pipeline to the given HTML.
-
#default_payload(payload = {}) ⇒ Object
Internal: Default payload for instrumentation.
-
#initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash) ⇒ HTMLPipeline
constructor
A new instance of HTMLPipeline.
-
#instrument(event, payload = {}, &block) ⇒ Object
Internal: if the `instrumentation_service` object is set, instruments the block; otherwise the block is run without instrumentation.
-
#perform_filter(filter, doc, context: {}, result: {}) ⇒ Object
Internal: Applies a specific filter to the supplied doc.
-
#setup_instrumentation(name, service: nil) ⇒ Object
Public: setup instrumentation for this pipeline.
-
#to_html(input, context: {}, result: {}) ⇒ Object
Like `call`, but guarantees that the value returned is a string of HTML markup.
Constructor Details
#initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash) ⇒ HTMLPipeline
Returns a new instance of HTMLPipeline.
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/html_pipeline.rb', line 114

# Public: Build a pipeline from text filters, an optional convert filter and
# node filters.
#
# text_filters        - filter or (possibly nested) Array of filters, checked
#                       by validate_filters against HTMLPipeline::TextFilter.
# convert_filter      - filter checked by validate_filter against
#                       HTMLPipeline::ConvertFilter, or nil.
# sanitization_config - configuration handed to Selma::Sanitizer.new; pass nil
#                       to build no sanitizer.
# node_filters        - filter or (possibly nested) Array of filters, checked
#                       by validate_filters against HTMLPipeline::NodeFilter.
# default_context     - Hash used as the base context; it is frozen in place.
# result_class        - accepted for interface compatibility; not read here.
#
# Raises ArgumentError if default_context is nil.
# Raises InvalidFilterError if text and node filters are both given without a
# convert filter.
def initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash)
  raise ArgumentError, "default_context cannot be nil" if default_context.nil?

  # Array() lets nil and a single filter behave like a list. The previous
  # `flatten.freeze || []` fallback could never fire: `nil.flatten` raised first.
  @text_filters = Array(text_filters).flatten.freeze
  validate_filters(@text_filters, HTMLPipeline::TextFilter)

  @node_filters = Array(node_filters).flatten.freeze
  validate_filters(@node_filters, HTMLPipeline::NodeFilter)

  @convert_filter = convert_filter

  if @convert_filter.nil? && (!@text_filters.empty? && !@node_filters.empty?)
    raise InvalidFilterError, "Must provide `convert_filter` if `text_filters` and `node_filters` are also provided"
  elsif !@convert_filter.nil?
    validate_filter(@convert_filter, HTMLPipeline::ConvertFilter)
  end

  # Only nil skips building a sanitizer; any other value goes to Selma.
  @sanitization_config = sanitization_config.nil? ? nil : Selma::Sanitizer.new(sanitization_config)

  @default_context = default_context.freeze
  @instrumentation_service = self.class.default_instrumentation_service
end
Class Attribute Details
.default_instrumentation_service ⇒ Object
Public: Default instrumentation service for new pipeline objects.
111 112 113 |
# File 'lib/html_pipeline.rb', line 111

# Public: Reader for the default instrumentation service.
#
# Returns whatever is stored in @default_instrumentation_service (nil when
# nothing has been assigned).
def default_instrumentation_service
  @default_instrumentation_service
end
Instance Attribute Details
#instrumentation_name ⇒ Object
103 104 105 106 107 |
# File 'lib/html_pipeline.rb', line 103

# Public: Name reported for this pipeline in instrumentation payloads.
#
# Falls back to the receiver's class name the first time it is read while
# @instrumentation_name has never been assigned. An explicitly assigned value
# (including nil) is returned as-is.
def instrumentation_name
  @instrumentation_name = self.class.name unless defined?(@instrumentation_name)
  @instrumentation_name
end
#instrumentation_service ⇒ Object
Public: Instrumentation service for the pipeline. Set an ActiveSupport::Notifications compatible object to enable.
98 99 100 |
# File 'lib/html_pipeline.rb', line 98

# Public: Reader for this pipeline's instrumentation service.
#
# Returns the object stored in @instrumentation_service, or nil when none is set.
def instrumentation_service
  @instrumentation_service
end
#node_filters ⇒ Object (readonly)
Public: Returns an Array of Filter objects for this Pipeline.
91 92 93 |
# File 'lib/html_pipeline.rb', line 91

# Public: Reader for the pipeline's node filters.
#
# Returns the object stored in @node_filters.
def node_filters
  @node_filters
end
#sanitization_config ⇒ Object (readonly)
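Public: The sanitization settings for this pipeline: a Selma::Sanitizer built from the configuration hash given to #initialize, or nil when `sanitization_config: nil` was passed.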
94 95 96 |
# File 'lib/html_pipeline.rb', line 94

# Public: Reader for the pipeline's sanitization settings.
#
# Returns the object stored in @sanitization_config, or nil when none is set.
def sanitization_config
  @sanitization_config
end
#text_filters ⇒ Object (readonly)
Public: Returns an Array of Filter objects for this Pipeline.
91 92 93 |
# File 'lib/html_pipeline.rb', line 91

# Public: Reader for the pipeline's text filters.
#
# Returns the object stored in @text_filters.
def text_filters
  @text_filters
end
Class Method Details
.define_dependency_loaded_method(name, value) ⇒ Object
86 87 88 |
# File 'lib/html_pipeline.rb', line 86

# Defines a zero-argument predicate named "<name>_loaded?" that returns `value`.
#
# name  - dependency name used as the method-name prefix.
# value - object the generated predicate returns.
#
# NOTE(review): the predicate is defined on `self.class`, not on `self`. When
# this runs as a class method, `self.class` is Class, so the predicate would be
# visible on every class - confirm that this is intended.
#
# Returns the Symbol name of the generated method.
def define_dependency_loaded_method(name, value)
  predicate = :"#{name}_loaded?"
  answer = -> { value }
  self.class.define_method(predicate, answer)
end
.optional_dependency(name, requirer) ⇒ Object
45 46 47 48 |
# File 'lib/html_pipeline.rb', line 45

# Tries to require `name` and ignores the failure if it cannot be loaded.
#
# name     - library name passed to `require`.
# requirer - not used by this method.
#
# Returns the result of `require`, or nil when a LoadError was raised.
def optional_dependency(name, requirer)
  require(name)
rescue LoadError # rubocop:disable Lint/SuppressedException
  nil # the dependency is optional, so a failed load is deliberately ignored
end
.require_dependencies(names, requirer) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/html_pipeline.rb', line 57

# Requires the first loadable library out of a list of alternatives.
#
# Every name except the last is tried through require_dependency. For each one
# a "<name>_loaded?" predicate is defined via define_dependency_loaded_method:
# true for the first that loads, false for each that fails before it. The last
# name is required directly, and only its failure is turned into the
# "Missing all dependencies" error.
#
# names    - Array of library names in order of preference. It is no longer
#            consumed: the previous implementation emptied the caller's array
#            with `shift`.
# requirer - label of the component needing the dependency, used in messages.
#
# Returns nil when an earlier alternative loaded, otherwise the result of
# define_dependency_loaded_method for the last name.
# Raises MissingDependencyError when none of the names can be loaded.
def require_dependencies(names, requirer)
  *candidates, fallback = names

  candidates.each do |candidate|
    begin
      require_dependency(candidate, requirer)
      define_dependency_loaded_method(candidate, true)
      return # we got a dependency
    rescue MissingDependencyError
      # record the miss and try the next dependency
      define_dependency_loaded_method(candidate, false)
    end
  end

  begin
    require fallback
    define_dependency_loaded_method(fallback, true)
  rescue LoadError => e
    raise MissingDependencyError,
      "Missing all dependencies '#{names.join(", ")}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}"
  end
end
.require_dependency(name, requirer) ⇒ Object
50 51 52 53 54 55 |
# File 'lib/html_pipeline.rb', line 50

# Requires `name`, translating a load failure into MissingDependencyError.
#
# name     - library name passed to `require`.
# requirer - label of the component needing the dependency, used in the message.
#
# Returns the result of `require`.
# Raises MissingDependencyError (carrying the original error's class and
# message) when the library cannot be loaded.
def require_dependency(name, requirer)
  require(name)
rescue LoadError => load_error
  cause_text = "#{load_error.class.name}: #{load_error}"
  raise MissingDependencyError, "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{cause_text}"
end
Instance Method Details
#call(text, context: {}, result: {}) ⇒ Object
Apply all filters in the pipeline to the given HTML.
text - A UTF-8 String comprised of HTML.
context - The context hash passed to each filter. See the Filter docs for more info on possible values. This object MUST NOT be modified in place by filters. Use the Result for passing state back.
result - The result Hash passed to each filter for modification. This is where Filters store extracted information from the content.
Returns the result Hash after being filtered by this Pipeline. Contains an :output key with the String HTML markup based on the output of the last filter in the pipeline.
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# File 'lib/html_pipeline.rb', line 149

# Public: Run the input through the text filters, the convert filter and the
# node filters / sanitizer, in that order.
#
# text    - the input String.
# context - Hash merged over the pipeline's default context; the merged hash
#           is frozen before any filter sees it.
# result  - Hash that filters write into; nil is treated as an empty Hash.
#
# Returns a Hash: `result` merged with every node filter's own result. The
# :output key holds the output of the last stage that ran; it is absent when
# no stage ran.
def call(text, context: {}, result: {})
  context = @default_context.merge(context).freeze
  result ||= {}
  payload = nil # stays nil without text filters; `instrument` then supplies the default payload

  if @text_filters.any?
    payload = default_payload({
      text_filters: @text_filters.map { |f| f.class.name },
      context: context,
      result: result,
    })

    instrument("call_text_filters.html_pipeline", payload) do
      # each text filter receives the previous filter's output
      result[:output] = @text_filters.inject(text) do |doc, filter|
        perform_filter(filter, doc, context: (filter.context || {}).merge(context), result: result)
      end
    end
  end

  text = result[:output] || text

  html = if @convert_filter.nil?
    text
  else
    instrument("call_convert_filter.html_pipeline", payload) do
      @convert_filter.call(text, context: (@convert_filter.context || {}).merge(context))
    end
  end

  # The listing had lost this local's name (` = {` / `options: )`), which is a
  # syntax error; any local name restores the intended code.
  rewriter_options = {
    memory: {
      max_allowed_memory_usage: 5_242_880, # arbitrary limit of 5MB
    },
  }

  if @node_filters.empty?
    # no convert filter means no html, so no sanitization
    unless @convert_filter.nil?
      instrument("sanitization.html_pipeline", payload) do
        result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
      end
    end
  else
    # Build the payload BEFORE instrumenting. It used to be assigned inside the
    # block after the rewrite, so the event never received the node filter names.
    node_payload = default_payload({
      node_filters: @node_filters.map { |f| f.class.name },
      context: context,
      result: result,
    })

    instrument("call_node_filters.html_pipeline", node_payload) do
      @node_filters.each { |filter| filter.context = (filter.context || {}).merge(context) }
      result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
    end
  end

  # merge returns a new Hash; the caller's `result` object is not updated here
  result = result.merge(@node_filters.collect(&:result).reduce({}, :merge))
  @node_filters.each(&:reset!)

  result
end
#default_payload(payload = {}) ⇒ Object
Internal: Default payload for instrumentation.
Accepts a Hash of additional payload data to be merged.
Returns a Hash.
259 260 261 |
# File 'lib/html_pipeline.rb', line 259

# Internal: Build an instrumentation payload.
#
# payload - Hash of extra entries; its keys win over the default :pipeline key.
#
# Returns a new Hash holding :pipeline (the instrumentation name) plus the
# given entries.
def default_payload(payload = {})
  base = { pipeline: instrumentation_name }
  base.merge(payload)
end
#instrument(event, payload = {}, &block) ⇒ Object
Internal: if the `instrumentation_service` object is set, instruments the block; otherwise the block is run without instrumentation.
Returns the result of the provided block.
247 248 249 250 251 252 |
# File 'lib/html_pipeline.rb', line 247

# Internal: Run the block, through the instrumentation service when one is set.
#
# event   - String event name handed to the service.
# payload - Hash payload; nil is replaced by default_payload.
# block   - the work to run; it is yielded the payload when no service is set.
#
# Returns the block's value when no service is set, otherwise whatever the
# service's `instrument` returns.
def instrument(event, payload = {}, &block)
  payload ||= default_payload

  service = instrumentation_service
  if service
    service.instrument(event, payload, &block)
  else
    yield(payload)
  end
end
#perform_filter(filter, doc, context: {}, result: {}) ⇒ Object
Internal: Applies a specific filter to the supplied doc.
The filter is instrumented.
Returns the result of the filter.
211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/html_pipeline.rb', line 211

# Internal: Apply one filter to `doc` inside a "call_filter.html_pipeline"
# instrumentation event.
#
# filter  - object responding to call(doc, context:, result:).
# doc     - the input handed to the filter.
# context - Hash passed through to the filter and included in the payload.
# result  - Hash passed through to the filter and included in the payload.
#
# Returns whatever `instrument` returns for the block that calls the filter.
def perform_filter(filter, doc, context: {}, result: {})
  filter_payload = default_payload({ filter: filter.class.name, context: context, result: result })

  instrument("call_filter.html_pipeline", filter_payload) { filter.call(doc, context: context, result: result) }
end
#setup_instrumentation(name, service: nil) ⇒ Object
Public: setup instrumentation for this pipeline.
Returns nothing.
237 238 239 240 241 |
# File 'lib/html_pipeline.rb', line 237

# Public: Configure instrumentation for this pipeline.
#
# name    - value assigned to instrumentation_name.
# service - instrumentation service to use; when nil (or false) the class's
#           default_instrumentation_service is used instead.
#
# The value of the final assignment (the chosen service) is what the method
# evaluates to.
def setup_instrumentation(name, service: nil)
  self.instrumentation_name = name

  chosen_service = service || self.class.default_instrumentation_service
  self.instrumentation_service = chosen_service
end
#to_html(input, context: {}, result: {}) ⇒ Object
Like `call`, but guarantees that the value returned is a string of HTML markup.
224 225 226 227 228 229 230 231 232 |
# File 'lib/html_pipeline.rb', line 224

# Public: Run the pipeline via `call` and return its :output as a String.
#
# input   - the input handed to `call`.
# context - Hash forwarded to `call`.
# result  - Hash forwarded to `call`.
#
# Returns output.to_html when the output responds to to_html, otherwise
# output.to_s.
def to_html(input, context: {}, result: {})
  output = call(input, context: context, result: result)[:output]

  output.respond_to?(:to_html) ? output.to_html : output.to_s
end