Module: Asciidoctor::PDF::Sanitizer

Included in:
Pdfmark, Asciidoctor::Prawn::Extensions
Defined in:
lib/asciidoctor/pdf/sanitizer.rb

Constant Summary collapse

# Maps each XML special-character entity to the literal character it stands for.
# Frozen so the lookup table cannot be altered at runtime.
XMLSpecialChars = {
  '&lt;' => '<',
  '&gt;' => '>',
  '&amp;' => '&',
}.freeze
# Matches any entity that is a key of XMLSpecialChars.
XMLSpecialCharsRx = /&(?:[lg]t|amp);/
# Reverse of XMLSpecialChars: literal character => entity. Frozen for the same reason.
InverseXMLSpecialChars = XMLSpecialChars.invert.freeze
# Matches any single character that is a key of InverseXMLSpecialChars.
InverseXMLSpecialCharsRx = /[#{InverseXMLSpecialChars.keys.join}]/
# Matches an XML tag together with one optional null character directly after it.
SanitizeXMLRx = /<[^>]+>\0?/
# Matches a character reference, optionally preceded by an escaped ampersand (&amp;).
# Capture 1 is the entity name, capture 2 a decimal code point, capture 3 a hexadecimal code point.
CharRefRx = /&(?:amp;)?(?:([a-z][a-z]+\d{0,2})|#(?:(\d\d\d{0,4})|x(\h\h\h{0,3})));/
# Matches an ampersand that is not the start of a named or numeric character reference.
UnescapedAmpersandRx = /&(?!(?:[a-z][a-z]+\d{0,2}|#(?:\d\d\d{0,4}|x\h\h\h{0,3}));)/

Instance Method Summary collapse

Instance Method Details

#encode_quotes(string) ⇒ Object



49
50
51
# File 'lib/asciidoctor/pdf/sanitizer.rb', line 49

# Replaces every double quote in the string with the &quot; entity.
#
# string - the String to encode
#
# Returns the String passed in (same object) when it contains no double quote,
# otherwise a new String with each double quote replaced.
def encode_quotes string
  return string unless string.include? '"'
  string.gsub '"', '&quot;'
end

#escape_amp(string) ⇒ Object



45
46
47
# File 'lib/asciidoctor/pdf/sanitizer.rb', line 45

# Escapes each ampersand matched by UnescapedAmpersandRx as &amp;.
#
# string - the String to process
#
# Returns a new String with the matched ampersands replaced.
def escape_amp string
  string.gsub(UnescapedAmpersandRx) { '&amp;' }
end

#escape_xml(string) ⇒ Object



37
38
39
# File 'lib/asciidoctor/pdf/sanitizer.rb', line 37

# Replaces each character matched by InverseXMLSpecialCharsRx with the entity
# registered for it in InverseXMLSpecialChars.
#
# string - the String to escape
#
# Returns a new String with the matched characters replaced.
def escape_xml string
  string.gsub(InverseXMLSpecialCharsRx) { |char| InverseXMLSpecialChars[char] }
end

#sanitize(string, compact: true) ⇒ Object

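Remove XML tags (together with a null character that directly follows a tag) and resolve named and numeric character references in the specified string. When compact is true (the default), also strip leading and trailing whitespace and collapse each run of repeated spaces into a single space.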

FIXME: move to a module so we can mix it in elsewhere

FIXME: add option to control escaping entities, or a filter mechanism in general



31
32
33
34
35
# File 'lib/asciidoctor/pdf/sanitizer.rb', line 31

# Removes XML tags and resolves character references in the string.
#
# string  - the String to sanitize
# compact - when true (the default), strip the result and squeeze each run of
#           spaces down to a single space
#
# Returns the sanitized String.
def sanitize string, compact: true
  # drop tags (and a null character directly after a tag) only if a tag can be present
  string = string.gsub SanitizeXMLRx, '' if string.include? '<'
  if string.include? '&'
    string = string.gsub CharRefRx do
      name, decimal, hex = Regexp.last_match.captures
      if name
        # named reference; BuiltInNamedEntities is defined outside this view
        BuiltInNamedEntities[name]
      else
        # numeric reference: turn the decimal or hexadecimal code point into a UTF-8 character
        code_point = decimal ? decimal.to_i : (hex.to_i 16)
        [code_point].pack 'U1'
      end
    end
  end
  return string unless compact
  string.strip.tr_s ' ', ' '
end

#unescape_xml(string) ⇒ Object



41
42
43
# File 'lib/asciidoctor/pdf/sanitizer.rb', line 41

# Replaces each entity matched by XMLSpecialCharsRx with the literal character
# registered for it in XMLSpecialChars.
#
# string - the String to unescape
#
# Returns a new String with the matched entities replaced.
def unescape_xml string
  string.gsub(XMLSpecialCharsRx) { |entity| XMLSpecialChars[entity] }
end