Module: IsoDoc::Function::Utils
Constant Summary collapse
- DOCTYPE_HDR =
"<!DOCTYPE html SYSTEM " \ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'.freeze
- HUGESTRICT =
Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::STRICT
- CLAUSE_ANCESTOR =
".//ancestor::*[local-name() = 'annex' or " \ "local-name() = 'definitions' or " \ "local-name() = 'acknowledgements' or local-name() = 'term' or " \ "local-name() = 'appendix' or local-name() = 'foreword' or " \ "local-name() = 'introduction' or local-name() = 'terms' or " \ "local-name() = 'clause' or local-name() = 'references']/@id".freeze
- NOTE_CONTAINER_ANCESTOR =
".//ancestor::*[local-name() = 'annex' or " \ "local-name() = 'foreword' or local-name() = 'appendix' or " \ "local-name() = 'introduction' or local-name() = 'terms' or " \ "local-name() = 'acknowledgements' or local-name() = 'term' or " \ "local-name() = 'clause' or local-name() = 'references' or " \ "local-name() = 'figure' or local-name() = 'formula' or " \ "local-name() = 'table' or local-name() = 'example']/@id".freeze
- LABELLED_ANCESTOR_ELEMENTS =
%w(example requirement recommendation permission note table figure sourcecode).freeze
Instance Method Summary collapse
- #attr_code(attributes) ⇒ Object
- #cleanup_entities(text, is_xml: true) ⇒ Object
- #date_range(date) ⇒ Object
- #emf?(type) ⇒ Boolean
- #empty2nil(str) ⇒ Object
- #eps?(type) ⇒ Boolean
- #external_path(path) ⇒ Object
-
#extract_delims(text) ⇒ Object
Picks math delimiters that do not occur in the text; the delimiters avoid ` (backtick), {{ (Liquid templates) and [[ (JavaScript).
- #from_xhtml(xml) ⇒ Object
- #get_clause_id(node) ⇒ Object
-
#get_note_container_id(node, type) ⇒ Object
no recursion on references.
- #header_strip(hdr) ⇒ Object
- #header_strip_elem?(elem) ⇒ Boolean
- #image_localfile(img) ⇒ Object
- #insert_tab(out, count) ⇒ Object
- #labelled_ancestor(elem, exceptions = []) ⇒ Object
- #liquid(doc) ⇒ Object
- #noko ⇒ Object
- #ns(xpath) ⇒ Object
- #numeric_escapes(xml) ⇒ Object
- #populate_template(docxml, _format = nil) ⇒ Object
- #save_dataimage(uri, _relative_dir = true) ⇒ Object
- #save_svg(img) ⇒ Object
- #sentence_join(array) ⇒ Object
- #start_of_sentence(node) ⇒ Object
- #to_xhtml(xml) ⇒ Object
- #to_xhtml_fragment(xml) ⇒ Object
- #to_xhtml_prep(xml) ⇒ Object
- #to_xml(node) ⇒ Object
Instance Method Details
#attr_code(attributes) ⇒ Object
35 36 37 38 39 |
# File 'lib/isodoc/function/utils.rb', line 35
# Prepares an attribute hash: entries whose value is nil are dropped and
# HTML entities in String values are decoded; other values pass through.
#
# @param attributes [Hash] attribute names mapped to values
# @return [Hash] new hash without nil values, String values entity-decoded
def attr_code(attributes)
  # The decoder is built once and shared by every value in the hash.
  decoder = HTMLEntities.new
  attributes.compact.transform_values do |value|
    value.is_a?(String) ? decoder.decode(value) : value
  end
end
#cleanup_entities(text, is_xml: true) ⇒ Object
230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/isodoc/function/utils.rb', line 230
# Re-encodes entities as hexadecimal references by decoding and then
# encoding with HTMLEntities.
#
# With is_xml the text is split on "<" and ">" and handled in groups of
# four pieces; only the first piece of each group (the text before a "<")
# is re-encoded, the remaining pieces are copied through unchanged.
#
# @param text [String] markup or plain text
# @param is_xml [Boolean] when false the whole string is re-encoded
# @return [String]
def cleanup_entities(text, is_xml: true)
  coder = HTMLEntities.new
  to_hex = ->(str) { coder.encode(coder.decode(str), :hexadecimal) }
  return to_hex.call(text) unless is_xml

  text.split(/([<>])/).each_slice(4).map do |text_part, *markup_parts|
    [to_hex.call(text_part), *markup_parts]
  end.join
end
#date_range(date) ⇒ Object
14 15 16 |
# File 'lib/isodoc/function/utils.rb', line 14
# Instance-level shortcut: forwards to the class-level date_range.
#
# @param date [Object] passed through unchanged
# @return [Object] whatever the class-level method returns
def date_range(date) = self.class.date_range(date)
#emf?(type) ⇒ Boolean
221 222 223 224 |
# File 'lib/isodoc/function/utils.rb', line 221
# Tells whether a media type is one of the EMF media types listed below.
#
# @param type [String, nil] media type, e.g. "image/emf"
# @return [Boolean]
def emf?(type)
  case type
  when "application/emf", "application/x-emf", "image/x-emf",
       "image/x-mgx-emf", "application/x-msmetafile", "image/x-xbitmap",
       "image/emf"
    true
  else
    false
  end
end
#empty2nil(str) ⇒ Object
161 162 163 164 |
# File 'lib/isodoc/function/utils.rb', line 161
# Maps the empty String to nil; every other value (including nil and
# empty non-String objects) is returned as is.
#
# @param str [Object]
# @return [Object, nil]
def empty2nil(str)
  empty_string = str.is_a?(String) && str.empty?
  empty_string ? nil : str
end
#eps?(type) ⇒ Boolean
226 227 228 |
# File 'lib/isodoc/function/utils.rb', line 226
# Tells whether a media type is one of the two EPS media types below.
#
# @param type [String, nil] media type
# @return [Boolean]
def eps?(type)
  case type
  when "application/postscript", "image/x-eps" then true
  else false
  end
end
#external_path(path) ⇒ Object
242 243 244 245 246 247 248 249 250 |
# File 'lib/isodoc/function/utils.rb', line 242
# Adapts a path for use on the current platform. When RUBY_PLATFORM looks
# like Windows, forward slashes become backslashes and a path containing
# whitespace is wrapped in double quotes; otherwise the path is returned
# unchanged.
#
# The argument itself is never modified, so frozen strings are accepted.
#
# @param path [String] file path
# @return [String] the converted path, or path itself off Windows
def external_path(path)
  windows = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless windows

  # tr rather than tr!: the caller's string must stay untouched.
  native = path.tr("/", "\\")
  native.match?(/\s/) ? "\"#{native}\"" : native
end
#extract_delims(text) ⇒ Object
Picks math delimiters that do not occur in the text; the delimiters avoid ` (backtick), {{ (Liquid templates) and [[ (JavaScript).
121 122 123 124 125 126 127 128 129 |
# File 'lib/isodoc/function/utils.rb', line 121
# Chooses an opening/closing math delimiter pair that does not occur in
# text. Starts from "(#(" and ")#)" and appends one more parenthesis to
# each for as long as either delimiter is found in the text. The result is
# also stored in @openmathdelim and @closemathdelim.
#
# @param text [String] text to scan
# @return [Array<String>] [opening delimiter, closing delimiter]
def extract_delims(text)
  @openmathdelim, @closemathdelim = "(#(", ")#)"
  loop do
    clash = [@openmathdelim, @closemathdelim].any? { |d| text.include?(d) }
    break unless clash

    @openmathdelim += "("
    @closemathdelim += ")"
  end
  [@openmathdelim, @closemathdelim]
end
#from_xhtml(xml) ⇒ Object
74 75 76 77 |
# File 'lib/isodoc/function/utils.rb', line 74
# Serialises a node with to_xml, removes the first XHTML namespace
# declaration from the result, and passes it through numeric_escapes.
#
# @param xml [Object] node accepted by to_xml
# @return [Object] the result of numeric_escapes
def from_xhtml(xml)
  serialised = to_xml(xml)
  without_ns = serialised.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
  numeric_escapes(without_ns)
end
#get_clause_id(node) ⇒ Object
87 88 89 |
# File 'lib/isodoc/function/utils.rb', line 87
# Evaluates the CLAUSE_ANCESTOR XPath on the node and returns the text of
# the last match, or nil when nothing matches.
#
# @param node [#xpath] node to query
# @return [String, nil]
def get_clause_id(node)
  matches = node.xpath(CLAUSE_ANCESTOR)
  last_match = matches&.last
  last_match&.text || nil
end
#get_note_container_id(node, type) ⇒ Object
no recursion on references
101 102 103 104 105 106 107 |
# File 'lib/isodoc/function/utils.rb', line 101
# Evaluates NOTE_CONTAINER_ANCESTOR on the node and returns the text of the
# last match, or nil when nothing matches. For the types "figure", "table"
# and "example" the XPath test for that same element name is removed from
# the query first. (Original note: no recursion on references.)
#
# @param node [#xpath] node to query
# @param type [String] element type being processed
# @return [String, nil]
def get_note_container_id(node, type)
  query =
    if %w(figure table example).include?(type)
      NOTE_CONTAINER_ANCESTOR.sub(/ or local-name\(\) = '#{type}'/, "")
    else
      NOTE_CONTAINER_ANCESTOR.dup
    end
  node.xpath(query)&.last&.text || nil
end
#header_strip(hdr) ⇒ Object
131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/isodoc/function/utils.rb', line 131
# Reduces header markup to lighter content. On the string form of hdr:
# <br/> becomes a space and <p>, <h1>..<h6> and tags starting with "<b"
# are deleted. The result is parsed with to_xhtml_fragment and traversed:
# spans styled with "mso-tab-count" become a space, nodes accepted by
# header_strip_elem? are removed, and remaining <a> elements are replaced
# by their children. The tree is serialised with from_xhtml.
#
# @param hdr [#to_s] header markup
# @return [Object] the result of from_xhtml
def header_strip(hdr)
  markup = hdr.to_s
    .gsub(%r{<br\s*/>}, " ")
    .gsub(%r{</?p(\s[^<>]+)?>}, "")
    .gsub(/<\/?h[123456][^<>]*>/, "")
    .gsub(/<\/?b[^<>]*>/, "")
    .dup
  fragment = to_xhtml_fragment(markup)
  fragment.traverse do |node|
    tab_span = node.name == "span" &&
      node["style"]&.include?("mso-tab-count")
    # Branch order matters: tab spans are replaced before the removal test.
    if tab_span then node.replace(" ")
    elsif header_strip_elem?(node) then node.remove
    elsif node.name == "a" then node.replace(node.children)
    end
  end
  from_xhtml(fragment)
end
#header_strip_elem?(elem) ⇒ Boolean
145 146 147 148 149 150 |
# File 'lib/isodoc/function/utils.rb', line 145
# Decides whether an element is to be dropped from a header: any <img>,
# a <span> with class "MsoCommentReference" or a style containing
# "mso-bookmark", or an <a> with class "FootnoteRef".
#
# @param elem [#name, #[]] element exposing its name and attributes
# @return [Boolean, nil] nil for a <span> that has no style attribute and
#   no matching class
def header_strip_elem?(elem)
  case elem.name
  when "img" then true
  when "span"
    elem["class"] == "MsoCommentReference" ||
      elem["style"]&.include?("mso-bookmark")
  when "a" then elem["class"] == "FootnoteRef"
  else false
  end
end
#image_localfile(img) ⇒ Object
201 202 203 204 205 206 207 208 209 210 |
# File 'lib/isodoc/function/utils.rb', line 201
# Resolves an image element to a local file path.
#
# * an <svg> element without src is written out through save_svg
# * a data: URI is written out through save_dataimage
# * a src starting with "/" (optionally after a drive letter such as
#   "C:") is returned unchanged
# * a missing src gives nil
# * anything else is joined onto @localdir
#
# @param img [#name, #[]] image element
# @return [String, nil]
def image_localfile(img)
  src = img["src"]
  return save_svg(img) if img.name == "svg" && !src

  if src.nil? then nil
  elsif /^data:/.match?(src) then save_dataimage(src, false)
  elsif %r{^([A-Z]:)?/}.match?(src) then src
  else File.join(@localdir, src)
  end
end
#insert_tab(out, count) ⇒ Object
26 27 28 29 |
# File 'lib/isodoc/function/utils.rb', line 26
# Appends the tab string to out exactly count times. The tab string
# depends on @script: one string for Hans, Hant, Jpan and Kore, another
# for every other script.
#
# NOTE(review): the two literals are written as escapes on the assumption
# that the listing shows U+3000 and U+00A0 followed by a space; confirm
# against lib/isodoc/function/utils.rb.
#
# @param out [#<<] output the tab is appended to
# @param count [Integer] number of tabs to append; 0 appends nothing
# @return [Integer] count
def insert_tab(out, count)
  tab = %w(Hans Hant Jpan Kore).include?(@script) ? "\u3000" : "\u00a0 "
  # Integer#times honours count; iterating over [1..count] would visit a
  # single Range element and append only one tab.
  count.times { out << tab }
end
#labelled_ancestor(elem, exceptions = []) ⇒ Object
216 217 218 219 |
# File 'lib/isodoc/function/utils.rb', line 216
# Tells whether any ancestor of elem is named in
# LABELLED_ANCESTOR_ELEMENTS, ignoring the names given in exceptions.
#
# @param elem [#ancestors] element whose ancestors are inspected
# @param exceptions [Array<String>] element names to leave out of the check
# @return [Boolean]
def labelled_ancestor(elem, exceptions = [])
  labelled_names = LABELLED_ANCESTOR_ELEMENTS - exceptions
  elem.ancestors.any? { |ancestor| labelled_names.include?(ancestor.name) }
end
#liquid(doc) ⇒ Object
152 153 154 155 156 157 158 159 |
# File 'lib/isodoc/function/utils.rb', line 152
# Parses doc as a Liquid template after unescaping HTML escapes inside
# {% ... %} tags.
#
# The text is split on the "{%" and "%}" markers and handled in groups of
# four pieces; in each group the third piece (the tag body when the
# markers are balanced) has "&lt;" and "&gt;" turned back into "<" and ">".
# Text outside the tags is left untouched.
#
# @param doc [String] template source
# @return [Object] the result of Liquid::Template.parse
def liquid(doc)
  unescaped = doc.split(/(\{%|%\})/).each_slice(4).map do |parts|
    # Replacing "<" with "<" would be a no-op; the escaped entity forms
    # are what has to be undone for comparisons inside tags.
    parts[2] = parts[2].gsub("&lt;", "<").gsub("&gt;", ">") if parts.size > 2
    parts.join
  end.join
  Liquid::Template.parse(unescaped)
end
#noko ⇒ Object
31 32 33 |
# File 'lib/isodoc/function/utils.rb', line 31
# Forwards the given block to Metanorma::Utils.noko_html and returns its
# result.
#
# @return [Object] whatever Metanorma::Utils.noko_html returns
def noko(&block) = Metanorma::Utils.noko_html(&block)
#ns(xpath) ⇒ Object
18 19 20 |
# File 'lib/isodoc/function/utils.rb', line 18
# Instance-level shortcut: forwards to the class-level ns.
#
# @param xpath [Object] passed through unchanged
# @return [Object] whatever the class-level method returns
def ns(xpath) = self.class.ns(xpath)
#numeric_escapes(xml) ⇒ Object
60 61 62 |
# File 'lib/isodoc/function/utils.rb', line 60
# Forwards to Metanorma::Utils.numeric_escapes.
#
# @param xml [Object] passed through unchanged
# @return [Object] whatever Metanorma::Utils.numeric_escapes returns
def numeric_escapes(xml) = Metanorma::Utils.numeric_escapes(xml)
#populate_template(docxml, _format = nil) ⇒ Object
166 167 168 169 170 171 172 173 174 175 |
# Renders docxml as a Liquid template: a hash is built from @meta.get merged
# with labels (from @labels and from @meta.labels, when set) and one further
# hash, its keys are stringified and its values passed through empty2nil,
# and three gsub replacements are applied to the rendered text.
# The _format parameter is not used in the body.
#
# NOTE(review): this listing is missing identifiers and is not valid Ruby
# as shown: nothing precedes the first "=", the last ".merge( || {})" has no
# left operand, and ".stringify_all_keys" has no receiver. Presumably one
# local variable fills the first and third gaps; the second cannot be told
# from here. Confirm against lib/isodoc/function/utils.rb.
# NOTE(review): the three gsub calls are shown replacing each character
# with itself; presumably entity text was decoded when this page was
# extracted. Verify the real search/replacement strings before relying on
# this listing.
# File 'lib/isodoc/function/utils.rb', line 166 def populate_template(docxml, _format = nil) = @meta .get .merge(@labels ? { labels: @labels } : {}) .merge(@meta.labels ? { labels: @meta.labels } : {}) .merge( || {}) liquid(docxml).render(.stringify_all_keys .transform_values { |v| empty2nil(v) }) .gsub("<", "<").gsub(">", ">").gsub("&", "&") end |
#save_dataimage(uri, _relative_dir = true) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/isodoc/function/utils.rb', line 177
# Writes the payload of a base64 data: URI (image/* or application/*) to a
# temporary file and returns that file's path.
#
# The file extension comes from the media subtype: EMF types (per emf?)
# give "emf", a "+suffix" such as the one in "svg+xml" is dropped,
# anything not purely lower-case alphanumeric becomes "png", and
# "postscript" becomes "eps". The Tempfile is appended to @tempfile_cache.
#
# @param uri [String] data URI with base64 payload
# @param _relative_dir [Boolean] not used in the body
# @return [String] path of the temporary file
def save_dataimage(uri, _relative_dir = true)
  parts = %r{^data:(?<imgclass>image|application)/(?<imgtype>[^;]+);(?:charset=[^;]+;)?base64,(?<imgdata>.+)$}.match(uri)
  imgclass, imgtype, imgdata =
    parts&.values_at(:imgclass, :imgtype, :imgdata)

  extension = emf?("#{imgclass}/#{imgtype}") ? "emf" : imgtype
  extension = extension.sub(/\+[a-z0-9]+$/, "") # svg+xml
  extension = "png" unless extension.match?(/^[a-z0-9]+$/)
  extension = "eps" if extension == "postscript"

  open_mode = File::BINARY | File::SHARE_DELETE
  Tempfile.open(["image", ".#{extension}"], mode: open_mode) do |file|
    file.binmode
    file.write(Base64.strict_decode64(imgdata))
    @tempfile_cache << file # persist to the end
    file.path
  end
end
#save_svg(img) ⇒ Object
192 193 194 195 196 197 198 199 |
# File 'lib/isodoc/function/utils.rb', line 192
# Writes the XML serialisation of an element to a temporary ".svg" file,
# appends the Tempfile to @tempfile_cache and returns the file's path.
#
# @param img [#to_xml] element to serialise
# @return [String] path of the temporary file
def save_svg(img)
  name_parts = ["image", ".svg"]
  open_mode = File::BINARY | File::SHARE_DELETE
  Tempfile.open(name_parts, mode: open_mode) do |file|
    file.write(img.to_xml)
    @tempfile_cache << file # persist to the end
    file.path
  end
end
#sentence_join(array) ⇒ Object
109 110 111 112 113 114 115 116 117 118 |
# File 'lib/isodoc/function/utils.rb', line 109
# Joins items into a phrase. nil or an empty array gives "", a single item
# is returned as is, and longer arrays are joined with ", " except that
# the last item is attached with the word from @i18n.and; that phrase is
# passed through @i18n.l10n with @lang and @script.
#
# @param array [Array, nil] items to join
# @return [Object] "", the single item, or the result of @i18n.l10n
def sentence_join(array)
  return "" if array.nil? || array.empty?
  return array[0] if array.length == 1

  leading = array[0..-2].join(', ')
  phrase = "#{leading} #{@i18n.and} #{array.last}"
  @i18n.l10n(phrase, @lang, @script)
end
#start_of_sentence(node) ⇒ Object
22 23 24 |
# File 'lib/isodoc/function/utils.rb', line 22
# Instance-level shortcut: forwards to the class-level start_of_sentence.
#
# @param node [Object] passed through unchanged
# @return [Object] whatever the class-level method returns
def start_of_sentence(node) = self.class.start_of_sentence(node)
#to_xhtml(xml) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/isodoc/function/utils.rb', line 47
# Parses markup into a Nokogiri XML document using the HUGESTRICT options,
# after preparing it with to_xhtml_prep.
#
# On a Nokogiri syntax error the prepared markup is written to
# "<@filename>.<@format>.err" (UTF-8) and the process is aborted with a
# message naming that file.
#
# @param xml [String] markup to parse
# @return [Object] the result of Nokogiri::XML.parse
def to_xhtml(xml)
  prepared = to_xhtml_prep(xml)
  begin
    Nokogiri::XML.parse(prepared, nil, nil, HUGESTRICT)
  rescue Nokogiri::XML::SyntaxError => e
    err_file = "#{@filename}.#{@format}.err"
    File.open(err_file, "w:UTF-8") { |io| io.write(prepared) }
    abort "Malformed Output XML for #{@format}: #{e} (see #{err_file})"
  end
end
#to_xhtml_fragment(xml) ⇒ Object
70 71 72 |
# File 'lib/isodoc/function/utils.rb', line 70
# Forwards to Metanorma::Utils.to_xhtml_fragment.
#
# @param xml [Object] passed through unchanged
# @return [Object] whatever Metanorma::Utils.to_xhtml_fragment returns
def to_xhtml_fragment(xml) = Metanorma::Utils.to_xhtml_fragment(xml)
#to_xhtml_prep(xml) ⇒ Object
64 65 66 67 68 |
# File 'lib/isodoc/function/utils.rb', line 64
# Prepares markup for strict XML parsing: XML declarations are removed,
# DOCTYPE_HDR is prepended unless the text already contains "<!DOCTYPE ",
# and the result is passed through numeric_escapes.
#
# The argument itself is never modified, so frozen strings are accepted.
#
# @param xml [String] markup
# @return [Object] the result of numeric_escapes
def to_xhtml_prep(xml)
  # gsub rather than gsub!: the caller's string must stay untouched.
  stripped = xml.gsub(/<\?xml[^<>]*>/, "")
  stripped = DOCTYPE_HDR + stripped unless stripped.include?("<!DOCTYPE ")
  numeric_escapes(stripped)
end
#to_xml(node) ⇒ Object
10 11 12 |
# File 'lib/isodoc/function/utils.rb', line 10
# Instance-level shortcut: forwards to the class-level to_xml.
#
# @param node [Object] passed through unchanged
# @return [Object] whatever the class-level method returns
def to_xml(node) = self.class.to_xml(node)