Class: Metanorma::Collection::Sectionsplit
- Inherits:
-
Object
- Object
- Metanorma::Collection::Sectionsplit
- Defined in:
- lib/metanorma/collection/sectionsplit/collection.rb,
lib/metanorma/collection/sectionsplit/sectionsplit.rb
Constant Summary collapse
- SPLITSECTIONS =
[["//preface/*", "preface"], ["//sections/*", "sections"], ["//annex", nil], ["//bibliography/*[not(@hidden = 'true')]", "bibliography"], ["//indexsect", nil], ["//colophon", nil]].freeze
Instance Attribute Summary collapse
-
#filecache ⇒ Object
Returns the value of attribute filecache.
-
#key ⇒ Object
Returns the value of attribute key.
Instance Method Summary collapse
- #att_dir(file) ⇒ Object
- #block?(node) ⇒ Boolean
- #build_collection ⇒ Object
- #coll_cover ⇒ Object
- #collection_manifest(filename, files, origxml, _presxml, dir) ⇒ Object
- #collection_setup(filename, dir) ⇒ Object
- #collectionyaml(files, xml) ⇒ Object
- #conflate_floatingtitles(nodes) ⇒ Object
- #create_sectionfile(xml, out, file, chunks, parentnode) ⇒ Object
- #empty_attachments(xml) ⇒ Object
- #empty_doc(xml) ⇒ Object
- #emptydoc(xml, ordinal) ⇒ Object
-
#initialize(opts) ⇒ Sectionsplit
constructor
A new instance of Sectionsplit.
- #ns(xpath) ⇒ Object
- #section_split_attachments(out: nil) ⇒ Object
- #section_split_cover(col, ident, one_doc_coll) ⇒ Object
- #section_split_cover1(ident, renderer, dir, _one_doc_coll) ⇒ Object
- #sectionfile(fulldoc, xml, file, chunks, parentnode) ⇒ Object
- #sectionfile_insert(ins, chunks, parentnode) ⇒ Object
-
#sectionsplit ⇒ Object
Input XML is Semantic.
- #sectionsplit1(xml, empty, empty1, idx) ⇒ Object
- #sectionsplit2(xml, empty, chunks, parentnode, opt) ⇒ Object
- #sectionsplit_prep(file, filename, dir) ⇒ Object
- #sectionsplit_preprocess_semxml(file, filename) ⇒ Object
- #sectionsplit_update_xrefs(xml) ⇒ Object
- #sectionsplit_write_semxml(filename, xml) ⇒ Object
- #semantic_xml_ids_gather(out) ⇒ Object
- #semxml_presxml_nodes_match(nodes, chunks) ⇒ Object
- #titlerender(section) ⇒ Object
- #truncate_semxml(out, chunks) ⇒ Object
Constructor Details
#initialize(opts) ⇒ Sectionsplit
Returns a new instance of Sectionsplit.
12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 12

# Stores the section-splitting options on the instance.
#
# @param opts [Hash] options; the keys read are :input, :base, :output,
#   :xml, :dir, :compile_opts, :fileslookup, :ident, :isodoc and
#   :document_suffix. A missing key leaves the matching variable nil,
#   except :compile_opts, which falls back to an empty Hash.
def initialize(opts)
  @input_filename, @base, @output_filename =
    opts.values_at(:input, :base, :output)
  @xml, @dir = opts.values_at(:xml, :dir)
  @compile_opts = opts[:compile_opts] || {}
  @fileslookup, @ident, @isodoc, @document_suffix =
    opts.values_at(:fileslookup, :ident, :isodoc, :document_suffix)
end
Instance Attribute Details
#filecache ⇒ Object
Returns the value of attribute filecache.
10 11 12 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 10

# Reader for the +filecache+ attribute.
#
# @return [Object] the current value of @filecache
def filecache
  @filecache
end
#key ⇒ Object
Returns the value of attribute key.
10 11 12 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 10

# Reader for the +key+ attribute (assigned by #sectionsplit from
# XrefProcess.xref_preprocess).
#
# @return [Object] the current value of @key
def key
  @key
end
Instance Method Details
#att_dir(file) ⇒ Object
65 66 67 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 65

# Name of the attachments directory associated with a file.
#
# @param file [String] a file name or path
# @return [String] "_<basename without extension>_attachments"
def att_dir(file)
  stem = File.basename(file, ".*")
  "_#{stem}_attachments"
end
#block?(node) ⇒ Boolean
67 68 69 70 71 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 67 def block?(node) %w(p table formula admonition ol ul dl figure quote sourcecode example pre note pagebreak hr bookmark requirement recommendation permission svgmap inputform toc passthrough review imagemap).include?(node.name) end |
#build_collection ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 4

# Splits the input document into section files and renders them as an
# HTML collection in "<output>_collection".
#
# Steps: write the cover template, split the document, write and parse
# the collection manifest, render it, then move the attachments
# directory into the collection folder.
#
# @return [String, nil] the value of #section_split_attachments
def build_collection
  collection_setup(@base, @dir)
  files = sectionsplit
  input_xml = Nokogiri::XML(File.read(@input_filename,
                                      encoding: "UTF-8"), &:huge)
  collection_manifest(@base, files, input_xml, @xml, @dir).render(
    { format: %i(html), output_folder: "#{@output_filename}_collection",
      coverpage: File.join(@dir, "cover.html") }.merge(@compile_opts),
  )
  # The method name was missing from this call in the extracted source.
  section_split_attachments(out: "#{@output_filename}_collection")
end
#coll_cover ⇒ Object
24 25 26 27 28 29 30 31 32 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 24

# Template for the collection cover page.
#
# @return [String] an HTML document containing the {{ doctitle }},
#   {{ docnumber }} and {{ navigation }} placeholders
def coll_cover
  <<~COVER
    <html><head><meta charset="UTF-8"/></head><body>
    <h1>{{ doctitle }}</h1>
    <h2>{{ docnumber }}</h2>
    <nav>{{ navigation }}</nav>
    </body></html>
  COVER
end
#collection_manifest(filename, files, origxml, _presxml, dir) ⇒ Object
34 35 36 37 38 39 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 34

# Writes the collection manifest YAML to "<dir>/<filename>.html.yaml"
# and parses it back into a collection.
#
# @param filename [String] base name for the manifest file
# @param files [Array<Hash>] section file records, passed to #collectionyaml
# @param origxml [Object] source XML, passed to #collectionyaml
# @param _presxml [Object] unused
# @param dir [String] directory to write the manifest into
# @return [Object] the result of Metanorma::Collection.parse
def collection_manifest(filename, files, origxml, _presxml, dir)
  manifest_path = File.join(dir, "#{filename}.html.yaml")
  File.open(manifest_path, "w:UTF-8") do |manifest|
    manifest.write(collectionyaml(files, origxml))
  end
  Metanorma::Collection.parse manifest_path
end
#collection_setup(filename, dir) ⇒ Object
16 17 18 19 20 21 22 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 16

# Creates the working directories and writes the cover template to
# "<dir>/cover.html".
#
# @param filename [String, nil] when given, "<filename>_collection" is
#   created as well
# @param dir [String] directory that receives cover.html
def collection_setup(filename, dir)
  FileUtils.mkdir_p("#{filename}_collection") if filename
  FileUtils.mkdir_p(dir)
  cover_path = File.join(dir, "cover.html")
  File.open(cover_path, "w:UTF-8") { |cover| cover.write(coll_cover) }
end
#collectionyaml(files, xml) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 41

# Builds the YAML text of the collection manifest.
#
# @param files [Array<Hash>] records with :order, :url and :title keys;
#   they are listed in ascending :order
# @param xml [Object] document queried for //bibdata/title and
#   //bibdata/docidentifier (and its @type)
# @return [String] YAML with string keys
def collectionyaml(files, xml)
  # NOTE(review): @lang is not assigned in any code visible here, so
  # language may be nil -- confirm where it is set.
  title = { type: "title-main", language: @lang,
            content: xml.at(ns("//bibdata/title")).text }
  docid = { type: xml.at(ns("//bibdata/docidentifier/@type")).text,
            id: xml.at(ns("//bibdata/docidentifier")).text }
  docrefs = files.sort_by { |f| f[:order] }.map do |f|
    { fileref: f[:url], identifier: f[:title] }
  end
  ret = {
    directives: ["presentation-xml", "bare-after-first"],
    bibdata: { title: title, type: "collection", docid: docid },
    manifest: { level: "collection", title: "Collection",
                docref: docrefs },
  }
  ::Metanorma::Util::recursive_string_keys(ret).to_yaml
end
#conflate_floatingtitles(nodes) ⇒ Object
73 74 75 76 77 78 79 80 81 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 73

# Groups consecutive nodes: each node that directly follows a block node
# (per #block?) joins the group of the node before it; otherwise it
# starts a new group.
#
# @param nodes [Enumerable] nodes in document order
# @return [Array<Array>] the groups, in order
def conflate_floatingtitles(nodes)
  groups = []
  follows_block = false
  nodes.each do |node|
    if follows_block
      groups.last << node
    else
      groups << [node]
    end
    follows_block = block?(node)
  end
  groups
end
#create_sectionfile(xml, out, file, chunks, parentnode) ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 161

# Fills a template document with the given chunks, processes its
# cross-references, prunes it, and writes it to "<@splitdir>/<file>.xml".
#
# @param xml [Object] the full document, passed to xref_process
# @param out [Object] the template document to populate (mutated)
# @param file [String] output base name, without extension
# @param chunks [Array] nodes to insert
# @param parentnode [String, nil] name of a wrapper element, if any
# @return [String] the written file name, "<file>.xml"
def create_sectionfile(xml, out, file, chunks, parentnode)
  anchor = out.at(ns("//metanorma-extension")) || out.at(ns("//bibdata"))
  sectionfile_insert(anchor, chunks, parentnode)
  Metanorma::Collection::XrefProcess::xref_process(out, xml, @key,
                                                   @ident, @isodoc)
  truncate_semxml(out, chunks)
  "#{file}.xml".tap do |outname|
    File.open(File.join(@splitdir, outname), "w:UTF-8") do |f|
      f.write(out)
    end
  end
end
#empty_attachments(xml) ⇒ Object
147 148 149 150 151 152 153 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 147

# Returns a copy of the document with its attachment elements removed.
# The method name was missing in the extracted source and is restored
# from the method summary.
#
# @param xml [Object] document to copy; the argument itself is not modified
# @return [Object] the copy, without attachment / semantic__attachment
#   elements under metanorma-ext / semantic__metanorma-ext
def empty_attachments(xml)
  out = xml.dup
  out.xpath(ns("//metanorma-ext//attachment | " \
               "//semantic__metanorma-ext//semantic__attachment"))
    .each(&:remove) # keep only one copy of attachments
  out
end
#empty_doc(xml) ⇒ Object
137 138 139 140 141 142 143 144 145 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 137

# Returns a copy of the document with the content containers removed:
# preface, sections, annexes, bibliography clauses, non-hidden
# bibliography references, index sections and colophon.
#
# @param xml [Object] document to copy; the argument itself is not modified
# @return [Object] the stripped copy
def empty_doc(xml)
  content_xpath =
    "//preface | //sections | //annex | //bibliography/clause | " \
    "//bibliography/references[not(@hidden = 'true')] | " \
    "//indexsect | //colophon"
  xml.dup.tap do |copy|
    copy.xpath(ns(content_xpath)).each(&:remove)
  end
end
#emptydoc(xml, ordinal) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 124

# Returns a copy of the document with the content containers removed
# and, unless +ordinal+ is zero, the attachment elements removed too.
#
# @param xml [Object] document to copy; the argument itself is not modified
# @param ordinal [Integer] attachments are kept only when this is 0
# @return [Object] the stripped copy
def emptydoc(xml, ordinal)
  content_xpath =
    "//preface | //sections | //annex | //bibliography/clause | " \
    "//bibliography/references[not(@hidden = 'true')] | " \
    "//indexsect | //colophon"
  attachment_xpath =
    "//metanorma-ext//attachment | " \
    "//semantic__metanorma-ext//semantic__attachment"
  out = xml.dup
  out.xpath(ns(content_xpath)).each(&:remove)
  unless ordinal.zero?
    # keep only one copy of attachments
    out.xpath(ns(attachment_xpath)).each(&:remove)
  end
  out
end
#ns(xpath) ⇒ Object
25 26 27 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 25

# Delegates an XPath expression to @isodoc.ns.
#
# @param xpath [String] XPath expression
# @return [Object] whatever @isodoc.ns returns for +xpath+
def ns(xpath)
  @isodoc.ns(xpath)
end
#section_split_attachments(out: nil) ⇒ Object
69 70 71 72 73 74 75 76 77 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 69

# Moves the attachments directory of the temporary file to its final
# location, replacing anything already there. The method name and the
# +attachments+ local were missing in the extracted source and are
# restored from the method summary.
#
# @param out [String, nil] destination directory; defaults to the
#   directory of the input file
# @return [String, nil] base name of the moved directory, or nil when
#   there is no attachments directory to move
def section_split_attachments(out: nil)
  attachments = att_dir(@tmp_filename)
  File.directory?(attachments) or return
  dir = out || File.dirname(@input_filename)
  ret = File.join(dir, att_dir(@output_filename))
  FileUtils.rm_rf ret
  FileUtils.mv attachments, ret
  File.basename(ret)
end
#section_split_cover(col, ident, one_doc_coll) ⇒ Object
79 80 81 82 83 84 85 86 87 88 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 79

# Renders the cover page of a collection and moves it next to the
# collection file.
#
# @param col [#file] the collection; its directory is the working directory
# @param ident [String] identifier used to name the output folder and
#   the index file
# @param one_doc_coll [Object] passed through to #section_split_cover1
# @return [String] file name of the resulting index page
def section_split_cover(col, ident, one_doc_coll)
  dir = File.dirname(col.file)
  collection_setup(nil, dir)
  renderer = ::Metanorma::Collection::Renderer.new(
    col, dir,
    output_folder: "#{ident}_collection",
    format: %i(html),
    coverpage: File.join(dir, "cover.html")
  )
  renderer.coverpage
  section_split_cover1(ident, renderer, dir, one_doc_coll)
end
#section_split_cover1(ident, renderer, dir, _one_doc_coll) ⇒ Object
90 91 92 93 94 95 96 97 |
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 90

# Moves the rendered index.html out of the renderer's output directory
# to "<dir>/<ident basename>_index.html", then deletes that directory.
#
# @param ident [String] identifier used to name the index file
# @param renderer [#outdir] renderer whose outdir holds index.html
# @param dir [String] destination directory
# @param _one_doc_coll [Object] unused
# @return [String] the index file name, without directory
def section_split_cover1(ident, renderer, dir, _one_doc_coll)
  # ident can be a directory with YAML indirection
  filename = File.basename("#{ident}_index.html")
  rendered_index = File.join(renderer.outdir, "index.html")
  FileUtils.mv(rendered_index, File.join(dir, filename))
  FileUtils.rm_rf(renderer.outdir)
  filename
end
#sectionfile(fulldoc, xml, file, chunks, parentnode) ⇒ Object
155 156 157 158 159 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 155

# Writes one section file and returns its manifest record.
#
# @param fulldoc [Object] the full document
# @param xml [Object] template document; a copy of it is populated
# @param file [String] output base name, without extension
# @param chunks [Array] nodes making up the section; the last one
#   supplies the display order and the title
# @param parentnode [String, nil] name of a wrapper element, if any
# @return [Hash] { order:, url:, title: }
def sectionfile(fulldoc, xml, file, chunks, parentnode)
  fname = create_sectionfile(fulldoc, xml.dup, file, chunks, parentnode)
  main = chunks.last
  { order: main["displayorder"].to_i, url: fname,
    title: titlerender(main) }
end
#sectionfile_insert(ins, chunks, parentnode) ⇒ Object
196 197 198 199 200 201 202 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 196

# Inserts copies of the chunks immediately after the +ins+ node, either
# inside a new wrapper element or directly as siblings.
#
# @param ins [Object] node after which the content is inserted
# @param chunks [Array] nodes to copy in; the originals are untouched
# @param parentnode [String, nil] when given, an empty element of this
#   name is created after +ins+ and the chunks become its children
def sectionfile_insert(ins, chunks, parentnode)
  if parentnode
    ins.next = "<#{parentnode}/>"
    chunks.each { |c| ins.next.add_child(c.dup) }
  else
    # Every assignment places the new node directly after ins, so walk
    # the chunks backwards to keep them in their original order.
    chunks.reverse_each { |c| ins.next = c.dup }
  end
end
#sectionsplit ⇒ Object
Input XML is Semantic
36 37 38 39 40 41 42 43 44 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 36

# Splits the input document into per-section files.
# Input XML is Semantic.
#
# Builds two templates: +empty+ (content removed) and +empty1+ (content
# and attachments removed), so only the first output keeps attachments.
# The work is queued on a pool of four threads.
#
# @return [Array<Hash>] one { order:, url:, title: } record per file
def sectionsplit
  xml = sectionsplit_prep(File.read(@input_filename), @base, @dir)
  @key = Metanorma::Collection::XrefProcess::xref_preprocess(xml, @isodoc)
  empty = empty_doc(xml)
  # The empty_attachments call was missing in the extracted source,
  # which left empty1 as the same object as empty.
  empty1 = empty_attachments(empty)
  @mutex = Mutex.new
  @pool = Concurrent::FixedThreadPool.new(4)
  sectionsplit1(xml, empty, empty1, 0)
end
#sectionsplit1(xml, empty, empty1, idx) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 46

# Queues one job per group of nodes matched by each SPLITSECTIONS entry,
# then waits for the pool to finish.
#
# @param xml [Object] the document to split
# @param empty [Object] template used while the running index is 0
# @param empty1 [Object] template used for every later index
# @param idx [Integer] starting index for the output file names
# @return [Array<Hash>] records collected from the jobs; they are
#   appended as jobs complete, so the order is not guaranteed
def sectionsplit1(xml, empty, empty1, idx)
  ret = []
  SPLITSECTIONS.each do |xpath, parentnode|
    conflate_floatingtitles(xml.xpath(ns(xpath))).each do |chunks|
      template = idx.zero? ? empty : empty1
      sectionsplit2(xml, template, chunks, parentnode,
                    { acc: ret, idx: idx })
      idx += 1
    end
  end
  @pool.shutdown
  @pool.wait_for_termination
  ret
end
#sectionsplit2(xml, empty, chunks, parentnode, opt) ⇒ Object
59 60 61 62 63 64 65 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 59

# Posts a job to @pool that writes one section file and appends its
# record to the shared accumulator while holding @mutex.
#
# @param xml [Object] the full document
# @param empty [Object] template document for this section
# @param chunks [Array] nodes making up the section
# @param parentnode [String, nil] name of a wrapper element, if any
# @param opt [Hash] :acc is the accumulator array, :idx the file index
def sectionsplit2(xml, empty, chunks, parentnode, opt)
  @pool.post do
    target = "#{@base}.#{opt[:idx]}"
    record = sectionfile(xml, empty, target, chunks, parentnode)
    @mutex.synchronize { opt[:acc] << record }
  end
end
#sectionsplit_prep(file, filename, dir) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 83

# Preprocesses the Semantic XML, compiles it to Presentation XML and
# loads the result.
#
# @param file [String] Semantic XML content
# @param filename [String] base name for the temporary XML file
# @param dir [String] directory for the split files; stored in @splitdir
# @return [Object] the parsed Presentation XML, with svgmap1 elements
#   renamed back to svgmap
def sectionsplit_prep(file, filename, dir)
  @splitdir = dir
  xml1, type = sectionsplit_preprocess_semxml(file, filename)
  flags = { format: :asciidoc, extension_keys: [:presentation],
            type: type }.merge(@compile_opts)
  Compile.new.compile(xml1, flags)
  presxml = xml1.sub(/\.xml$/, ".presentation.xml")
  # Block form closes the handle once parsing is done; the original
  # never closed it.
  r = File.open(presxml, encoding: "utf-8") do |f|
    Nokogiri::XML(f, &:huge)
  end
  r.xpath("//xmlns:svgmap1").each { |x| x.name = "svgmap" }
  r
end
#sectionsplit_preprocess_semxml(file, filename) ⇒ Object
96 97 98 99 100 101 102 103 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 96

# Parses the Semantic XML, updates its cross-references and writes it to
# a temporary file, whose name is stored in @tmp_filename.
#
# @param file [String] Semantic XML content
# @param filename [String] base name for the temporary file
# @return [Array(String, Symbol)] the temporary file name, and the root
#   element name with "-standard" removed, as a symbol
def sectionsplit_preprocess_semxml(file, filename)
  xml = Nokogiri::XML(file, &:huge)
  type = xml.root.name.sub("-standard", "").to_sym
  sectionsplit_update_xrefs(xml)
  @tmp_filename = sectionsplit_write_semxml(filename, xml)
  [@tmp_filename, type]
end
#sectionsplit_update_xrefs(xml) ⇒ Object
105 106 107 108 109 110 111 112 113 114 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 105

# Updates cross-references through the parent of @fileslookup, when there
# is one, then renames svgmap elements to svgmap1. Does nothing when
# @fileslookup or its parent is nil.
#
# @param xml [Object] Semantic XML document (mutated)
def sectionsplit_update_xrefs(xml)
  lookup = @fileslookup&.parent or return
  was_nested = lookup.nested
  lookup.nested = true # so unresolved erefs are not deleted
  lookup.update_xrefs(xml, @ident, {})
  lookup.nested = was_nested
  # do not process svgmap until after files are split
  xml.xpath("//xmlns:svgmap").each { |x| x.name = "svgmap1" }
end
#sectionsplit_write_semxml(filename, xml) ⇒ Object
116 117 118 119 120 121 122 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 116

# Serialises the XML via @isodoc.to_xml and writes it to
# "tmp_<filename>", with the extension replaced by ".xml".
#
# @param filename [String] base file name
# @param xml [Object] document to serialise
# @return [String] name of the file written
def sectionsplit_write_semxml(filename, xml)
  outname = Pathname.new("tmp_#{filename}").sub_ext(".xml").to_s
  File.open(outname, "w:UTF-8") { |f| f.write(@isodoc.to_xml(xml)) }
  outname
end
#semantic_xml_ids_gather(out) ⇒ Object
174 175 176 177 178 179 180 181 182 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 174

# Indexes the "semantic__" counterparts of the SPLITSECTIONS nodes by id.
#
# @param out [Object] document to scan
# @return [Hash, nil] id (without a leading "semantic__") => node, for
#   matched nodes that have an id; nil when the document has no
#   semantic__bibdata element
def semantic_xml_ids_gather(out)
  out.at(ns("//semantic__bibdata")) or return
  ids = {}
  SPLITSECTIONS.each do |xpath, _parentnode|
    out.xpath(ns(xpath.sub("//", "//semantic__"))).each do |node|
      id = node["id"] or next
      ids[id.sub(/^semantic__/, "")] = node
    end
  end
  ids
end
#semxml_presxml_nodes_match(nodes, chunks) ⇒ Object
184 185 186 187 188 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 184

# Deletes from +nodes+ every entry whose key equals the "id" of one of
# the chunks and whose value is truthy.
#
# @param nodes [Hash] id => node index (mutated)
# @param chunks [Enumerable] items responding to ["id"]
# @return [Enumerable] +chunks+
def semxml_presxml_nodes_match(nodes, chunks)
  chunks.each do |chunk|
    id = chunk["id"]
    nodes.delete(id) if nodes[id]
  end
end
#titlerender(section) ⇒ Object
204 205 206 207 208 209 210 211 212 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 204

# Renders a section's title as markup, working on a copy of the title
# element: tab and br become a space, bookmarks are dropped, and strong
# and span are replaced by their children.
#
# @param section [Object] section node
# @return [String] XML of the title's children, or "[Untitled]" when
#   the section has no title child
def titlerender(section)
  title = section.at(ns("./title")) or return "[Untitled]"
  copy = title.dup
  copy.xpath(ns(".//tab | .//br")).each { |x| x.replace(" ") }
  copy.xpath(ns(".//bookmark")).each(&:remove)
  copy.xpath(ns(".//strong | .//span")).each do |wrapper|
    wrapper.replace(wrapper.children)
  end
  copy.children.to_xml
end
#truncate_semxml(out, chunks) ⇒ Object
190 191 192 193 194 |
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 190

# Removes from the document every "semantic__" section node whose id
# does not match one of the chunks. Does nothing when
# #semantic_xml_ids_gather returns nil.
#
# @param out [Object] document to prune (mutated)
# @param chunks [Enumerable] nodes whose ids are kept
def truncate_semxml(out, chunks)
  nodes = semantic_xml_ids_gather(out)
  return unless nodes

  semxml_presxml_nodes_match(nodes, chunks)
  nodes.each_value(&:remove)
end