Class: Metanorma::Collection::Sectionsplit

Inherits:
Object
  • Object
show all
Defined in:
lib/metanorma/collection/sectionsplit/collection.rb,
lib/metanorma/collection/sectionsplit/sectionsplit.rb

Constant Summary collapse

# XPath selectors for the parts of a document that are split out into
# separate section files. Each selector is paired with the name of the
# wrapper element that the matched nodes are re-inserted under, or nil when
# the matched nodes are inserted without a wrapper.
SPLITSECTIONS = [
  ["//preface/*", "preface"],
  ["//sections/*", "sections"],
  ["//annex", nil],
  ["//bibliography/*[not(@hidden = 'true')]", "bibliography"],
  ["//indexsect", nil],
  ["//colophon", nil],
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Sectionsplit

Returns a new instance of Sectionsplit.



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 12

# Stores the section-splitting options on the instance.
#
# @param opts [Hash] options; the keys read are :input, :base, :output,
#   :xml, :dir, :compile_opts (an empty Hash is used when it is absent),
#   :fileslookup, :ident, :isodoc and :document_suffix
def initialize(opts)
  @input_filename, @output_filename = opts.values_at(:input, :output)
  @base, @dir, @xml = opts.values_at(:base, :dir, :xml)
  @fileslookup, @ident, @isodoc =
    opts.values_at(:fileslookup, :ident, :isodoc)
  @document_suffix = opts[:document_suffix]
  @compile_opts = opts[:compile_opts] || {}
end

Instance Attribute Details

#filecache ⇒ Object

Returns the value of attribute filecache.



10
11
12
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 10

# Reader for the @filecache instance variable.
# NOTE(review): none of the methods listed on this page assigns @filecache;
# presumably it is set through a matching writer — confirm against callers.
def filecache
  @filecache
end

#key ⇒ Object

Returns the value of attribute key.



10
11
12
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 10

# Reader for the @key instance variable, which #sectionsplit assigns from
# the result of XrefProcess.xref_preprocess.
def key
  @key
end

Instance Method Details

#att_dir(file) ⇒ Object



65
66
67
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 65

# Name of the attachments directory associated with a file: the file's
# basename with its last extension removed, wrapped as "_<stem>_attachments".
#
# @param file [String] file name or path
# @return [String]
def att_dir(file)
  stem = File.basename(file, ".*")
  "_#{stem}_attachments"
end

#block?(node) ⇒ Boolean

Returns:

  • (Boolean)


67
68
69
70
71
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 67

# Whether the node's element name is one of the block-level element names
# listed below.
#
# @param node [#name] node whose element name is checked
# @return [Boolean]
def block?(node)
  case node.name
  when "p", "table", "formula", "admonition", "ol", "ul", "dl", "figure",
       "quote", "sourcecode", "example", "pre", "note", "pagebreak", "hr",
       "bookmark", "requirement", "recommendation", "permission", "svgmap",
       "inputform", "toc", "passthrough", "review", "imagemap"
    true
  else
    false
  end
end

#build_collection ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 4

# Splits the input document into section files, writes a collection
# manifest for them, renders that collection as HTML into
# "<output>_collection" and finally moves any attachments directory there.
#
# @return [String, nil] the result of #section_split_attachments
def build_collection
  collection_setup(@base, @dir)
  files = sectionsplit
  source = File.read(@input_filename, encoding: "UTF-8")
  input_xml = Nokogiri::XML(source, &:huge)
  manifest = collection_manifest(@base, files, input_xml, @xml, @dir)
  outdir = "#{@output_filename}_collection"
  render_opts = { format: %i(html), output_folder: outdir,
                  coverpage: File.join(@dir, "cover.html") }
  # caller-supplied compile options take precedence over the defaults above
  manifest.render(render_opts.merge(@compile_opts))
  section_split_attachments(out: outdir)
end

#coll_cover ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 24

# HTML template for the collection cover page, containing the
# {{ doctitle }}, {{ docnumber }} and {{ navigation }} placeholders.
#
# @return [String]
def coll_cover
  [
    %(<html><head><meta charset="UTF-8"/></head><body>),
    "      <h1>{{ doctitle }}</h1>",
    "      <h2>{{ docnumber }}</h2>",
    "      <nav>{{ navigation }}</nav>",
    "    </body></html>",
    "", # the template ends with a newline
  ].join("\n")
end

#collection_manifest(filename, files, origxml, _presxml, dir) ⇒ Object



34
35
36
37
38
39
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 34

# Writes the collection manifest for the split files to
# "<dir>/<filename>.html.yaml" and parses it with Metanorma::Collection.
#
# @param filename [String] base name of the manifest file
# @param files [Array<Hash>] split-file descriptors passed to #collectionyaml
# @param origxml [Object] document passed to #collectionyaml
# @param _presxml [Object] unused
# @param dir [String] directory the manifest is written to
# @return [Object] the result of Metanorma::Collection.parse
def collection_manifest(filename, files, origxml, _presxml, dir)
  manifest_path = File.join(dir, "#{filename}.html.yaml")
  File.open(manifest_path, "w:UTF-8") do |io|
    io.write(collectionyaml(files, origxml))
  end
  Metanorma::Collection.parse(manifest_path)
end

#collection_setup(filename, dir) ⇒ Object



16
17
18
19
20
21
22
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 16

# Creates the working directories and writes the cover page template.
#
# @param filename [String, nil] when given, "<filename>_collection" is
#   created as a directory
# @param dir [String] directory that receives "cover.html"
# @return [Integer] number of bytes written to "cover.html"
def collection_setup(filename, dir)
  filename and FileUtils.mkdir_p("#{filename}_collection")
  FileUtils.mkdir_p(dir)
  File.write(File.join(dir, "cover.html"), coll_cover, encoding: "UTF-8")
end

#collectionyaml(files, xml) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 41

# Builds the YAML collection manifest for the split files.
#
# The bibdata title and document identifier are read from xml; the docref
# entries list the files ordered by their :order value.
# NOTE(review): @lang is not assigned by any method shown on this page, so
# the title language may be emitted as nil — confirm.
#
# @param files [Array<Hash>] entries with :order, :url and :title
# @param xml [#at] document providing //bibdata/title and
#   //bibdata/docidentifier
# @return [String] YAML text
def collectionyaml(files, xml)
  title = { type: "title-main", language: @lang,
            content: xml.at(ns("//bibdata/title")).text }
  docid = { type: xml.at(ns("//bibdata/docidentifier/@type")).text,
            id: xml.at(ns("//bibdata/docidentifier")).text }
  docrefs = files.sort_by { |f| f[:order] }.map do |f|
    { fileref: f[:url], identifier: f[:title] }
  end
  manifest = { level: "collection", title: "Collection", docref: docrefs }
  ret = { directives: ["presentation-xml", "bare-after-first"],
          bibdata: { title: title, type: "collection", docid: docid },
          manifest: manifest }
  ::Metanorma::Util::recursive_string_keys(ret).to_yaml
end

#conflate_floatingtitles(nodes) ⇒ Object



73
74
75
76
77
78
79
80
81
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 73

# Groups consecutive nodes into chunks: a node that directly follows a
# block-level node (see #block?) is appended to that node's chunk; any
# other node starts a new chunk.
#
# @param nodes [Enumerable] nodes in document order
# @return [Array<Array>] the chunks, in order
def conflate_floatingtitles(nodes)
  groups = []
  attach_to_previous = false
  nodes.each do |node|
    attach_to_previous ? groups.last.push(node) : groups.push([node])
    attach_to_previous = block?(node)
  end
  groups
end

#create_sectionfile(xml, out, file, chunks, parentnode) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 161

# Fills the document skeleton out with the given chunks, processes its
# cross-references against the full document, prunes it with
# #truncate_semxml and writes it to "<file>.xml" under @splitdir.
#
# @param xml [Object] the full document, passed to XrefProcess.xref_process
# @param out [#at, #to_s] document skeleton that receives the chunks
# @param file [String] output file name without extension
# @param chunks [Array] nodes to insert
# @param parentnode [String, nil] wrapper element name for the chunks
# @return [String] the name of the file written ("<file>.xml")
def create_sectionfile(xml, out, file, chunks, parentnode)
  # insert after metanorma-extension when present, otherwise after bibdata
  anchor = out.at(ns("//metanorma-extension")) || out.at(ns("//bibdata"))
  sectionfile_insert(anchor, chunks, parentnode)
  Metanorma::Collection::XrefProcess::xref_process(out, xml, @key,
                                                   @ident, @isodoc)
  truncate_semxml(out, chunks)
  "#{file}.xml".tap do |outname|
    File.open(File.join(@splitdir, outname), "w:UTF-8") do |io|
      io.write(out)
    end
  end
end

#empty_attachments(xml) ⇒ Object



147
148
149
150
151
152
153
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 147

# Returns a copy of the document with its attachment elements removed, so
# that only one of the split files keeps a copy of the attachments.
#
# @param xml [#dup] document to copy; it is not modified itself
# @return [Object] the stripped copy
def empty_attachments(xml)
  selectors = ["//metanorma-ext//attachment",
               "//semantic__metanorma-ext//semantic__attachment"]
  xml.dup.tap do |copy|
    copy.xpath(ns(selectors.join(" | "))).each(&:remove)
  end
end

#empty_doc(xml) ⇒ Object



137
138
139
140
141
142
143
144
145
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 137

# Returns a copy of the document with its content sections (preface,
# sections, annexes, bibliography clauses and non-hidden references,
# index and colophon) removed.
#
# @param xml [#dup] document to copy; it is not modified itself
# @return [Object] the emptied copy
def empty_doc(xml)
  selectors = ["//preface", "//sections", "//annex",
               "//bibliography/clause",
               "//bibliography/references[not(@hidden = 'true')]",
               "//indexsect", "//colophon"]
  xml.dup.tap do |copy|
    copy.xpath(ns(selectors.join(" | "))).each(&:remove)
  end
end

#emptydoc(xml, ordinal) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 124

# Returns a copy of the document with its content sections removed; for
# every ordinal other than zero the attachment elements are removed too.
#
# @param xml [#dup] document to copy; it is not modified itself
# @param ordinal [Integer] position of the split file being produced
# @return [Object] the emptied copy
def emptydoc(xml, ordinal)
  content = ["//preface", "//sections", "//annex", "//bibliography/clause",
             "//bibliography/references[not(@hidden = 'true')]",
             "//indexsect", "//colophon"].join(" | ")
  copy = xml.dup
  copy.xpath(ns(content)).each(&:remove)
  unless ordinal.zero?
    # keep only one copy of attachments
    attachments = ["//metanorma-ext//attachment",
                   "//semantic__metanorma-ext//semantic__attachment"]
    copy.xpath(ns(attachments.join(" | "))).each(&:remove)
  end
  copy
end

#ns(xpath) ⇒ Object



25
26
27
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 25

# Delegates to @isodoc.ns for the given XPath expression.
# NOTE(review): presumably this qualifies the element names in the XPath
# with the document namespace — confirm against the isodoc implementation.
def ns(xpath)
  @isodoc.ns(xpath)
end

#section_split_attachments(out: nil) ⇒ Object



69
70
71
72
73
74
75
76
77
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 69

# Moves the attachments directory named after the temporary semantic XML
# file (@tmp_filename) to the directory named after the output file,
# replacing any directory already there.
#
# @param out [String, nil] destination parent directory; defaults to the
#   directory of the input file
# @return [String, nil] basename of the destination directory, or nil when
#   there is no attachments directory to move
def section_split_attachments(out: nil)
  source = att_dir(@tmp_filename)
  return unless File.directory?(source)

  parent = out || File.dirname(@input_filename)
  target = File.join(parent, att_dir(@output_filename))
  FileUtils.rm_rf(target)
  FileUtils.mv(source, target)
  File.basename(target)
end

#section_split_cover(col, ident, one_doc_coll) ⇒ Object



79
80
81
82
83
84
85
86
87
88
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 79

# Renders the cover page of a collection next to the collection file and
# hands the result to #section_split_cover1.
#
# @param col [#file] collection; its file path determines the working directory
# @param ident [String] identifier used to name the output folder and index file
# @param one_doc_coll [Object] passed through to #section_split_cover1
# @return [String] the result of #section_split_cover1
def section_split_cover(col, ident, one_doc_coll)
  workdir = File.dirname(col.file)
  collection_setup(nil, workdir)
  render_opts = { output_folder: "#{ident}_collection", format: %i(html),
                  coverpage: File.join(workdir, "cover.html") }
  renderer = ::Metanorma::Collection::Renderer.new(col, workdir,
                                                   **render_opts)
  renderer.coverpage
  section_split_cover1(ident, renderer, workdir, one_doc_coll)
end

#section_split_cover1(ident, renderer, dir, _one_doc_coll) ⇒ Object



90
91
92
93
94
95
96
97
# File 'lib/metanorma/collection/sectionsplit/collection.rb', line 90

# Moves the rendered "index.html" out of the renderer's output directory to
# "<dir>/<ident basename>_index.html" and deletes that output directory.
#
# @param ident [String] identifier; only its basename is used for the file
#   name, since ident can be a directory with YAML indirection
# @param renderer [#outdir] renderer whose output directory holds "index.html"
# @param dir [String] destination directory
# @param _one_doc_coll [Object] unused
# @return [String] the name of the index file
def section_split_cover1(ident, renderer, dir, _one_doc_coll)
  index_name = File.basename("#{ident}_index.html")
  rendered = File.join(renderer.outdir, "index.html")
  FileUtils.mv(rendered, File.join(dir, index_name))
  FileUtils.rm_rf(renderer.outdir)
  index_name
end

#sectionfile(fulldoc, xml, file, chunks, parentnode) ⇒ Object



155
156
157
158
159
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 155

# Writes one split file from a copy of the document skeleton and describes
# it for the collection manifest. The order and title are taken from the
# last chunk.
#
# @param fulldoc [Object] the full document
# @param xml [#dup] document skeleton; a copy of it receives the chunks
# @param file [String] output file name without extension
# @param chunks [Array] nodes making up this section
# @param parentnode [String, nil] wrapper element name for the chunks
# @return [Hash] :order (Integer), :url (file name) and :title
def sectionfile(fulldoc, xml, file, chunks, parentnode)
  url = create_sectionfile(fulldoc, xml.dup, file, chunks, parentnode)
  final = chunks.last
  { order: final["displayorder"].to_i, url: url, title: titlerender(final) }
end

#sectionfile_insert(ins, chunks, parentnode) ⇒ Object



196
197
198
199
200
201
202
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 196

# Inserts copies of the chunks into the document right after the node ins,
# preserving their order.
#
# With a parentnode, an empty wrapper element of that name is inserted
# after ins and the chunks are appended to it. Without one, the chunks are
# inserted directly after ins.
#
# @param ins [#next, #next=] node after which the content is inserted
# @param chunks [Array] nodes to copy in
# @param parentnode [String, nil] name of the wrapper element, if any
# @return [Array] chunks
def sectionfile_insert(ins, chunks, parentnode)
  if parentnode
    ins.next = "<#{parentnode}/>"
    wrapper = ins.next
    chunks.each { |c| wrapper.add_child(c.dup) }
  else
    # Each assignment places the node immediately after ins, so walk the
    # chunks backwards to leave them in their original order.
    chunks.reverse_each { |c| ins.next = c.dup }
  end
end

#sectionsplit ⇒ Object

Input XML is Semantic



36
37
38
39
40
41
42
43
44
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 36

# Splits the input file into one file per section.
#
# The input XML is Semantic XML; #sectionsplit_prep turns it into the
# parsed Presentation XML that is actually split. Two skeletons are
# prepared from it: one without content sections, and one that has also
# lost its attachments. A mutex and a four-thread pool are set up for the
# workers that write the section files.
#
# @return [Array<Hash>] the split-file descriptors from #sectionsplit1
def sectionsplit
  presxml = sectionsplit_prep(File.read(@input_filename), @base, @dir)
  @key = Metanorma::Collection::XrefProcess::xref_preprocess(presxml,
                                                             @isodoc)
  skeleton = empty_doc(presxml)
  skeleton_no_attachments = empty_attachments(skeleton)
  @mutex = Mutex.new
  @pool = Concurrent::FixedThreadPool.new(4)
  sectionsplit1(presxml, skeleton, skeleton_no_attachments, 0)
end

#sectionsplit1(xml, empty, empty1, idx) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 46

# Dispatches one split job per chunk of every SPLITSECTIONS selector, then
# waits for the thread pool to finish.
#
# The first job (idx zero) uses the skeleton that still has its
# attachments; all later jobs use the one without. The pool is shut down
# and awaited in an ensure clause, so it is also released when dispatching
# raises part-way through.
#
# @param xml [#xpath] the document being split
# @param empty [Object] skeleton used for the first section file
# @param empty1 [Object] skeleton used for all other section files
# @param idx [Integer] ordinal of the first section file
# @return [Array] the accumulator that the jobs append their results to
def sectionsplit1(xml, empty, empty1, idx)
  SPLITSECTIONS.each_with_object([]) do |(xpath, parentnode), acc|
    conflate_floatingtitles(xml.xpath(ns(xpath))).each do |chunks|
      sectionsplit2(xml, idx.zero? ? empty : empty1, chunks, parentnode,
                    { acc: acc, idx: idx })
      idx += 1
    end
  end
ensure
  # runs before the accumulator is handed back, so all jobs have finished
  @pool.shutdown
  @pool.wait_for_termination
end

#sectionsplit2(xml, empty, chunks, parentnode, opt) ⇒ Object



59
60
61
62
63
64
65
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 59

# Queues a job on the thread pool that writes one section file, named
# "<base>.<idx>", and appends its descriptor to the shared accumulator
# while holding the mutex.
#
# @param xml [Object] the full document
# @param empty [Object] document skeleton for this section file
# @param chunks [Array] nodes making up the section
# @param parentnode [String, nil] wrapper element name for the chunks
# @param opt [Hash] :acc (shared result Array) and :idx (section ordinal)
# @return [Object] the result of posting the job to the pool
def sectionsplit2(xml, empty, chunks, parentnode, opt)
  @pool.post do
    descriptor = sectionfile(xml, empty, "#{@base}.#{opt[:idx]}", chunks,
                             parentnode)
    @mutex.synchronize { opt[:acc].push(descriptor) }
  end
end

#sectionsplit_prep(file, filename, dir) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 83

# Compiles the Semantic XML input to Presentation XML and returns the
# parsed Presentation XML document.
#
# The preprocessed Semantic XML is written to a temporary file and compiled
# with the :presentation extension; the resulting ".presentation.xml" file
# is then parsed. Any svgmap1 elements in it are renamed back to svgmap
# (#sectionsplit_update_xrefs renames them the other way beforehand).
#
# @param file [String] Semantic XML source text
# @param filename [String] base name for the temporary Semantic XML file
# @param dir [String] directory the split files are written to
#   (stored in @splitdir)
# @return [Object] the parsed Presentation XML document
def sectionsplit_prep(file, filename, dir)
  @splitdir = dir
  xml1, type = sectionsplit_preprocess_semxml(file, filename)
  flags = { format: :asciidoc, extension_keys: [:presentation],
            type: type }.merge(@compile_opts)
  Compile.new.compile(xml1, flags)
  presxml_path = xml1.sub(/\.xml$/, ".presentation.xml")
  # block form, so the file handle is closed once parsing is done
  r = File.open(presxml_path, encoding: "utf-8") do |f|
    Nokogiri::XML(f, &:huge)
  end
  r.xpath("//xmlns:svgmap1").each { |x| x.name = "svgmap" }
  r
end

#sectionsplit_preprocess_semxml(file, filename) ⇒ Object



96
97
98
99
100
101
102
103
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 96

# Parses the Semantic XML, derives the document type from the root element
# name (with "-standard" removed), updates cross-references and writes the
# result to a temporary file, whose name is remembered in @tmp_filename.
#
# @param file [String] Semantic XML source text
# @param filename [String] base name for the temporary file
# @return [Array(String, Symbol)] temporary file name and document type
def sectionsplit_preprocess_semxml(file, filename)
  doc = Nokogiri::XML(file, &:huge)
  flavour = doc.root.name.sub("-standard", "").to_sym
  sectionsplit_update_xrefs(doc)
  @tmp_filename = sectionsplit_write_semxml(filename, doc)
  [@tmp_filename, flavour]
end

#sectionsplit_update_xrefs(xml) ⇒ Object



105
106
107
108
109
110
111
112
113
114
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 105

# Updates the cross-references of the document through the parent of
# @fileslookup, then renames every svgmap element to svgmap1 so that
# svgmaps are not processed until after the files are split.
#
# Does nothing when there is no @fileslookup or it has no parent. The
# parent's nested flag is forced to true for the update and put back
# afterwards, also when the update raises.
#
# @param xml [#xpath] Semantic XML document, modified in place
# @return [Object, nil] nil when there is nothing to update
def sectionsplit_update_xrefs(xml)
  c = @fileslookup&.parent or return
  n = c.nested
  c.nested = true # so unresolved erefs are not deleted
  begin
    c.update_xrefs(xml, @ident, {})
  ensure
    c.nested = n
  end
  # do not process svgmap until after files are split
  xml.xpath("//xmlns:svgmap").each { |x| x.name = "svgmap1" }
end

#sectionsplit_write_semxml(filename, xml) ⇒ Object



116
117
118
119
120
121
122
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 116

# Serialises the document with @isodoc.to_xml into "tmp_<filename>", with
# the extension replaced by ".xml".
#
# @param filename [String] base name of the temporary file
# @param xml [Object] document to serialise
# @return [String] name of the file written
def sectionsplit_write_semxml(filename, xml)
  Pathname("tmp_#{filename}").sub_ext(".xml").to_s.tap do |outname|
    File.open(outname, "w:UTF-8") { |io| io.write(@isodoc.to_xml(xml)) }
  end
end

#semantic_xml_ids_gather(out) ⇒ Object



174
175
176
177
178
179
180
181
182
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 174

# Collects the semantic__-prefixed counterparts of the SPLITSECTIONS
# elements that carry an id, keyed by that id with a leading "semantic__"
# removed.
#
# @param out [#at, #xpath] document to search
# @return [Hash{String => Object}, nil] id => node, or nil when the
#   document has no semantic__bibdata element
def semantic_xml_ids_gather(out)
  return unless out.at(ns("//semantic__bibdata"))

  found = {}
  SPLITSECTIONS.each do |xpath, _parentnode|
    out.xpath(ns(xpath.sub("//", "//semantic__"))).each do |node|
      id = node["id"]
      found[id.sub(/^semantic__/, "")] = node if id
    end
  end
  found
end

#semxml_presxml_nodes_match(nodes, chunks) ⇒ Object



184
185
186
187
188
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 184

# Drops from nodes every entry whose key is the id of one of the chunks.
#
# @param nodes [Hash{String => Object}] id => node, modified in place
# @param chunks [Array<#[]>] nodes whose "id" attribute is looked up
# @return [Array] chunks
def semxml_presxml_nodes_match(nodes, chunks)
  chunks.each do |chunk|
    id = chunk["id"]
    nodes.delete(id) if nodes[id]
  end
end

#titlerender(section) ⇒ Object



204
205
206
207
208
209
210
211
212
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 204

# Renders the title of a section as an XML string for use in the manifest.
#
# Works on a copy of the title: tabs and line breaks become single spaces,
# bookmarks are dropped, and strong and span elements are replaced by
# their children.
#
# @param section [#at] section node
# @return [String] serialised title content, or "[Untitled]" when the
#   section has no title child
def titlerender(section)
  title = section.at(ns("./title"))
  return "[Untitled]" unless title

  copy = title.dup
  copy.xpath(ns(".//tab | .//br")).each { |n| n.replace(" ") }
  copy.xpath(ns(".//bookmark")).each(&:remove)
  copy.xpath(ns(".//strong | .//span")).each do |n|
    n.replace(n.children)
  end
  copy.children.to_xml
end

#truncate_semxml(out, chunks) ⇒ Object



190
191
192
193
194
# File 'lib/metanorma/collection/sectionsplit/sectionsplit.rb', line 190

# Removes from the document every id-bearing semantic element gathered by
# #semantic_xml_ids_gather whose id does not belong to one of the chunks.
#
# @param out [Object] document to prune
# @param chunks [Array] nodes kept in this section file
# @return [Hash, nil] the removed id => node entries, or nil when there was
#   nothing to gather
def truncate_semxml(out, chunks)
  leftovers = semantic_xml_ids_gather(out)
  return unless leftovers

  semxml_presxml_nodes_match(leftovers, chunks)
  leftovers.each_value(&:remove)
end