Class: Metanorma::Collection::FileLookup

Inherits:
Object
  • Object
show all
Defined in:
lib/metanorma/collection/filelookup/base.rb,
lib/metanorma/collection/filelookup/utils.rb,
lib/metanorma/collection/filelookup/filelookup.rb,
lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, parent) ⇒ FileLookup

Builds a hash that maps each document identifier in the collection to: the document reference (fileref or id), the type of document reference, and the bibdata entry for that file.

Parameters:

  • path (String)

    path to collection



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 18

# Builds the lookup table for every file listed in the collection manifest.
#
# Copies the collaborators it needs from +parent+ (XML, isodoc instances,
# compile object, documents, manifest) and then walks the manifest entries
# via +read_files+ to populate +@files+.
#
# @param path [String] path to collection
# @param parent [Object] owner object; must respond to +xml+, +isodoc+,
#   +isodoc_presxml+, +compile+, +documents+ and +manifest+
def initialize(path, parent)
  @path = path
  @parent = parent
  @c = HTMLEntities.new
  @disambig = Util::DisambigFiles.new
  @files = {}
  @files_to_delete = []
  @xml = parent.xml
  @isodoc = parent.isodoc
  @isodoc_presxml = parent.isodoc_presxml
  @compile = parent.compile
  @documents = parent.documents
  @manifest = parent.manifest
  # Must run last: read_files relies on the state initialised above.
  read_files(@manifest.entry, parent.manifest)
end

Instance Attribute Details

#files_to_delete ⇒ Object

Returns the value of attribute files_to_delete.



12
13
14
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12

# @return [Array] the paths accumulated in +@files_to_delete+
def files_to_delete; @files_to_delete; end

#parent ⇒ Object

Returns the value of attribute parent.



12
13
14
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12

# @return [Object] the owner object stored in +@parent+
def parent; @parent; end

Instance Method Details

#add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry) ⇒ Object



72
73
74
75
76
77
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 72

# Copies the cover file of +entry+ to "index.html" next to the sectionsplit
# manifest and registers that copy in +manifest+ under "<key>:index1.html".
#
# @param manifest [Hash] lookup table being built
# @param sectionsplit_manifest [#file] sectionsplit manifest; only its
#   +file+ path is used, to locate the target directory
# @param key [String] identifier of the document being split
# @param entry [Hash] cover entry; +entry[:ref]+ is the file to copy
# @return [Hash] the new manifest entry (a copy of +entry+ with +out_path+
#   and +ref+ overridden)
def add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry)
  target_dir = File.dirname(sectionsplit_manifest.file)
  index_path = File.join(target_dir, "index.html")
  FileUtils.cp(entry[:ref], index_path)
  index_entry = entry.merge(out_path: "index.html", ref: index_path)
  manifest["#{key}:index1.html"] = index_entry
end

#add_document_suffix(identifier, doc) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 234

# Appends a per-document suffix (derived from +identifier+) to the anchor
# attributes of +doc+ and records that suffix on the root element.
#
# @param identifier [String] document identifier, converted to an NCName
# @param doc [Object] parsed XML document (must respond to +xpath+, +root+)
# @return [String] the resulting value of the root's +document_suffix+
def add_document_suffix(identifier, doc)
  suffix = Metanorma::Utils::to_ncname(identifier)
  # Collect the ids before any attribute is rewritten.
  original_ids = doc.xpath("./@id | .//@id").map(&:value)
  Util::anchor_id_attributes.each do |tag_name, attr_name|
    Util::add_suffix_to_attrs(doc, suffix, tag_name, attr_name, @isodoc)
  end
  Util::url_in_css_styles(doc, original_ids, suffix)
  root = doc.root
  # A missing attribute is treated as the empty string.
  root["document_suffix"] = "#{root['document_suffix']}#{suffix}"
end

#add_section_split ⇒ Object



6
7
8
9
10
11
12
13
14
15
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 6

# Rebuilds +@files+ so that every entry flagged +:sectionsplit+ (and not
# +:attachment+) is preceded by the entries generated for its split
# sections.
#
# @return [Hash] the new +@files+ table
def add_section_split
  expanded = {}
  @files.keys.each do |ident|
    entry = @files[ident]
    if entry[:sectionsplit] && !entry[:attachment]
      saved_out_path = process_section_split_instance(ident, expanded)
      cleanup_section_split_instance(ident, expanded, saved_out_path)
    end
    # The parent entry goes in after its generated split entries.
    expanded[ident] = @files[ident]
  end
  @files = expanded
end

#add_section_split_attachments(manifest, ident) ⇒ Object



89
90
91
92
93
94
95
96
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 89

# Asks the current +@sectionsplit+ for its attachments location and, if
# there is one, points the parent document's +:out_path+ at it.
#
# @param manifest [#file] sectionsplit manifest; its directory is the output
#   directory passed to +section_split_attachments+
# @param ident [String] key of the parent document in +@files+
# @return [Hash, nil] attachment entry for the lookup table, or nil when
#   +section_split_attachments+ returns a falsy value
def add_section_split_attachments(manifest, ident)
  out_dir = File.dirname(manifest.file)
  attachments = @sectionsplit.section_split_attachments(out: out_dir)
  return unless attachments

  @files[ident][:out_path] = attachments
  {
    attachment: true,
    index: false,
    out_path: attachments,
    ref: File.join(File.dirname(manifest.file), attachments),
  }
end

#add_section_split_cover(manifest, sectionsplit_manifest, ident) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 59

# Generates the cover page for a split document, registers it in
# +manifest+ as "<ident>:index.html", and makes it the parent's +:out_path+.
# When the collection holds a single document, the cover is additionally
# registered as the collection index via +add_cover_one_doc_coll+.
#
# @param manifest [Hash] lookup table being built
# @param sectionsplit_manifest [#file] sectionsplit manifest
# @param ident [String] key of the parent document in +@files+
# @return [Object] false-ish when this is not a one-document collection,
#   otherwise the result of +add_cover_one_doc_coll+
def add_section_split_cover(manifest, sectionsplit_manifest, ident)
  cover_dir = @parent.dir_name_cleanse(ident)
  cover = @sectionsplit.section_split_cover(sectionsplit_manifest,
                                            cover_dir, one_doc_collection?)
  @files[ident][:out_path] = cover
  cover_src = File.join(File.dirname(sectionsplit_manifest.file), cover)
  cover_entry = { attachment: true, index: false,
                  out_path: cover, ref: cover_src }
  manifest["#{ident}:index.html"] = cover_entry
  one_doc_collection? &&
    add_cover_one_doc_coll(manifest, sectionsplit_manifest, ident,
                           cover_entry)
end

#add_section_split_instance(file, manifest, key, idx) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 98

# Registers one split-out section of document +key+ in +manifest+.
#
# +file[:url]+ is taken as the output location: its ".xml" extension is
# dropped and any directory component is kept, so that +out_path+ and
# +rel_path+ preserve the directory structure of the split output.
# The split output files themselves are not scheduled for deletion here.
#
# @param file [Hash] split section descriptor; +:url+ and +:title+ are read
# @param manifest [Hash] lookup table being built
# @param key [String] key of the parent document in +@files+
# @param idx [Integer] position of this section among the split sections
# @return [Hash] the entry stored in +manifest+
def add_section_split_instance(file, manifest, key, idx)
  presfile, newkey, xml = add_section_split_instance_prep(file, key)
  anchors = read_anchors(xml)
  parent_entry = @files[key]

  url = file[:url]
  stem = File.basename(url, ".xml")
  dir = File.dirname(url)
  # Only prepend the directory when the URL actually carries one.
  out_path_value = dir == "." ? stem : File.join(dir, stem)

  split_entry = {
    parentid: key, presentationxml: true, type: "fileref",
    rel_path: out_path_value, out_path: out_path_value,
    anchors: anchors, anchors_lookup: anchors_lookup(anchors),
    ids: read_ids(xml), format: parent_entry[:format],
    sectionsplit_output: true, indirect_key: @sectionsplit.key,
    bibdata: parent_entry[:bibdata], ref: presfile,
    sectionsplit_filename: parent_entry[:sectionsplit_filename],
    idx: parent_entry[:idx]
  }
  # Every section except the first is flagged as bare.
  split_entry[:bare] = true unless idx.zero?
  manifest[newkey] = split_entry
end

#add_section_split_instance_prep(file, key) ⇒ Object



132
133
134
135
136
137
138
139
140
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 132

# Locates and parses the presentation XML of one split section.
#
# The XML file is looked up by the basename of +file[:url]+ in the same
# directory as the parent document's +:ref+ (i.e. any directory component
# in the URL is ignored for the XML lookup).
#
# @param file [Hash] split section descriptor; +:url+ and +:title+ are read
# @param key [String] key of the parent document in +@files+
# @return [Array] +[presfile, newkey, xml]+: path of the section XML, the
#   lookup key for the section, and the parsed document
def add_section_split_instance_prep(file, key)
  source_dir = File.dirname(@files[key][:ref])
  presfile = File.join(source_dir, File.basename(file[:url]))
  # key(...) with parentheses is the method, not the local parameter.
  newkey = key("#{key.strip} #{file[:title]}")
  xml = Nokogiri::XML(File.read(presfile), &:huge)
  [presfile, newkey, xml]
end

#anchors_lookup(anchors) ⇒ Object



40
41
42
43
44
# File 'lib/metanorma/collection/filelookup/utils.rb', line 40

# Flattens an anchors table into a set-like hash of anchor ids.
#
# @param anchors [Hash{Object=>Hash}] nested hash whose inner values are
#   anchor ids (as produced by +read_anchors+)
# @return [Hash{Object=>true}] every inner value mapped to +true+
def anchors_lookup(anchors)
  lookup = {}
  anchors.each_value do |by_label|
    by_label.each_value { |anchor_id| lookup[anchor_id] = true }
  end
  lookup
end

#bibdata_extract(xml) ⇒ Object



84
85
86
87
88
89
90
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 84

# Collects the lookup data held for a parsed document.
#
# @param xml [Object] parsed XML document
# @return [Hash] +:anchors+, +:anchors_lookup+, +:ids+, the +bibdata+
#   element, and the root's +document_suffix+ attribute
def bibdata_extract(xml)
  anchors = read_anchors(xml)
  lookup = anchors_lookup(anchors)
  ids = read_ids(xml)
  {
    anchors: anchors,
    anchors_lookup: lookup,
    ids: ids,
    bibdata: xml.at(ns("//bibdata")),
    document_suffix: xml.root["document_suffix"],
  }
end

#bibdata_process(entry, ident) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 72

# Fills in the bibliographic data of a lookup entry.
#
# Attachments get a synthetic bibitem built from +ident+; any other entry
# is read from disk, suffixed via +add_document_suffix+, and merged with
# the data returned by +bibdata_extract+.
#
# @param entry [Hash] lookup entry, modified in place
# @param ident [String] document identifier
# @return [Object] the attachment bibitem root, or +entry+ after merging
def bibdata_process(entry, ident)
  unless entry[:attachment]
    contents, _filename = targetfile(entry, read: true)
    xml = Nokogiri::XML(contents, &:huge)
    add_document_suffix(ident, xml)
    return entry.merge!(bibdata_extract(xml))
  end

  entry[:bibdata] =
    Metanorma::Collection::Document.attachment_bibitem(ident).root
end

#bibitem_process(entry) ⇒ Object



92
93
94
95
96
97
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 92

# Derives +entry[:bibitem]+ from +entry[:bibdata]+: a copy renamed to
# "bibitem", marked hidden, and stripped of its +ext+ child if present.
#
# @param entry [Hash] lookup entry, modified in place
# @return [Object, nil] result of removing the +ext+ child, nil if absent
def bibitem_process(entry)
  bibitem = entry[:bibdata].dup
  entry[:bibitem] = bibitem
  bibitem.name = "bibitem"
  bibitem["hidden"] = "true"
  bibitem.at("./*[local-name() = 'ext']")&.remove
end

#cleanup_section_split_instance(key, manifest, original_out_path) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 44

# Schedules the files made obsolete by a section split for deletion and
# links the parent entry to the split via +:indirect_key+.
#
# @param key [String] key of the parent document in +@files+
# @param manifest [Hash] lookup table being built; must already contain the
#   "<key>:index.html" cover entry
# @param original_out_path [String, nil] the parent's +:out_path+ as it was
#   before the split replaced it
# @return [Object] the +@sectionsplit+ key stored as +:indirect_key+
def cleanup_section_split_instance(key, manifest, original_out_path)
  # The sectionsplit cover in the source directory.
  @files_to_delete << manifest["#{key}:index.html"][:ref]
  if original_out_path
    # The parent's own outputs in all three formats.
    stem = File.join(@parent.outdir, original_out_path.sub(/\.xml$/, ""))
    @files_to_delete.push("#{stem}.html", "#{stem}.xml",
                          "#{stem}.presentation.xml")
  end
  @files[key][:indirect_key] = @sectionsplit.key
end

#derive_format(entry, parent) ⇒ Object



46
47
48
49
50
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 46

# Determines the output formats of a manifest entry.
#
# Attachments are left alone. Otherwise the entry's own format wins, then
# the parent's, then the default list; "xml" and "presentation" are always
# added to whatever was chosen.
#
# @param entry [#attachment, #format, #format=] manifest entry
# @param parent [#format] enclosing manifest entry
# @return [Array<String>, nil] the resulting format list, nil for attachments
def derive_format(entry, parent)
  return if entry.attachment

  unless entry.format
    entry.format = parent.format || %w(xml presentation html)
  end
  entry.format = entry.format | %w(xml presentation)
end

#each ⇒ Object



30
31
32
# File 'lib/metanorma/collection/filelookup/base.rb', line 30

# Iterates over the lookup table.
#
# The caller's block is forwarded to the underlying hash, so
# +lookup.each { |ident, entry| ... }+ actually iterates. Without a block
# the behaviour is unchanged: an Enumerator over +@files+ is returned.
#
# @yieldparam ident [String] lookup key
# @yieldparam entry [Hash] lookup entry
# @return [Enumerator, Hash] an Enumerator when no block is given,
#   otherwise +@files+
def each(&block)
  @files.each(&block)
end

#each_with_index ⇒ Object



34
35
36
# File 'lib/metanorma/collection/filelookup/base.rb', line 34

# Iterates over the lookup table together with a running index.
#
# The caller's block is forwarded to the underlying hash, so
# +lookup.each_with_index { |(ident, entry), i| ... }+ actually iterates.
# Without a block the behaviour is unchanged: an Enumerator is returned.
#
# @yieldparam pair [Array] +[ident, entry]+
# @yieldparam index [Integer] zero-based position
# @return [Enumerator, Hash] an Enumerator when no block is given,
#   otherwise +@files+
def each_with_index(&block)
  @files.each_with_index(&block)
end

#file_entry(ref, identifier, idx) ⇒ Object

ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; idx is the index of the document in the manifest.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 105

# Builds the lookup entry for one manifest entry that names a file.
#
# Entries without a file yield no lookup entry. Nil-valued attributes are
# dropped from the result.
#
# @param ref [Object] manifest entry (must respond to +file+, +url+,
#   +output_filename+, +sectionsplit_filename+, +pdffile+, +format+)
# @param identifier [String] document identifier, used to find the
#   absolute source path in +@documents+
# @param idx [Integer] index of the document in the manifest
# @return [Hash, nil] the lookup entry, or nil when +ref.file+ is not set
def file_entry(ref, identifier, idx)
  ref.file or return
  abs = @documents[Util::key identifier].file
  # Sectionsplit outputs listed in the manifest take their paths from the
  # directory of sectionsplit_filename combined with the file's basename.
  sso = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
  out_path, rel_path = file_entry_paths(ref, idx, sso)
  ret = { type: "fileref", ref: abs, rel_path: rel_path, url: ref.url,
          out_path: out_path, idx: idx,
          output_filename: ref.output_filename,
          sectionsplit_filename: ref.sectionsplit_filename,
          pdffile: ref.pdffile, format: ref.format&.map(&:to_sym) }
  file_entry_copy(ref, ret)
  # A single compact at the end drops nils from both the literal above and
  # the attributes copied by file_entry_copy.
  ret.compact
end

#file_entry_copy(ref, ret) ⇒ Object



225
226
227
228
229
230
231
232
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 225

# Copies optional manifest attributes from +ref+ into the lookup entry.
#
# Each attribute is copied only if +ref+ responds to it; hyphens are
# removed from the name to form the entry key (e.g. "presentation-xml"
# becomes +:presentationxml+).
#
# @param ref [Object] manifest entry
# @param ret [Hash] lookup entry, modified in place
# @return [Array<String>] the list of attribute names considered
def file_entry_copy(ref, ret)
  attribute_names = %w(attachment sectionsplit index presentation-xml url
                       bare-after-first output_filename sectionsplit_filename
                       sectionsplit_output)
  attribute_names.each do |name|
    next unless ref.respond_to?(name.to_sym)

    ret[name.delete("-").to_sym] = ref.send(name)
  end
end

#file_entry_paths(ref, idx, sso) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 142

# Computes the +[out_path, rel_path]+ pair for a manifest entry.
#
# For a sectionsplit output whose +sectionsplit_filename+ pattern contains a
# directory, both paths are built from that directory plus the basename of
# +ref.file+. In every other case +out_path+ comes from +output_file_path+
# and +rel_path+ is +ref.file+ itself.
#
# @param ref [Object] manifest entry
# @param idx [Integer] index of the document in the manifest
# @param sso [Boolean] whether the entry is a sectionsplit output
# @return [Array(String, String)] +[out_path, rel_path]+
def file_entry_paths(ref, idx, sso)
  stem = File.basename(ref.file, ".xml")
  pattern = sso && ref.respond_to?(:sectionsplit_filename) &&
    ref.sectionsplit_filename
  pattern_dir = pattern ? File.dirname(pattern) : "."
  # No usable directory in the pattern: fall back to the regular paths.
  return [output_file_path(ref, idx), ref.file] if pattern_dir == "."

  located = File.join(pattern_dir, stem)
  [located, "#{located}.xml"]
end

#file_entry_struct(ref, abs) ⇒ Object

ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; extract_opts are the compilation options extracted as document attributes.



132
133
134
135
136
137
138
139
140
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 132

# Builds a lookup entry for +ref+, including any compilation options found
# in a sibling ".adoc" source of +abs+.
#
# @param ref [Object] manifest entry (must respond to +file+, +url+,
#   +pdffile+, +format+)
# @param abs [String] absolute source file address
# @param idx [Integer, nil] index of the document in the manifest, passed on
#   to +output_file_path+, which requires it. When nil, a "{document-num}"
#   placeholder in a filename pattern is left unsubstituted.
# @return [Hash] the lookup entry, without nil-valued attributes
def file_entry_struct(ref, abs, idx = nil)
  adoc = abs.sub(/\.xml$/, ".adoc")
  # Only look for options when abs really maps onto an existing .adoc file.
  if adoc.end_with?(".adoc") && File.exist?(adoc)
    opts = Metanorma::Input::Asciidoc.new.extract_options(File.read(adoc))
  end
  { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
    out_path: output_file_path(ref, idx), pdffile: ref.pdffile,
    format: ref.format&.map(&:to_sym), extract_opts: opts }.compact
end

#get(ident, attr = nil) ⇒ Object



20
21
22
23
24
# File 'lib/metanorma/collection/filelookup/base.rb', line 20

# Looks up an entry, or a single attribute of it.
#
# @param ident [String] document identifier (normalised through +key+)
# @param attr [Symbol, nil] attribute to read; nil for the whole entry
# @return [Object, nil] the entry, or the value of +attr+ within it
def get(ident, attr = nil)
  entry = @files[key(ident)]
  attr ? entry[attr] : entry
end

#key(ident) ⇒ Object



11
12
13
14
# File 'lib/metanorma/collection/filelookup/base.rb', line 11

# Normalises a document identifier into a lookup key: entities are decoded
# with +@c+, each run of Unicode space separators becomes one plain space,
# and a leading "metanorma-collection " prefix is dropped.
#
# @param ident [String] document identifier
# @return [String] the normalised key
def key(ident)
  decoded = @c.decode(ident)
  single_spaced = decoded.gsub(/(\p{Zs})+/, " ")
  single_spaced.sub(/^metanorma-collection /, "")
end

#keys ⇒ Object



16
17
18
# File 'lib/metanorma/collection/filelookup/base.rb', line 16

# @return [Array] the keys of the lookup table, in insertion order
def keys
  @files.each_key.to_a
end

#ns(xpath) ⇒ Object



38
39
40
# File 'lib/metanorma/collection/filelookup/base.rb', line 38

# Delegates to +@isodoc.ns+.
#
# @param xpath [String] XPath expression
# @return [Object] whatever +@isodoc.ns+ returns for +xpath+
def ns(xpath); @isodoc.ns(xpath); end

#one_doc_collection? ⇒ Boolean

Returns:

  • (Boolean)


79
80
81
82
83
84
85
86
87
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 79

# Tells whether the collection holds at most one real document, i.e. at
# most one entry that is neither an attachment nor flagged
# +:presentationxml+.
#
# @return [Boolean]
def one_doc_collection?
  documents = @files.each_value.count do |entry|
    !entry[:attachment] && !entry[:presentationxml]
  end
  documents <= 1
end

#output_file_path(ref, idx) ⇒ Object

TODO make the output file location reflect source location universally, not just for attachments: no File.basename

For files with custom directory structure, construct path with directory. For files with output_filename, use that (with substitutions). For others, use basename of ref.file.



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 184

# Computes the destination filename of a manifest entry and passes it
# through +@disambig+ so that clashing names are disambiguated.
#
# The source filename is chosen as follows:
# 1. the basename of +ref.file+, when the sectionsplit directory structure
#    applies (the directory itself is applied later; see
#    +preserve_directory_structure?+);
# 2. +ref.output_filename+ with its placeholders substituted;
# 3. +ref.file+ unchanged, when it already contains a directory or the
#    entry is an attachment;
# 4. otherwise the basename of +ref.file+.
#
# @param ref [Object] manifest entry
# @param idx [Integer] index of the document in the manifest
# @return [String] the disambiguated destination filename
def output_file_path(ref, idx)
  has_custom_dir, file_has_dir, params = output_file_path_prep(ref, idx)
  # The sectionsplit directory applies to files being split, to sectionsplit
  # outputs, and to files that carry their own directory. Regular files that
  # merely inherit sectionsplit_filename from the collection level do not
  # use it.
  is_sectionsplit_output = ref.respond_to?(:sectionsplit_output) &&
    ref.sectionsplit_output
  use_sectionsplit_dir = ref.sectionsplit_filename && has_custom_dir &&
    (ref.sectionsplit || is_sectionsplit_output || file_has_dir)
  f = if use_sectionsplit_dir
        File.basename(ref.file)
      elsif ref.output_filename
        substitute_filename_pattern(ref.output_filename, **params)
      elsif file_has_dir || ref.attachment
        ref.file
      else File.basename(ref.file)
      end
  @disambig.source2dest_filename(f, preserve_dirs: ref.attachment)
end

#output_file_path_prep(ref, idx) ⇒ Object



212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 212

# Gathers the facts +output_file_path+ needs about a manifest entry.
#
# @param ref [Object] manifest entry (must respond to +file+,
#   +sectionsplit_filename+, +output_filename+)
# @param idx [Integer] index of the document in the manifest
# @return [Array] +[has_custom_dir, file_has_dir, params]+:
#   whether the custom filename pattern (sectionsplit_filename, else
#   output_filename) contains a directory (nil when there is no pattern);
#   whether +ref.file+ itself contains a directory; and the substitution
#   values for filename patterns
def output_file_path_prep(ref, idx)
  source = ref.file
  pattern = ref.sectionsplit_filename || ref.output_filename
  has_custom_dir = pattern && File.dirname(pattern) != "."
  file_has_dir = File.dirname(source) != "."
  params = {
    document_num: idx,
    basename: File.basename(source, ".*"),
    basename_legacy: File.basename(source),
  }
  [has_custom_dir, file_has_dir, params]
end

#preserve_directory_structure?(ident) ⇒ Boolean

Check if we should preserve directory structure for an identifier. Returns the custom filename if directory structure should be preserved, nil otherwise.

Returns:

  • (Boolean)


356
357
358
359
360
361
362
363
364
365
366
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 356

# Decides whether the output of +ident+ keeps a directory structure.
#
# The candidate filename is +rel_path+ (else +out_path+) for sectionsplit
# outputs, +sectionsplit_filename+ for documents being split, and
# +output_filename+ for everything else.
#
# @param ident [String] document identifier
# @return [String, nil] the candidate filename if it contains a directory,
#   nil otherwise
def preserve_directory_structure?(ident)
  candidate =
    if get(ident, :sectionsplit_output)
      get(ident, :rel_path) || get(ident, :out_path)
    elsif get(ident, :sectionsplit)
      get(ident, :sectionsplit_filename)
    else
      get(ident, :output_filename)
    end
  return nil unless candidate

  File.dirname(candidate) == "." ? nil : candidate
end

#process_section_split_instance(key, manifest) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 17

# Splits document +key+ into sections and registers everything the split
# produces in +manifest+: one entry per section, then the attachments entry
# (if any), then the cover page.
#
# @param key [String] key of the document in +@files+
# @param manifest [Hash] lookup table being built
# @return [String, nil] the document's +:out_path+ as it was before the
#   split changed it, for later cleanup
def process_section_split_instance(key, manifest)
  saved_out_path = @files[key][:out_path]
  sections, sectionsplit_manifest = sectionsplit(key)
  sections.each_with_index do |section, position|
    add_section_split_instance(section, manifest, key, position)
  end
  attachments = add_section_split_attachments(sectionsplit_manifest, key)
  manifest["#{key}:attachments"] = attachments if attachments
  add_section_split_cover(manifest, sectionsplit_manifest, key)
  saved_out_path
end

#read_anchors(xml) ⇒ Object

map locality type and label (e.g. “clause” “1”) to id = anchor for a document Note: will only key clauses, which have unambiguous reference label in locality. Notes, examples etc with containers are just plunked against UUIDs, so that their IDs can at least be registered to be tracked as existing.



20
21
22
23
24
25
26
27
# File 'lib/metanorma/collection/filelookup/utils.rb', line 20

# Maps locality type and label (e.g. "clause" "1") to anchor id for a
# document, using a cross-reference parser obtained from +@isodoc+.
#
# @param xml [Object] parsed XML document
# @return [Hash] anchors table filled in by +read_anchors1+
def read_anchors(xml)
  xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
                            { locale: @locale })
  xrefs.parse(xml)
  anchors = {}
  xrefs.get.each do |anchor_id, details|
    read_anchors1(anchor_id, details, anchors)
  end
  anchors
end

#read_anchors1(key, val, ret) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/metanorma/collection/filelookup/utils.rb', line 29

# Files one anchor under its type in the anchors table.
#
# The anchor is indexed by its label with markup tags stripped; anchors in a
# container, or without a label, are indexed by a random UUID instead. If
# the anchor also has a +:value+, it is indexed under that (tags stripped)
# as well. A missing +:type+ defaults to "clause" and is written back to
# +val+.
#
# @param key [String] anchor id
# @param val [Hash] anchor details (+:type+, +:label+, +:value+,
#   +:container+)
# @param ret [Hash] anchors table, modified in place
# @return [Object] +key+ when a +:value+ was indexed, otherwise the falsy
#   +:value+
def read_anchors1(key, val, ret)
  val[:type] ||= "clause"
  bucket = (ret[val[:type]] ||= {})
  label = val[:label]
  unlabelled = val[:container] || label.nil? || label.empty?
  index = if unlabelled then UUIDTools::UUID.random_create.to_s
          else label.gsub(%r{<[^<>]+>}, "")
          end
  bucket[index] = key
  value = val[:value]
  value && (bucket[value.gsub(%r{<[^<>]+>}, "")] = key)
end

#read_file(manifest, idx) ⇒ Object



52
53
54
55
56
57
58
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 52

# Adds the lookup entry for one manifest entry to +@files+.
#
# @param manifest [Object] manifest entry
# @param idx [Integer] index of the document in the manifest
# @return [Hash, nil] the stored entry, or nil when +file_entry+ yields none
def read_file(manifest, idx)
  ident, sanitised = read_file_idents(manifest)
  entry = file_entry(manifest, sanitised, idx)
  return unless entry

  bibdata_process(entry, ident)
  bibitem_process(entry)
  @files[key(ident)] = entry
end

#read_file_idents(manifest) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 60

# Returns the identifier of a manifest entry in raw and sanitised form.
#
# The identifier always comes from the manifest itself; it is deliberately
# NOT looked up in the entry's bibdata.
#
# @param manifest [#identifier] manifest entry
# @return [Array(String, String)] +[identifier, sanitised_identifier]+
def read_file_idents(manifest)
  raw_id = manifest.identifier
  # docid_prefix is handed a copy of the identifier, not the original.
  prefixed = @isodoc.docid_prefix("", manifest.identifier.dup)
  [raw_id, key(prefixed)]
end

#read_files(entries, parent, idx = 0) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 34

# Walks the manifest tree depth-first, deriving formats and registering
# every entry that names a file. File-bearing entries are numbered
# consecutively in traversal order.
#
# @param entries [Array, Object, nil] manifest entries at this level
# @param parent [Object] the entry (or manifest) that contains +entries+
# @param idx [Integer] index to give the next file-bearing entry
# @return [Integer] the index following the last one used
def read_files(entries, parent, idx = 0)
  Array(entries).inject(idx) do |next_idx, item|
    derive_format(item, parent)
    if item.file
      read_file(item, next_idx)
      next_idx += 1
    end
    read_files(item.entry, item, next_idx)
  end
end

#read_ids(xml) ⇒ Object

Also parse all ids in doc (including ones which won’t be xref targets)



5
6
7
8
9
10
11
12
# File 'lib/metanorma/collection/filelookup/utils.rb', line 5

# Collects every +id+ attribute in the document, including ids that are not
# cross-reference targets. Text nodes are skipped.
#
# @param xml [#traverse] parsed XML document
# @return [Hash{String=>true}] each id found, mapped to +true+
def read_ids(xml)
  ids = {}
  xml.traverse do |node|
    next if node.text?

    id = node["id"]
    ids[id] = true if id
  end
  ids
end

#ref_file(ref, data, read, doc) ⇒ Object



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 277

# Returns the contents (optionally) and the output filename of a file
# reference.
#
# The filename is the recorded HTML output path, relative to the parent's
# output directory, when +doc+ is set and +data[:outputs][:html]+ exists;
# otherwise it is a copy of +data[:out_path]+. When +doc+ is set and no
# outputs are recorded at all, the filename is converted by
# +ref_file_xml2html+.
#
# @param ref [String] path of the file to read
# @param data [Hash] lookup entry
# @param read [Boolean] whether to read the file contents
# @param doc [Boolean] whether the file is a Metanorma document
# @return [Array(String, String)] +[contents, filename]+; +contents+ is nil
#   unless +read+ is set
def ref_file(ref, data, read, doc)
  contents = read ? File.read(ref, encoding: "utf-8") : nil
  html_output = doc && data[:outputs] && data[:outputs][:html]
  filename =
    if html_output
      html_output.sub(%r{^#{Regexp.escape(@parent.outdir)}/}, "")
    else
      data[:out_path].dup
    end
  filename = ref_file_xml2html(filename) if doc && !data[:outputs]
  [contents, filename]
end

#ref_file_xml2html(filename) ⇒ Object

Check if file has a recognized MIME type (other than XML). If so, don’t append .html (e.g., .svg, .png, .jpg, etc.). Only process if it doesn’t have a recognized non-XML extension: if filename ends in .xml, replace with .html; otherwise (including sectionsplit files like “file.xml.0” or custom titles), append .html.



300
301
302
303
304
305
306
307
308
309
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 300

# Converts a source filename into its HTML output filename.
#
# Filenames with a recognised non-XML MIME type are returned unchanged. A
# trailing ".xml" is replaced by ".html"; anything else gets ".html"
# appended.
#
# @param filename [String] source filename
# @return [String] output filename
def ref_file_xml2html(filename)
  xml = filename.end_with?(".xml")
  return filename if Util::mime_file_recognised?(filename) && !xml

  xml ? filename.sub(/\.xml$/, ".html") : "#{filename}.html"
end

#section_split_instance_threads(s, manifest, key) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 32

# Thread-pool variant of the per-section loop in
# process_section_split_instance: posts one add_section_split_instance call
# per split section to a pool of 4 threads and waits for all of them.
#
# NOTE(review): the only call site visible here is commented out, so this
# appears to be unused — confirm before relying on it.
#
# @param s [Array] split sections
# @param manifest [Hash] lookup table being built (written to by every task)
# @param key [String] key of the parent document in +@files+
def section_split_instance_threads(s, manifest, key)
  # NOTE(review): @mutex is created but not referenced by any code visible
  # here, so writes to +manifest+ from the pool look unsynchronised — confirm.
  @mutex = Mutex.new
  pool = Concurrent::FixedThreadPool.new(4)
  s.each_with_index do |f1, i|
    pool.post do
      add_section_split_instance(f1, manifest, key, i)
    end
  end
  # Stop accepting work, then block until the posted tasks have finished.
  pool.shutdown
  pool.wait_for_termination
end

#sectionsplit(ident) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 142

# Splits the document registered under +ident+ into sections.
#
# Creates a Sectionsplit for the document (kept in +@sectionsplit+ for the
# follow-up steps), runs it, and builds the collection manifest for the
# resulting sections.
#
# @param ident [String] key of the document in +@files+
# @return [Array] +[sections, manifest]+: the split sections sorted by
#   +:order+, and the sectionsplit collection manifest
def sectionsplit(ident)
  entry = @files[ident]
  file = entry[:ref]
  source_dir = File.dirname(file)
  # base is always a bare filename; any directory structure comes from the
  # sectionsplit_filename pattern only.
  base = File.basename(entry[:out_path] || file)
  @sectionsplit = ::Metanorma::Collection::Sectionsplit.new(
    input: file, base: base,
    dir: source_dir, output: entry[:out_path],
    compile_opts: @parent.compile_options, ident: ident,
    fileslookup: self, isodoc: @isodoc,
    parent_idx: entry[:idx],
    sectionsplit_filename: entry[:sectionsplit_filename],
    isodoc_presxml: @isodoc_presxml,
    document_suffix: entry[:document_suffix]
  )
  sections = @sectionsplit.sectionsplit.sort_by { |section| section[:order] }
  xml = Nokogiri::XML(File.read(file, encoding: "UTF-8"), &:huge)
  manifest = @sectionsplit.collection_manifest(File.basename(file),
                                               sections, xml, nil,
                                               source_dir)
  [sections, manifest]
end

#set(ident, attr, value) ⇒ Object



26
27
28
# File 'lib/metanorma/collection/filelookup/base.rb', line 26

# Sets one attribute of a lookup entry.
#
# @param ident [String] document identifier (normalised through +key+)
# @param attr [Symbol] attribute to set
# @param value [Object] new value
# @return [Object] +value+
def set(ident, attr, value)
  entry = @files[key(ident)]
  entry[attr] = value
end

#substitute_filename_pattern(pattern, options = {}) ⇒ Object

Substitute special strings in filename patterns

Parameters:

  • pattern (String)

    filename pattern with placeholders

  • options (Hash) (defaults to: {})

    substitution values

Options Hash (options):

  • :document_num (Integer)

    document index

  • :basename (String)

    filename without extension

  • :basename_legacy (String)

    full filename with extension

  • :sectionsplit_num (Integer)

    sectionsplit index



165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 165

# Substitute special strings in filename patterns.
#
# Substitution values are inserted literally: backslash sequences in a value
# (e.g. "\0", "\1") are not treated as regexp back-references. A placeholder
# whose option is missing or falsy is left in place.
#
# @param pattern [String, nil] filename pattern with placeholders
# @param options [Hash] substitution values
# @option options [Integer] :document_num document index, for {document-num}
# @option options [String] :basename filename without extension, for
#   {basename}
# @option options [String] :basename_legacy full filename with extension,
#   for {basename_legacy}
# @option options [Integer] :sectionsplit_num sectionsplit index, for
#   {sectionsplit-num}
# @return [String, nil] a new string with placeholders replaced; +pattern+
#   itself when it is nil or false
def substitute_filename_pattern(pattern, options = {})
  pattern or return pattern
  placeholders = {
    "{document-num}" => :document_num,
    "{basename}" => :basename,
    "{basename_legacy}" => :basename_legacy,
    "{sectionsplit-num}" => :sectionsplit_num,
  }
  placeholders.reduce(pattern.dup) do |result, (placeholder, option)|
    value = options[option]
    next result unless value

    # Block form: the replacement text is used verbatim.
    result.gsub(placeholder) { value.to_s }
  end
end

#targetfile(data, options) ⇒ Array<String, nil>

return file contents + output filename for each file in the collection, given a docref entry

Parameters:

  • data (Hash)

    docref entry

  • read (Boolean)

    read the file in and return it

  • doc (Boolean)

    I am a Metanorma document, so my URL should end with html or pdf or whatever

  • relative (Boolean)

    Return output path, formed relative to YAML file, not input path, relative to calling function

Returns:



263
264
265
266
267
268
269
270
271
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 263

# Returns file contents and output filename for a lookup entry.
#
# Entries of type "fileref" are resolved through +ref_file+, using
# +data[:rel_path]+ when the +:relative+ option is set and +data[:ref]+
# otherwise. Any other entry is resolved through +xml_file+ with
# +data[:id]+.
#
# @param data [Hash] lookup entry
# @param options [Hash] +:read+ (default false), +:doc+ (default true),
#   +:relative+ (default false)
# @return [Array<String, nil>] +[contents, filename]+
def targetfile(data, options)
  opts = { read: false, doc: true, relative: false }.merge(options)
  unless data[:type] == "fileref"
    return xml_file(data[:id], opts[:read])
  end

  path = opts[:relative] ? data[:rel_path] : data[:ref]
  ref_file(path, data, opts[:read], opts[:doc])
end

#targetfile_id(ident, options) ⇒ Object



273
274
275
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 273

# Same as +targetfile+, but takes a document identifier instead of the
# lookup entry.
#
# @param ident [String] document identifier
# @param options [Hash] options passed on to +targetfile+
# @return [Array<String, nil>] +[contents, filename]+
def targetfile_id(ident, options)
  entry = get(ident)
  targetfile(entry, options)
end

#url(ident, options) ⇒ Object

return citation url for file

Parameters:

  • doc (Boolean)

    I am a Metanorma document, so my URL should end with html or pdf or whatever



49
50
51
52
# File 'lib/metanorma/collection/filelookup/utils.rb', line 49

# Returns the citation URL of a file: the entry's explicit +:url+ if it has
# one, otherwise the output filename computed by +targetfile+.
#
# @param ident [String] document identifier
# @param options [Hash] options passed on to +targetfile+
# @return [String] citation URL
def url(ident, options)
  data = get(ident)
  explicit = data[:url]
  return explicit if explicit

  targetfile(data, options)[1]
end

#url?(ident) ⇒ Boolean

are references to the file to be linked to a file in the collection, or externally? Determines whether file suffix anchors are to be used

Returns:

  • (Boolean)


6
7
8
9
# File 'lib/metanorma/collection/filelookup/base.rb', line 6

# Tells whether references to the file should link externally, i.e. whether
# its lookup entry has a +:url+.
#
# @param ident [String] document identifier
# @return [Object] the entry's +:url+ value (truthy when set); false when
#   there is no entry for +ident+
def url?(ident)
  data = get(ident)
  return false unless data

  data[:url]
end

#xml_file(id, read) ⇒ Object



311
312
313
314
315
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 311

# Returns the serialised +doc-container+ with the given id from the
# collection XML (optionally) and its output filename.
#
# NOTE(review): no check is visible here for a missing container, so an
# unknown +id+ would fail on the +to_xml+ call — confirm callers guard it.
#
# @param id [String] id of the doc-container element
# @param read [Boolean] whether to return the container's XML
# @return [Array(String, String)] +[xml, "<id>.html"]+; +xml+ is nil unless
#   +read+ is set
def xml_file(id, read)
  contents = nil
  if read
    container = @xml.at(ns("//doc-container[@id = '#{id}']"))
    contents = container.to_xml
  end
  [contents, "#{id}.html"]
end