Class: Metanorma::Collection::FileLookup
- Inherits:
-
Object
- Object
- Metanorma::Collection::FileLookup
- Defined in:
- lib/metanorma/collection/filelookup/base.rb,
lib/metanorma/collection/filelookup/utils.rb,
lib/metanorma/collection/filelookup/filelookup.rb,
lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb
Instance Attribute Summary collapse
-
#files_to_delete ⇒ Object
Returns the value of attribute files_to_delete.
-
#parent ⇒ Object
Returns the value of attribute parent.
Instance Method Summary collapse
- #add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry) ⇒ Object
- #add_document_suffix(identifier, doc) ⇒ Object
- #add_section_split ⇒ Object
- #add_section_split_attachments(manifest, ident) ⇒ Object
- #add_section_split_cover(manifest, sectionsplit_manifest, ident) ⇒ Object
- #add_section_split_instance(file, manifest, key, idx) ⇒ Object
- #add_section_split_instance_prep(file, key) ⇒ Object
- #anchors_lookup(anchors) ⇒ Object
- #bibdata_extract(xml) ⇒ Object
- #bibdata_process(entry, ident) ⇒ Object
- #bibitem_process(entry) ⇒ Object
- #cleanup_section_split_instance(key, manifest, original_out_path) ⇒ Object
- #derive_format(entry, parent) ⇒ Object
- #each ⇒ Object
- #each_with_index ⇒ Object
-
#file_entry(ref, identifier, idx) ⇒ Object
ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; idx is the index of the document in the manifest.
- #file_entry_copy(ref, ret) ⇒ Object
- #file_entry_paths(ref, idx, sso) ⇒ Object
-
#file_entry_struct(ref, abs) ⇒ Object
ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; extract_opts are the compilation options extracted as document attributes.
- #get(ident, attr = nil) ⇒ Object
-
#initialize(path, parent) ⇒ FileLookup
constructor
hash for each document in collection of document identifier to: document reference (fileref or id), type of document reference, and bibdata entry for that file.
- #key(ident) ⇒ Object
- #keys ⇒ Object
- #ns(xpath) ⇒ Object
- #one_doc_collection? ⇒ Boolean
-
#output_file_path(ref, idx) ⇒ Object
TODO make the output file location reflect source location universally, not just for attachments: no File.basename.
- #output_file_path_prep(ref, idx) ⇒ Object
-
#preserve_directory_structure?(ident) ⇒ Boolean
Check if we should preserve directory structure for an identifier. Returns the custom filename if directory structure should be preserved, nil otherwise.
- #process_section_split_instance(key, manifest) ⇒ Object
-
#read_anchors(xml) ⇒ Object
Map locality type and label (e.g. “clause” “1”) to id = anchor for a document. Note: will only key clauses, which have unambiguous reference label in locality.
- #read_anchors1(key, val, ret) ⇒ Object
- #read_file(manifest, idx) ⇒ Object
- #read_file_idents(manifest) ⇒ Object
- #read_files(entries, parent, idx = 0) ⇒ Object
-
#read_ids(xml) ⇒ Object
Also parse all ids in doc (including ones which won’t be xref targets).
- #ref_file(ref, data, read, doc) ⇒ Object
-
#ref_file_xml2html(filename) ⇒ Object
Check if the file has a recognized MIME type (other than XML); if so, don’t append .html (e.g., .svg, .png, .jpg, etc.). Only process the filename if it doesn’t have a recognized non-XML extension: if the filename ends in .xml, replace that with .html; otherwise (including sectionsplit files like “file.xml.0” or custom titles), append .html.
- #section_split_instance_threads(s, manifest, key) ⇒ Object
- #sectionsplit(ident) ⇒ Object
- #set(ident, attr, value) ⇒ Object
-
#substitute_filename_pattern(pattern, options = {}) ⇒ Object
Substitute special strings in filename patterns.
-
#targetfile(data, options) ⇒ Array<String, nil>
Return file contents + output filename for each file in the collection, given a docref entry. The URL should end with html or pdf or whatever, and is formed relative to the YAML file, not the input path, relative to the calling function.
- #targetfile_id(ident, options) ⇒ Object
-
#url(ident, options) ⇒ Object
return citation url for file so my URL should end with html or pdf or whatever.
-
#url?(ident) ⇒ Boolean
are references to the file to be linked to a file in the collection, or externally? Determines whether file suffix anchors are to be used.
- #xml_file(id, read) ⇒ Object
Constructor Details
#initialize(path, parent) ⇒ FileLookup
hash for each document in collection of document identifier to: document reference (fileref or id), type of document reference, and bibdata entry for that file
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 18
# Sets up the lookup state from the parent object and then populates @files
# by walking the manifest entries with read_files.
#
# @param path [Object] stored as @path
# @param parent [Object] source of xml, isodoc, isodoc_presxml, compile,
#   documents and manifest
def initialize(path, parent)
  @c = HTMLEntities.new
  @files = {}
  @files_to_delete = []
  @path = path
  @parent = parent
  @xml, @isodoc, @isodoc_presxml =
    parent.xml, parent.isodoc, parent.isodoc_presxml
  @compile, @documents = parent.compile, parent.documents
  @disambig = Util::DisambigFiles.new
  @manifest = parent.manifest
  read_files(@manifest.entry, parent.manifest)
end
Instance Attribute Details
#files_to_delete ⇒ Object
Returns the value of attribute files_to_delete.
12 13 14 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12 def files_to_delete @files_to_delete end |
#parent ⇒ Object
Returns the value of attribute parent.
12 13 14 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12 def parent @parent end |
Instance Method Details
#add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry) ⇒ Object
72 73 74 75 76 77 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 72
# Copies the cover referenced by entry[:ref] to "index.html" next to the
# sectionsplit manifest file, and registers a copy of the entry pointing at
# that file under "<key>:index1.html".
#
# @return [Hash] the entry stored in manifest
def add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry)
  manifest_dir = File.dirname(sectionsplit_manifest.file)
  index_path = File.join(manifest_dir, "index.html")
  FileUtils.cp(entry[:ref], index_path)
  index_entry = entry.merge(out_path: "index.html", ref: index_path)
  manifest["#{key}:index1.html"] = index_entry
end
#add_document_suffix(identifier, doc) ⇒ Object
234 235 236 237 238 239 240 241 242 243 244 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 234
# Derives a suffix from the identifier (via Metanorma::Utils::to_ncname),
# applies it to the attributes listed by Util::anchor_id_attributes and to
# CSS url references, then appends it to the root "document_suffix"
# attribute.
def add_document_suffix(identifier, doc)
  suffix = Metanorma::Utils::to_ncname(identifier)
  # ids are collected before any attribute is rewritten
  original_ids = doc.xpath("./@id | .//@id").map(&:value)
  Util::anchor_id_attributes.each do |(tag_name, attr_name)|
    Util::add_suffix_to_attrs(doc, suffix, tag_name, attr_name, @isodoc)
  end
  Util::url_in_css_styles(doc, original_ids, suffix)
  doc.root["document_suffix"] ||= ""
  doc.root["document_suffix"] += suffix
end
#add_section_split ⇒ Object
6 7 8 9 10 11 12 13 14 15 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 6
# Rebuilds @files so that, for every non-attachment entry flagged
# :sectionsplit, the entries produced by process_section_split_instance are
# inserted immediately before the entry itself.
def add_section_split
  rebuilt = {}
  # iterate over a snapshot of the keys; @files is replaced at the end
  @files.keys.each do |ident|
    if @files[ident][:sectionsplit] && !@files[ident][:attachment]
      saved_out_path = process_section_split_instance(ident, rebuilt)
      cleanup_section_split_instance(ident, rebuilt, saved_out_path)
    end
    rebuilt[ident] = @files[ident]
  end
  @files = rebuilt
end
#add_section_split_attachments(manifest, ident) ⇒ Object
89 90 91 92 93 94 95 96 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 89
# Asks the current sectionsplit object for its attachments location under
# the manifest's directory; if there is one, records it as the out_path of
# the identified file and returns a manifest entry for it.
#
# NOTE(review): the rendered listing had lost the method name, the local
# variable and the callee name. The method name comes from the method
# summary; `section_split_attachments` is assumed by analogy with
# `section_split_cover` — confirm against the source file.
#
# @return [Hash, nil] attachment entry, or nil when there are no attachments
def add_section_split_attachments(manifest, ident)
  attachments = @sectionsplit
    .section_split_attachments(out: File.dirname(manifest.file)) or return
  @files[ident][:out_path] = attachments
  { attachment: true, index: false, out_path: attachments,
    ref: File.join(File.dirname(manifest.file), attachments) }
end
#add_section_split_cover(manifest, sectionsplit_manifest, ident) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 59
# Obtains the cover file name from the sectionsplit object, records it as
# the out_path of the identified file, and registers it in the manifest under
# "<ident>:index.html". In a one-document collection the cover is also
# registered through add_cover_one_doc_coll.
def add_section_split_cover(manifest, sectionsplit_manifest, ident)
  clean_ident = @parent.dir_name_cleanse(ident)
  single_doc = one_doc_collection?
  cover = @sectionsplit.section_split_cover(sectionsplit_manifest,
                                            clean_ident, single_doc)
  @files[ident][:out_path] = cover
  cover_entry = {
    attachment: true, index: false, out_path: cover,
    ref: File.join(File.dirname(sectionsplit_manifest.file), cover)
  }
  manifest["#{ident}:index.html"] = cover_entry
  single_doc and
    add_cover_one_doc_coll(manifest, sectionsplit_manifest, ident, cover_entry)
end
#add_section_split_instance(file, manifest, key, idx) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 98
# Registers one sectionsplit output file in the manifest, under a key derived
# from the parent key and the section title.
#
# file[:url] already has placeholders substituted and may carry a directory;
# it is used (minus the .xml extension) as both rel_path and out_path.
# Split output files are not queued for deletion here; the original parent
# output is handled in cleanup_section_split_instance.
def add_section_split_instance(file, manifest, key, idx)
  presfile, newkey, xml = add_section_split_instance_prep(file, key)
  anchors = read_anchors(xml)
  parent_entry = @files[key]
  section_url = file[:url]
  stem = File.basename(section_url, ".xml")
  dir = File.dirname(section_url)
  # keep the directory only when the URL actually has one
  target = dir == "." ? stem : File.join(dir, stem)
  entry = {
    parentid: key, presentationxml: true, type: "fileref",
    rel_path: target, out_path: target,
    anchors: anchors, anchors_lookup: anchors_lookup(anchors),
    ids: read_ids(xml), format: parent_entry[:format],
    sectionsplit_output: true, indirect_key: @sectionsplit.key,
    bibdata: parent_entry[:bibdata], ref: presfile,
    sectionsplit_filename: parent_entry[:sectionsplit_filename],
    idx: parent_entry[:idx]
  }
  entry[:bare] = true unless idx.zero?
  manifest[newkey] = entry
end
#add_section_split_instance_prep(file, key) ⇒ Object
132 133 134 135 136 137 138 139 140 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 132
# Locates and parses the XML of one sectionsplit output.
#
# The XML is looked up by basename only, in the directory of the parent
# file's :ref, even when file[:url] carries a directory for the HTML output.
#
# @return [Array] [path of the XML file, manifest key, parsed document]
def add_section_split_instance_prep(file, key)
  source_dir = File.dirname(@files[key][:ref])
  presfile = File.join(source_dir, File.basename(file[:url]))
  newkey = key("#{key.strip} #{file[:title]}")
  parsed = Nokogiri::XML(File.read(presfile), &:huge)
  [presfile, newkey, parsed]
end
#anchors_lookup(anchors) ⇒ Object
40 41 42 43 44 |
# File 'lib/metanorma/collection/filelookup/utils.rb', line 40
# Flattens a two-level hash (type => { label => anchor }) into a set-like
# hash whose keys are all the anchors and whose values are true.
def anchors_lookup(anchors)
  lookup = {}
  anchors.values.each do |by_label|
    by_label.each_value { |anchor| lookup[anchor] = true }
  end
  lookup
end
#bibdata_extract(xml) ⇒ Object
84 85 86 87 88 89 90 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 84
# Collects from a parsed document the anchors, the anchor lookup, all ids,
# the bibdata element and the root "document_suffix" attribute.
#
# @return [Hash] keys :anchors, :anchors_lookup, :ids, :bibdata,
#   :document_suffix
def bibdata_extract(xml)
  anchors = read_anchors(xml)
  lookup = anchors_lookup(anchors)
  ids = read_ids(xml)
  {
    anchors: anchors,
    anchors_lookup: lookup,
    ids: ids,
    bibdata: xml.at(ns("//bibdata")),
    document_suffix: xml.root["document_suffix"],
  }
end
#bibdata_process(entry, ident) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 72
# Fills in the bibliographic data of an entry. Attachments get a bibdata
# element built from the identifier alone; other files are read, parsed,
# given a document suffix, and merged with the result of bibdata_extract.
#
# NOTE(review): the rendered listing had lost the class-method name in
# `Metanorma::Collection::Document.(ident)`; `attachment_bibitem` is the
# assumed original — confirm against the source file.
def bibdata_process(entry, ident)
  if entry[:attachment]
    entry[:bibdata] =
      Metanorma::Collection::Document.attachment_bibitem(ident).root
  else
    file, _filename = targetfile(entry, read: true)
    xml = Nokogiri::XML(file, &:huge)
    add_document_suffix(ident, xml)
    entry.merge!(bibdata_extract(xml))
  end
end
#bibitem_process(entry) ⇒ Object
92 93 94 95 96 97 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 92
# Derives entry[:bibitem] from a copy of entry[:bibdata]: the copy is renamed
# to "bibitem", marked hidden="true", and its direct "ext" child (if any) is
# removed.
def bibitem_process(entry)
  bibitem = entry[:bibdata].dup
  entry[:bibitem] = bibitem
  bibitem.name = "bibitem"
  bibitem["hidden"] = "true"
  bibitem.at("./*[local-name() = 'ext']")&.remove
end
#cleanup_section_split_instance(key, manifest, original_out_path) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 44
# Queues files for deletion after a sectionsplit: the sectionsplit index.html
# in the source directory and, when the original out_path is known, the
# original .html, .xml and .presentation.xml outputs under the parent's
# outdir. Finally records the sectionsplit key on the parent entry.
def cleanup_section_split_instance(key, manifest, original_out_path)
  @files_to_delete << manifest["#{key}:index.html"][:ref]
  if original_out_path
    # original_out_path is the value saved before it was changed to the cover
    stem = File.join(@parent.outdir, original_out_path.sub(/\.xml$/, ""))
    %w(.html .xml .presentation.xml).each do |ext|
      @files_to_delete << "#{stem}#{ext}"
    end
  end
  # @files[key].delete(:ids).delete(:anchors)
  @files[key][:indirect_key] = @sectionsplit.key
end
#derive_format(entry, parent) ⇒ Object
46 47 48 49 50 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 46
# Settles the output formats of a manifest entry: attachments are left
# untouched; other entries inherit the parent's format (or default to
# xml/presentation/html) and always include "xml" and "presentation".
#
# The rendered listing read `entry. and return`; the stripped attribute is
# restored as `attachment`, the attribute used for this purpose elsewhere in
# the class.
def derive_format(entry, parent)
  entry.attachment and return
  entry.format ||= parent.format || %w(xml presentation html)
  entry.format |= ["xml", "presentation"]
end
#each ⇒ Object
30 31 32 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 30
# Iterates over the (key, entry) pairs of @files.
#
# A block given by the caller is forwarded to Hash#each; previously it was
# silently ignored. Without a block an Enumerator is returned, as before.
def each(&block)
  @files.each(&block)
end
#each_with_index ⇒ Object
34 35 36 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 34
# Iterates over the (key, entry) pairs of @files together with their index.
#
# A block given by the caller is forwarded to Hash#each_with_index;
# previously it was silently ignored. Without a block an Enumerator is
# returned, as before.
def each_with_index(&block)
  @files.each_with_index(&block)
end
#file_entry(ref, identifier, idx) ⇒ Object
ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; idx is the index of the document in the manifest.
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 105
# Builds the lookup entry for a manifest reference that points at a file.
#
# The original listing contained an `else { type: "id", ... }` branch that
# could never run, because the method returns early when ref.file is not set.
# That branch and a redundant intermediate `compact` are removed.
#
# @param ref manifest entry (responds to file, url, output_filename, ...)
# @param identifier used, via Util::key, to look the document up in @documents
# @param idx index of the document in the manifest
# @return [Hash, nil] entry with nil values dropped; nil when ref has no file
def file_entry(ref, identifier, idx)
  ref.file or return
  abs = @documents[Util::key identifier].file
  # For sectionsplit outputs from YAML manifest, we need to compute the full
  # path by combining sectionsplit_filename directory with ref.file basename
  sso = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
  out_path, rel_path = file_entry_paths(ref, idx, sso)
  ret = { type: "fileref", ref: abs, rel_path: rel_path, url: ref.url,
          out_path: out_path, idx: idx,
          output_filename: ref.output_filename,
          sectionsplit_filename: ref.sectionsplit_filename,
          pdffile: ref.pdffile, format: ref.format&.map(&:to_sym) }
  file_entry_copy(ref, ret)
  ret.compact
end
#file_entry_copy(ref, ret) ⇒ Object
225 226 227 228 229 230 231 232 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 225
# Copies a fixed list of optional attributes from ref into ret, for each one
# that ref responds to. Hyphens are removed from the name to form the
# destination key (e.g. "presentation-xml" is stored as :presentationxml).
def file_entry_copy(ref, ret)
  %w(attachment sectionsplit index presentation-xml url
     bare-after-first output_filename sectionsplit_filename
     sectionsplit_output).each do |attr|
    next unless ref.respond_to?(attr.to_sym)

    ret[attr.delete("-").to_sym] = ref.send(attr)
  end
end
#file_entry_paths(ref, idx, sso) ⇒ Object
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 142
# Computes [out_path, rel_path] for a manifest reference.
#
# Only a sectionsplit output whose sectionsplit_filename pattern contains a
# directory gets that directory prepended to the basename of ref.file; every
# other case uses output_file_path and ref.file as-is.
def file_entry_paths(ref, idx, sso)
  pattern = sso && ref.respond_to?(:sectionsplit_filename) &&
    ref.sectionsplit_filename
  dir = pattern ? File.dirname(pattern) : "."
  # no usable directory in the pattern: default paths
  return [output_file_path(ref, idx), ref.file] if dir == "."

  full_path = File.join(dir, File.basename(ref.file, ".xml"))
  [full_path, "#{full_path}.xml"]
end
#file_entry_struct(ref, abs) ⇒ Object
ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; extract_opts are the compilation options extracted as document attributes.
132 133 134 135 136 137 138 139 140 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 132
# Builds a "fileref" entry for ref, adding the compilation options extracted
# from a sibling .adoc file when one exists next to the .xml source.
#
# The original called output_file_path(ref) with one argument although that
# method requires (ref, idx), so every call raised ArgumentError. An optional
# idx (default nil) is accepted and passed through.
#
# NOTE(review): the rendered listing had lost the method name after
# `Metanorma::Input::Asciidoc.new.`; `extract_options` is the assumed
# original — confirm against the source file.
#
# @param ref manifest entry (responds to file, url, pdffile, format)
# @param abs [String] absolute source file address
# @param idx [Integer, nil] index of the document in the manifest
# @return [Hash] entry with nil values dropped
def file_entry_struct(ref, abs, idx = nil)
  adoc = abs.sub(/\.xml$/, ".adoc")
  if adoc.end_with?(".adoc") && File.exist?(adoc)
    opts = Metanorma::Input::Asciidoc.new.extract_options(File.read(adoc))
  end
  { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
    out_path: output_file_path(ref, idx), pdffile: ref.pdffile,
    format: ref.format&.map(&:to_sym), extract_opts: opts }.compact
end
#get(ident, attr = nil) ⇒ Object
20 21 22 23 24 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 20
# Looks up the entry stored under key(ident). With attr, returns that
# attribute of the entry; without, returns the whole entry.
def get(ident, attr = nil)
  entry = @files[key(ident)]
  return entry unless attr

  entry[attr]
end
#key(ident) ⇒ Object
11 12 13 14 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 11
# Normalises an identifier into a lookup key: decodes it with @c, collapses
# each run of Unicode space separators into a single space, and strips a
# leading "metanorma-collection " prefix.
def key(ident)
  decoded = @c.decode(ident)
  single_spaced = decoded.gsub(/(\p{Zs})+/, " ")
  single_spaced.sub(/^metanorma-collection /, "")
end
#keys ⇒ Object
16 17 18 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 16 def keys @files.keys end |
#ns(xpath) ⇒ Object
38 39 40 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 38 def ns(xpath) @isodoc.ns(xpath) end |
#one_doc_collection? ⇒ Boolean
79 80 81 82 83 84 85 86 87 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 79
# True when at most one entry in @files is neither an attachment nor flagged
# :presentationxml.
def one_doc_collection?
  documents = @files.each_value.count do |entry|
    !(entry[:attachment] || entry[:presentationxml])
  end
  documents <= 1
end
#output_file_path(ref, idx) ⇒ Object
TODO make the output file location reflect source location universally, not just for attachments: no File.basename
For files with custom directory structure, construct path with directory. For files with output_filename, use that (with substitutions). For others, use basename of ref.file.
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 184
# Chooses the output file name for a manifest reference and passes it
# through @disambig.source2dest_filename.
#
# TODO make the output file location reflect source location universally,
# not just for attachments: no File.basename
#
# The sectionsplit_filename directory structure applies only if the file has
# sectionsplit enabled, is itself a sectionsplit output, or already has a
# directory in ref.file. Regular files that merely inherit
# sectionsplit_filename from the collection level do not use it.
#
# Changes from the original listing: a leftover `warn ret`, which printed
# every computed path to stderr, is removed; the stripped attribute after
# `ref.` is restored as `attachment`.
def output_file_path(ref, idx)
  has_custom_dir, file_has_dir, params = output_file_path_prep(ref, idx)
  is_sectionsplit_output = ref.respond_to?(:sectionsplit_output) &&
    ref.sectionsplit_output
  use_sectionsplit_dir = ref.sectionsplit_filename && has_custom_dir &&
    (ref.sectionsplit || is_sectionsplit_output || file_has_dir)
  f = if use_sectionsplit_dir
        # For sectionsplit outputs, return just the basename.
        # The directory will be applied during file_compile_format
        # via preserve_directory_structure?
        File.basename(ref.file)
      elsif ref.output_filename
        substitute_filename_pattern(ref.output_filename, **params)
      elsif file_has_dir
        ref.file # Preserve directory structure already in ref.file
      elsif ref.attachment
        ref.file
      else
        File.basename(ref.file)
      end
  @disambig.source2dest_filename(f, preserve_dirs: ref.attachment)
end
#output_file_path_prep(ref, idx) ⇒ Object
212 213 214 215 216 217 218 219 220 221 222 223 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 212
# Gathers what output_file_path needs to decide on a file name.
#
# @return [Array] [whether the sectionsplit_filename/output_filename pattern
#   contains a directory (nil when there is no pattern), whether ref.file
#   itself contains a directory, substitution parameters for filename
#   patterns]
def output_file_path_prep(ref, idx)
  source = ref.file
  pattern = ref.sectionsplit_filename || ref.output_filename
  has_custom_dir = pattern && File.dirname(pattern) != "."
  file_has_dir = File.dirname(source) != "."
  substitutions = {
    document_num: idx,
    basename: File.basename(source, ".*"),
    basename_legacy: File.basename(source),
  }
  [has_custom_dir, file_has_dir, substitutions]
end
#preserve_directory_structure?(ident) ⇒ Boolean
Check if we should preserve directory structure for an identifier. Returns the custom filename if directory structure should be preserved, nil otherwise.
356 357 358 359 360 361 362 363 364 365 366 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 356
# Picks the filename that may carry a directory for this identifier
# (rel_path/out_path for sectionsplit outputs, sectionsplit_filename for
# sectionsplit parents, output_filename otherwise) and returns it only when
# it actually contains a directory component; otherwise returns nil.
def preserve_directory_structure?(ident)
  candidate =
    if get(ident, :sectionsplit_output)
      # For sectionsplit outputs, use rel_path which has the directory
      get(ident, :rel_path) || get(ident, :out_path)
    elsif get(ident, :sectionsplit)
      get(ident, :sectionsplit_filename)
    else
      get(ident, :output_filename)
    end
  return nil unless candidate

  File.dirname(candidate) == "." ? nil : candidate
end
#process_section_split_instance(key, manifest) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 17
# Runs sectionsplit for one file and registers every resulting section, the
# attachments entry (if any) and the cover in the manifest.
#
# The rendered listing read `a = (sectionsplit_manifest, key)`; the stripped
# call is restored as add_section_split_attachments, the method of this
# class with the matching (manifest, ident) signature.
#
# @return the file's out_path as it was before processing (the cover step
#   overwrites it), for use by cleanup_section_split_instance
def process_section_split_instance(key, manifest)
  # Save the original out_path before it gets modified
  original_out_path = @files[key][:out_path]
  s, sectionsplit_manifest = sectionsplit(key)
  # section_split_instance_threads(s, manifest, key)
  s.each_with_index do |f1, i|
    add_section_split_instance(f1, manifest, key, i)
  end
  a = add_section_split_attachments(sectionsplit_manifest, key) and
    manifest["#{key}:attachments"] = a
  add_section_split_cover(manifest, sectionsplit_manifest, key)
  original_out_path
end
#read_anchors(xml) ⇒ Object
Map locality type and label (e.g. “clause” “1”) to id = anchor for a document. Note: will only key clauses, which have unambiguous reference label in locality. Notes, examples etc with containers are just plunked against UUIDs, so that their IDs can at least be registered to be tracked as existing.
20 21 22 23 24 25 26 27 |
# File 'lib/metanorma/collection/filelookup/utils.rb', line 20
# Parses the document's cross-reference anchors through @isodoc and regroups
# them by type and label using read_anchors1.
#
# NOTE(review): @lang, @script and @locale are not assigned in the
# constructor shown for this class — presumably nil here; confirm.
#
# @return [Hash] type => { label => anchor id }
def read_anchors(xml)
  xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
                            { locale: @locale })
  xrefs.parse xml
  by_type = {}
  xrefs.get.each do |anchor_id, anchor|
    read_anchors1(anchor_id, anchor, by_type)
  end
  by_type
end
#read_anchors1(key, val, ret) ⇒ Object
29 30 31 32 33 34 35 36 37 38 |
# File 'lib/metanorma/collection/filelookup/utils.rb', line 29
# Files one anchor under ret[type], defaulting the type to "clause".
#
# The index is the label with markup tags stripped; anchors that have a
# container, or no label, are filed under a random UUID instead. When the
# anchor has a :value, it is additionally filed under that value (tags
# stripped).
def read_anchors1(key, val, ret)
  type = (val[:type] ||= "clause")
  bucket = (ret[type] ||= {})
  label = val[:label]
  index = if val[:container] || label.nil? || label.empty?
            UUIDTools::UUID.random_create.to_s
          else
            label.gsub(%r{<[^<>]+>}, "")
          end
  bucket[index] = key
  value = val[:value] and bucket[value.gsub(%r{<[^<>]+>}, "")] = key
end
#read_file(manifest, idx) ⇒ Object
52 53 54 55 56 57 58 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 52
# Builds the lookup entry for one manifest item, adds its bibdata and
# bibitem, and stores it in @files under the normalised identifier.
# Does nothing when file_entry yields no entry.
def read_file(manifest, idx)
  ident, sanitised_ident = read_file_idents(manifest)
  entry = file_entry(manifest, sanitised_ident, idx)
  return unless entry

  bibdata_process(entry, ident)
  bibitem_process(entry)
  @files[key(ident)] = entry
end
#read_file_idents(manifest) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 60
# Returns the manifest identifier together with its sanitised form
# (docid_prefix with an empty type, then key).
#
# NO, DO NOT FISH FOR THE GENUINE IDENTIFIER IN BIBDATA: the identifier is
# always taken from the manifest, never from manifest.bibdata.
#
# @return [Array] [identifier, sanitised identifier]
def read_file_idents(manifest)
  raw_id = manifest.identifier
  prefixed = @isodoc.docid_prefix("", manifest.identifier.dup)
  [raw_id, key(prefixed)]
end
#read_files(entries, parent, idx = 0) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 34
# Walks the manifest tree depth-first. Each entry has its format derived
# from its parent; entries with a file are read and numbered consecutively.
#
# @param idx [Integer] index to give to the next file found
# @return [Integer] the index following the last file read
def read_files(entries, parent, idx = 0)
  Array(entries).inject(idx) do |next_idx, entry|
    derive_format(entry, parent)
    if entry.file
      read_file(entry, next_idx)
      next_idx += 1
    end
    read_files(entry.entry, entry, next_idx)
  end
end
#read_ids(xml) ⇒ Object
Also parse all ids in doc (including ones which won’t be xref targets)
5 6 7 8 9 10 11 12 |
# File 'lib/metanorma/collection/filelookup/utils.rb', line 5
# Collects every "id" attribute value found on non-text nodes while
# traversing the document (including ids which won't be xref targets).
#
# @return [Hash] id => true
def read_ids(xml)
  found = {}
  xml.traverse do |node|
    next if node.text?

    id = node["id"]
    found[id] = true if id
  end
  found
end
#ref_file(ref, data, read, doc) ⇒ Object
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 277
# Returns [file contents (only if read), output filename] for a file
# reference.
#
# When doc is set and the entry has a recorded HTML output (:outputs, set
# after compilation), the filename is that path with the leading output
# directory removed. Otherwise it is a copy of :out_path (set at
# initialisation), converted with ref_file_xml2html when doc is set and no
# outputs are recorded.
def ref_file(ref, data, read, doc)
  contents = read ? File.read(ref, encoding: "utf-8") : nil
  outputs = data[:outputs]
  compiled_html = doc && outputs && outputs[:html]
  filename =
    if compiled_html
      compiled_html.sub(%r{^#{Regexp.escape(@parent.outdir)}/}, "")
    else
      data[:out_path].dup
    end
  filename = ref_file_xml2html(filename) if doc && !outputs
  [contents, filename]
end
#ref_file_xml2html(filename) ⇒ Object
Check if the file has a recognized MIME type (other than XML); if so, don’t append .html (e.g., .svg, .png, .jpg, etc.). Only process the filename if it doesn’t have a recognized non-XML extension: if the filename ends in .xml, replace that with .html; otherwise (including sectionsplit files like “file.xml.0” or custom titles), append .html.
300 301 302 303 304 305 306 307 308 309 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 300
# Maps a source filename to its HTML output name. Files recognised by
# Util::mime_file_recognised? that do not end in ".xml" are returned
# unchanged; a trailing ".xml" is replaced with ".html"; anything else gets
# ".html" appended.
def ref_file_xml2html(filename)
  recognised = Util::mime_file_recognised?(filename)
  is_xml = filename.end_with?(".xml")
  return filename if recognised && !is_xml

  is_xml ? filename.sub(/\.xml$/, ".html") : "#{filename}.html"
end
#section_split_instance_threads(s, manifest, key) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 32
# Variant of the per-section registration loop that posts each
# add_section_split_instance call to a pool of 4 threads and waits for the
# pool to finish.
def section_split_instance_threads(s, manifest, key)
  @mutex = Mutex.new
  pool = Concurrent::FixedThreadPool.new(4)
  s.each.with_index do |section, position|
    pool.post { add_section_split_instance(section, manifest, key, position) }
  end
  pool.shutdown
  pool.wait_for_termination
end
#sectionsplit(ident) ⇒ Object
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 142
# Creates the Sectionsplit object for the identified file (kept in
# @sectionsplit), runs it, and returns the resulting sections sorted by
# :order together with the collection manifest built from them.
#
# NOTE(review): the rendered listing read `compile_opts: @parent.,`;
# `compile_options` is the assumed original — confirm against the source
# file.
#
# @return [Array] [sorted sections, collection manifest]
def sectionsplit(ident)
  file = @files[ident][:ref]
  # @base must always be just basename, never contain directory components
  # Directory structure comes from sectionsplit_filename pattern only
  base = File.basename(@files[ident][:out_path] || file)
  @sectionsplit = ::Metanorma::Collection::Sectionsplit
    .new(input: file, base: base, dir: File.dirname(file),
         output: @files[ident][:out_path],
         compile_opts: @parent.compile_options, ident: ident,
         fileslookup: self, isodoc: @isodoc,
         parent_idx: @files[ident][:idx],
         sectionsplit_filename: @files[ident][:sectionsplit_filename],
         isodoc_presxml: @isodoc_presxml,
         document_suffix: @files[ident][:document_suffix])
  coll = @sectionsplit.sectionsplit.sort_by { |f| f[:order] }
  xml = Nokogiri::XML(File.read(file, encoding: "UTF-8"), &:huge)
  [coll, @sectionsplit
    .collection_manifest(File.basename(file), coll, xml, nil,
                         File.dirname(file))]
end
#set(ident, attr, value) ⇒ Object
26 27 28 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 26 def set(ident, attr, value) @files[key(ident)][attr] = value end |
#substitute_filename_pattern(pattern, options = {}) ⇒ Object
Substitute special strings in filename patterns
165 166 167 168 169 170 171 172 173 174 175 176 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 165
# Substitutes the placeholders {document-num}, {basename}, {basename_legacy}
# and {sectionsplit-num} in a filename pattern with the matching option
# values. Placeholders whose option is absent are left in place.
#
# The rendered listing had lost every occurrence of `options`; restored from
# the signature in the method summary. Values are now inserted literally
# (block form of gsub!), so a backslash sequence such as "\0" in a value is
# no longer interpreted as a backreference.
#
# @param pattern [String, nil] returned as-is when nil
# @param options [Hash] :document_num, :basename, :basename_legacy,
#   :sectionsplit_num
# @return [String, nil] a new string; pattern itself is not modified
def substitute_filename_pattern(pattern, options = {})
  pattern or return pattern
  placeholders = {
    "{document-num}" => :document_num,
    "{basename}" => :basename,
    "{basename_legacy}" => :basename_legacy,
    "{sectionsplit-num}" => :sectionsplit_num,
  }
  placeholders.each_with_object(pattern.dup) do |(placeholder, opt), result|
    value = options[opt] or next
    result.gsub!(placeholder) { value.to_s }
  end
end
#targetfile(data, options) ⇒ Array<String, nil>
Return file contents + output filename for each file in the collection, given a docref entry. The URL should end with html or pdf or whatever, and is formed relative to the YAML file, not the input path, relative to the calling function.
263 264 265 266 267 268 269 270 271 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 263
# Returns [file contents, output filename] for a lookup entry.
#
# The rendered listing had lost every occurrence of `options`; restored from
# the signature in the method summary.
#
# @param data [Hash] lookup entry; "fileref" entries are resolved through
#   ref_file, all others through xml_file using data[:id]
# @param options [Hash] :read (default false), :doc (default true),
#   :relative (default false; use :rel_path instead of :ref)
# @return [Array<String, nil>]
def targetfile(data, options)
  options = { read: false, doc: true, relative: false }.merge(options)
  path = options[:relative] ? data[:rel_path] : data[:ref]
  if data[:type] == "fileref"
    ref_file path, data, options[:read], options[:doc]
  else
    xml_file data[:id], options[:read]
  end
end
#targetfile_id(ident, options) ⇒ Object
273 274 275 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 273
# Same as targetfile, but takes a document identifier and looks its entry up
# with get.
#
# The rendered listing had lost the `options` parameter; restored from the
# signature in the method summary.
def targetfile_id(ident, options)
  targetfile(get(ident), options)
end
#url(ident, options) ⇒ Object
return citation url for file so my URL should end with html or pdf or whatever
49 50 51 52 |
# File 'lib/metanorma/collection/filelookup/utils.rb', line 49
# Returns the citation URL for a file: the entry's explicit :url when set,
# otherwise the output filename computed by targetfile.
#
# The rendered listing had lost the `options` parameter; restored from the
# signature in the method summary.
def url(ident, options)
  data = get(ident)
  data[:url] || targetfile(data, options)[1]
end
#url?(ident) ⇒ Boolean
are references to the file to be linked to a file in the collection, or externally? Determines whether file suffix anchors are to be used
6 7 8 9 |
# File 'lib/metanorma/collection/filelookup/base.rb', line 6
# Returns the :url of the identified entry (truthy when one is set), or
# false when there is no entry for the identifier.
def url?(ident)
  entry = get(ident)
  return false unless entry

  entry[:url]
end
#xml_file(id, read) ⇒ Object
311 312 313 314 315 |
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 311
# Returns [serialised doc-container with the given id from @xml (only if
# read), "<id>.html"].
def xml_file(id, read)
  contents =
    read ? @xml.at(ns("//doc-container[@id = '#{id}']")).to_xml : nil
  [contents, "#{id}.html"]
end