Module: Metanorma::Standoc::Validate
- Included in:
- Converter
- Defined in:
- lib/metanorma/standoc/validate.rb,
lib/metanorma/standoc/validate_term.rb,
lib/metanorma/standoc/validate_table.rb,
lib/metanorma/standoc/validate_section.rb
Constant Summary collapse
- MATHML_NS =
"http://www.w3.org/1998/Math/MathML".freeze
- SVG_NS =
"http://www.w3.org/2000/svg".freeze
- WILDCARD_ATTRS =
"//*[@format] | //stem | //bibdata//description | " \
"//formattedref | //bibdata//note | //bibdata/abstract | " \
"//bibitem/abstract | //bibitem/note | //metanorma-extension".freeze
- TOO_BIG_IMG_ERR =
<<~ERR.freeze
  Image too large for Data URI encoding: disable Data URI encoding (`:data-uri-image: false`), or set `:data-uri-maxsize: 0`
ERR
- SOURCELOCALITY =
"./origin//locality[@type = 'clause']/" \
"referenceFrom".freeze
Instance Method Summary collapse
- #asset_style(root) ⇒ Object
- #asset_title_style(root) ⇒ Object
- #callouts_error(elem, callouts, annotations) ⇒ Object
- #concept_validate(doc, tag, refterm) ⇒ Object
- #concept_validate_ids(doc) ⇒ Object
- #concept_validate_msg(_doc, tag, refterm, xref) ⇒ Object
- #content_validate(doc) ⇒ Object
- #empty_table_validate(doc) ⇒ Object
- #expand_path(loc) ⇒ Object
-
#formattedstr_strip(doc) ⇒ Object
RelaxNG cannot cope well with wildcard attributes.
- #hanging_para_style(root) ⇒ Object
- #iev_validate(xmldoc) ⇒ Object
- #iev_validate1(term, loc, xmldoc) ⇒ Object
- #image_exists(doc) ⇒ Object
- #image_toobig(doc) ⇒ Object
- #image_validate(doc) ⇒ Object
- #init_iev ⇒ Object
- #math_validate(doc) ⇒ Object
- #math_validate_error(math, elem, error) ⇒ Object
- #mathml_sanitise(math) ⇒ Object
- #max_td_count(table) ⇒ Object
-
#maxcols_check(col, maxcols, tcell) ⇒ Object
If maxcols or maxrows is negative, do not check them.
-
#maxcols_validate1(tcell, row, curr, cells2d, maxcols, mode) ⇒ Object
This code doesn’t actually do anything, since Asciidoctor refuses to generate a table with an inconsistent column count.
- #maxrowcols_validate(table, maxcols, mode: "row_cols") ⇒ Object
- #maxrowcols_validate0(table, maxcols, tablechild, mode) ⇒ Object
- #maxrows_validate(table, cells2d, tablechild, mode) ⇒ Object
- #nested_asset_report(outer, inner, doc) ⇒ Object
- #nested_asset_validate(doc) ⇒ Object
- #nested_asset_validate_basic(doc) ⇒ Object
- #nested_asset_xref_report(outer, inner, _doc) ⇒ Object
- #nested_note_validate(doc) ⇒ Object
- #norm_ref_validate(doc) ⇒ Object
- #png_validate(doc) ⇒ Object
- #png_validate1(img, buffer) ⇒ Object
- #preferred_validate(doc) ⇒ Object
- #preferred_validate_report(terms) ⇒ Object
- #repeat_id_validate(doc) ⇒ Object
- #repeat_id_validate1(elem) ⇒ Object
- #schema_validate(doc, schema) ⇒ Object
- #schema_validate1(file, doc, schema) ⇒ Object
- #section_validate(doc) ⇒ Object
- #sourcecode_style(root) ⇒ Object
- #style_warning(node, msg, text = nil) ⇒ Object
- #table_tracker_update(cells2d, row, curr, rowspan, colspan) ⇒ Object
- #table_validate(doc) ⇒ Object
- #validate(doc) ⇒ Object
-
#xref_validate(doc) ⇒ Object
manually check for xref/@target, xref/@to integrity.
Instance Method Details
#asset_style(root) ⇒ Object
43 44 45 |
# File 'lib/metanorma/standoc/validate_section.rb', line 43

# Style checks for assets. At present this only delegates to
# #asset_title_style.
#
# @param root [Object] node passed on unchanged to #asset_title_style
def asset_style(root)
  asset_title_style(root)
end
#asset_title_style(root) ⇒ Object
34 35 36 37 38 39 40 41 |
# File 'lib/metanorma/standoc/validate_section.rb', line 34

# Issues a style warning for every figure that contains an image but has
# no name element, and for every table that has no name element.
#
# @param root [Object] node answering #xpath
def asset_title_style(root)
  untitled_figures = root.xpath("//figure[image][not(name)]")
  untitled_figures.each do |figure|
    style_warning(figure, "Figure should have title", nil)
  end

  untitled_tables = root.xpath("//table[not(name)]")
  untitled_tables.each do |table|
    style_warning(table, "Table should have title", nil)
  end
end
#callouts_error(elem, callouts, annotations) ⇒ Object
20 21 22 23 24 25 26 |
# File 'lib/metanorma/standoc/validate_section.rb', line 20

# Logs a "Crossreferences" entry when a sourcecode element has
# annotations whose count differs from its count of callouts.
# Nothing is logged when there are no annotations at all.
#
# @param elem [Object] element the log entry is attached to
# @param callouts [#size] callouts found in the element
# @param annotations [#size, #empty?] annotations found in the element
def callouts_error(elem, callouts, annotations)
  return if annotations.empty? || callouts.size == annotations.size

  msg = "mismatch of callouts (#{callouts.size}) and annotations " \
        "(#{annotations.size})"
  @log.add("Crossreferences", elem, msg, severity: 0)
end
#concept_validate(doc, tag, refterm) ⇒ Object
36 37 38 39 40 41 42 43 |
# File 'lib/metanorma/standoc/validate_term.rb', line 36

# Logs an "Anchors" entry for each xref directly under a +tag+ element
# whose target is not among the ids collected by #concept_validate_ids.
#
# @param doc [Object] document answering #xpath
# @param tag [String] element name whose xref children are checked
# @param refterm [String] relative path used by #concept_validate_msg
def concept_validate(doc, tag, refterm)
  concept_validate_ids(doc)
  doc.xpath("//#{tag}/xref").each do |xref|
    next if @concept_ids[xref["target"]]

    msg = concept_validate_msg(doc, tag, refterm, xref)
    @log.add("Anchors", xref, msg, severity: 0)
  end
end
#concept_validate_ids(doc) ⇒ Object
45 46 47 48 49 50 51 |
# File 'lib/metanorma/standoc/validate_term.rb', line 45

# Builds, once, two lookup tables keyed by element id:
# @concept_ids for term elements and dt elements inside definitions,
# and @concept_terms_tags for terms elements.
#
# @param doc [Object] document answering #xpath
# @return [nil]
def concept_validate_ids(doc)
  @concept_ids ||=
    doc.xpath("//term | //definitions//dt").map { |n| [n["id"], true] }.to_h
  @concept_terms_tags ||=
    doc.xpath("//terms").map { |n| [n["id"], true] }.to_h
  nil
end
#concept_validate_msg(_doc, tag, refterm, xref) ⇒ Object
53 54 55 56 57 58 59 60 61 62 |
# File 'lib/metanorma/standoc/validate_term.rb', line 53

# Composes the message reported for an xref that does not point to a
# term or symbol. When the target is the id of a terms element, the
# message is stripped and a hint about subterms is appended.
#
# @param _doc [Object] unused
# @param tag [String] element name, capitalised at the start of the message
# @param refterm [String] path, relative to the xref's parent, of the
#   element whose text names the reference
# @param xref [Object] the offending xref
# @return [String]
def concept_validate_msg(_doc, tag, refterm, xref)
  msg = <<~LOG
    #{tag.capitalize} #{xref.at("../#{refterm}")&.text} is pointing to #{xref['target']}, which is not a term or symbol
  LOG
  return msg unless @concept_terms_tags[xref["target"]]

  msg.strip + ". Did you mean to point to a subterm?"
end
#content_validate(doc) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/metanorma/standoc/validate.rb', line 13

# Runs every content validation in sequence, then aborts through
# #clean_abort if the log holds any fatal messages.
#
# The order matters for the first three calls: #repeat_id_validate
# populates state read by #xref_validate, which in turn populates state
# read by #nested_asset_validate.
#
# NOTE(review): the listing was truncated after `@log.`; `abort_messages`
# is assumed to be the logger accessor returning the fatal entries —
# confirm against the logger class.
#
# @param doc [Object] document answering #at and #root
def content_validate(doc)
  @doctype = doc.at("//bibdata/ext/doctype")&.text
  repeat_id_validate(doc.root) # feeds xref_validate
  xref_validate(doc) # feeds nested_asset_validate
  nested_asset_validate(doc)
  section_validate(doc)
  norm_ref_validate(doc)
  iev_validate(doc.root)
  concept_validate(doc, "concept", "refterm")
  concept_validate(doc, "related", "preferred//name")
  preferred_validate(doc)
  table_validate(doc)
  requirement_validate(doc)
  image_validate(doc)
  math_validate(doc)
  fatalerrors = @log.abort_messages
  fatalerrors.empty? or clean_abort(fatalerrors.join("\n"), doc)
end
#empty_table_validate(doc) ⇒ Object
17 18 19 20 21 |
# File 'lib/metanorma/standoc/validate_table.rb', line 17

# Logs an "Empty table" entry for every table that contains no tr
# element at any depth.
#
# @param doc [Object] document answering #xpath
def empty_table_validate(doc)
  rowless_tables = doc.xpath("//table[not(.//tr)]")
  rowless_tables.each do |table|
    @log.add("Table", table, "Empty table", severity: 0)
  end
end
#expand_path(loc) ⇒ Object
159 160 161 162 163 164 |
# File 'lib/metanorma/standoc/validate.rb', line 159

# Resolves a file location to a path that exists on disk. The location
# is tried as given first, then joined onto @localdir.
#
# @param loc [String] file location as given in the document
# @return [String, nil] the first candidate that exists, or nil if
#   neither does
def expand_path(loc)
  relative_path = File.join(@localdir, loc)
  [loc, relative_path].find { |path| File.exist?(path) }
end
#formattedstr_strip(doc) ⇒ Object
RelaxNG cannot cope well with wildcard attributes. So we strip any attributes from FormattedString instances (which can contain xs:any markup, and are signalled with @format) before validation.
131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/metanorma/standoc/validate.rb', line 131

# Prepares a document for schema validation. For every node matched by
# WILDCARD_ATTRS, all attributes are deleted from each descendant
# element reached through its child elements. Every svg element in the
# SVG namespace is then replaced by an empty <svg/>.
#
# The document is modified in place.
#
# @param doc [Object] document answering #xpath
# @return [Object] the same document
def formattedstr_strip(doc)
  doc.xpath(WILDCARD_ATTRS, "m" => SVG_NS).each do |container|
    container.elements.each do |child|
      child.traverse do |node|
        next unless node.element?

        node.each { |attr, _value| node.delete(attr) } # rubocop:disable Style/HashEachMethods
      end
    end
  end
  doc.xpath("//m:svg", "m" => SVG_NS).each { |svg| svg.replace("<svg/>") }
  doc
end
#hanging_para_style(root) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/metanorma/standoc/validate_section.rb', line 47

# Warns about hanging paragraphs: a clause, annex, foreword,
# introduction or acknowledgements element that has a direct clause
# child and also has a child element other than clause or title.
#
# @param root [Object] node answering #xpath
def hanging_para_style(root)
  containers = "//clause | //annex | //foreword | //introduction | " \
               "//acknowledgements"
  root.xpath(containers).each do |section|
    section.at("./clause") or next
    # Only clause/title children: nothing is hanging.
    section.elements.all? { |el| %w(clause title).include?(el.name) } and next
    style_warning(section, "Hanging paragraph in clause")
  end
end
#iev_validate(xmldoc) ⇒ Object
14 15 16 17 18 19 20 21 22 23 |
# File 'lib/metanorma/standoc/validate_term.rb', line 14

# Checks each termsource that cites IEC 60050 by passing the term and
# the text found at SOURCELOCALITY to #iev_validate1. Does nothing when
# #init_iev returns a falsy value.
#
# @param xmldoc [Object] node answering #xpath
def iev_validate(xmldoc)
  @iev = init_iev or return
  xmldoc.xpath("//term").each do |term|
    term.xpath(".//termsource").each do |src|
      # "IEC" may be followed by an ordinary space or a no-break space
      # (U+00A0). The escape keeps the class free of encoding damage.
      /^IEC[ \u00A0]60050-/.match(src.at("./origin/@citeas")&.text) or next
      loc = src.xpath(SOURCELOCALITY)&.text or next
      iev_validate1(term, loc, xmldoc)
    end
  end
end
#iev_validate1(term, loc, xmldoc) ⇒ Object
25 26 27 28 29 30 31 32 33 34 |
# File 'lib/metanorma/standoc/validate_term.rb', line 25

# Fetches the entry for +loc+ from @iev, using the document's language
# element (or "en" when absent). Logs a "Bibliography" entry when no
# preferred name of the term matches it, ignoring case. Does nothing
# when the fetch returns a falsy value.
#
# @param term [Object] term node answering #xpath
# @param loc [String] key passed to @iev.fetch
# @param xmldoc [Object] node answering #at
def iev_validate1(term, loc, xmldoc)
  lang = xmldoc.at("//language")&.text || "en"
  iev = @iev.fetch(loc, lang) or return
  pref = term.xpath("./preferred//name").map { |name| name.text&.downcase }
  pref.include?(iev.downcase) or
    @log.add("Bibliography", term,
             %(Term "#{pref[0]}" does not match IEV #{loc} "#{iev}"),
             severity: 1)
end
#image_exists(doc) ⇒ Object
149 150 151 152 153 154 155 156 157 |
# File 'lib/metanorma/standoc/validate.rb', line 149

# Logs "Image not found" for every image whose src is not a URL, is not
# a data URI, and cannot be resolved by #expand_path.
#
# @param doc [Object] document answering #xpath
def image_exists(doc)
  doc.xpath("//image").each do |i|
    Vectory::Utils::url?(i["src"]) and next
    Vectory::Utils::datauri?(i["src"]) and next
    # The listing had lost this call's name; a bare `(i["src"])` is
    # truthy for any non-empty src and would skip every image.
    expand_path(i["src"]) and next
    @log.add("Images", i.parent, "Image not found: #{i['src']}",
             severity: 0)
  end
end
#image_toobig(doc) ⇒ Object
190 191 192 193 194 195 196 |
# File 'lib/metanorma/standoc/validate.rb', line 190

# Logs TOO_BIG_IMG_ERR for every image whose src string is longer than
# @dataurimaxsize. The check is skipped when @dataurimaxsize is zero.
#
# @param doc [Object] document answering #xpath
def image_toobig(doc)
  return if @dataurimaxsize.zero?

  doc.xpath("//image").each do |img|
    next unless img["src"].size > @dataurimaxsize

    @log.add("Images", img.parent, TOO_BIG_IMG_ERR, severity: 0)
  end
end
#image_validate(doc) ⇒ Object
143 144 145 146 147 |
# File 'lib/metanorma/standoc/validate.rb', line 143

# Runs the image checks in order: existence, size, then PNG integrity.
#
# @param doc [Object] document handed to each check
def image_validate(doc)
  image_exists(doc)
  image_toobig(doc)
  png_validate(doc)
end
#init_iev ⇒ Object
7 8 9 10 11 12 |
# File 'lib/metanorma/standoc/validate_term.rb', line 7

# Returns the memoised Iev::Db instance, creating it on first use from
# @iev_globalname and @iev_localname. Returns nil when @no_isobib is set.
#
# @return [Object, nil]
def init_iev
  return nil if @no_isobib

  @iev ||= Iev::Db.new(@iev_globalname, @iev_localname)
end
#math_validate(doc) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/metanorma/standoc/validate.rb', line 35

# Validates every MathML math element by parsing its sanitised form
# with Plurimath and serialising it back. Any StandardError raised is
# reported through #math_validate_error. An element whose parent has
# validate="false" is not parsed; the attribute is removed instead.
#
# @param doc [Object] document answering #xpath
def math_validate(doc)
  doc.xpath("//m:math", "m" => MATHML_NS).each do |m|
    if m.parent["validate"] == "false"
      m.parent.delete("validate")
      next
    end

    math = mathml_sanitise(m.dup)
    Plurimath::Math.parse(math, "mathml").to_mathml
  rescue StandardError => e
    # math is nil here if the failure happened before it was assigned
    math_validate_error(math, m, e)
  end
end
#math_validate_error(math, elem, error) ⇒ Object
53 54 55 56 57 58 59 60 61 |
# File 'lib/metanorma/standoc/validate.rb', line 53

# Logs an "Invalid MathML" entry. The message includes the decoded
# content of any asciimath or latexmath sibling of the math element.
#
# @param math [String, nil] sanitised MathML text that failed
# @param elem [Object] the math element
# @param error [Object] the error raised, interpolated into the message
def math_validate_error(math, elem, error)
  ascii = elem.parent.at("./asciimath")
  latex = elem.parent.at("./latexmath")
  orig = [
    ascii && "\n\tAsciimath original: #{@c.decode(ascii.children.to_xml)}",
    latex && "\n\tLatexmath original: #{@c.decode(latex.children.to_xml)}",
  ].compact.join
  @log.add("Maths", elem,
           "Invalid MathML: #{math}\n #{error}#{orig}", severity: 0)
end
#mathml_sanitise(math) ⇒ Object
48 49 50 51 |
# File 'lib/metanorma/standoc/validate.rb', line 48

# Serialises a node as US-ASCII XML, then removes default namespace
# declarations and the namespace prefixes on opening and closing tags.
#
# @param math [#to_xml] node to serialise
# @return [String]
def mathml_sanitise(math)
  xml = math.to_xml(encoding: "US-ASCII")
  no_default_ns = xml.gsub(/ xmlns=["'][^"']+["']/, "")
  no_open_prefix = no_default_ns.gsub(%r{<[^:/>]+:}, "<")
  no_open_prefix.gsub(%r{</[^:/>]+:}, "</")
end
#max_td_count(table) ⇒ Object
23 24 25 26 27 28 29 30 |
# File 'lib/metanorma/standoc/validate_table.rb', line 23

# Returns the largest number of td/th cells in any tr that is a direct
# child of the table, or 0 when there are no such rows.
#
# @param table [Object] node answering #xpath
# @return [Integer]
def max_td_count(table)
  cell_counts = table.xpath("./tr").map { |tr| tr.xpath("./td | ./th").size }
  cell_counts.max || 0
end
#maxcols_check(col, maxcols, tcell) ⇒ Object
If maxcols or maxrows is negative, do not check them.
87 88 89 90 91 92 |
# File 'lib/metanorma/standoc/validate_table.rb', line 87

# Logs a "Table" entry when +col+ exceeds +maxcols+. No check is made
# unless +maxcols+ is positive.
#
# @param col [Integer] column position reached
# @param maxcols [Integer] column limit
# @param tcell [Object] cell the log entry is attached to
def maxcols_check(col, maxcols, tcell)
  return unless maxcols.positive? && col > maxcols

  @log.add("Table", tcell,
           "Table exceeds maximum number of columns defined (#{maxcols})",
           severity: 0)
end
#maxcols_validate1(tcell, row, curr, cells2d, maxcols, mode) ⇒ Object
This code doesn’t actually do anything, since Asciidoctor refuses to generate a table with an inconsistent column count.
57 58 59 60 61 62 63 |
# File 'lib/metanorma/standoc/validate_table.rb', line 57

# Places one cell in the occupancy grid and returns the column index
# following it. In "row_cols" mode the last column the cell occupies is
# also checked against +maxcols+.
#
# @param tcell [Object, nil] cell; rowspan and colspan default to 1
# @param row [Integer] row index of the cell
# @param curr [Integer] column at which to start looking for free space
# @param cells2d [Array<Hash>] occupancy grid, updated in place
# @param maxcols [Integer] column limit
# @param mode [String] validation mode
# @return [Integer] column index immediately after the cell
def maxcols_validate1(tcell, row, curr, cells2d, maxcols, mode)
  rowspan = tcell&.attr("rowspan")&.to_i || 1
  colspan = tcell&.attr("colspan")&.to_i || 1
  start = table_tracker_update(cells2d, row, curr, rowspan, colspan)
  next_col = start + colspan
  maxcols_check(next_col - 1, maxcols, tcell) if mode == "row_cols"
  next_col
end
#maxrowcols_validate(table, maxcols, mode: "row_cols") ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/metanorma/standoc/validate_table.rb', line 32

# Dispatches row/column validation by mode. "row_cols" validates the
# rows under every child of the table together; "thead_row" validates
# thead, tbody and tfoot separately. Any other mode does nothing.
#
# @param table [Object] table node
# @param maxcols [Integer] column limit
# @param mode [String] "row_cols" or "thead_row"
def maxrowcols_validate(table, maxcols, mode: "row_cols")
  if mode == "row_cols"
    maxrowcols_validate0(table, maxcols, "*", mode)
  elsif mode == "thead_row"
    %w{thead tbody tfoot}.each do |section|
      maxrowcols_validate0(table, maxcols, section, mode)
    end
  end
end
#maxrowcols_validate0(table, maxcols, tablechild, mode) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/metanorma/standoc/validate_table.rb', line 43

# Builds an occupancy grid for the rows under +tablechild+, placing
# each td/th cell in turn, then passes the grid to #maxrows_validate.
#
# @param table [Object] table node answering #xpath
# @param maxcols [Integer] column limit
# @param tablechild [String] child element name, or "*" for any
# @param mode [String] validation mode
def maxrowcols_validate0(table, maxcols, tablechild, mode)
  rows = table.xpath("./#{tablechild}/tr")
  cells2d = rows.map { {} } # one empty occupancy hash per row
  rows.each_with_index do |tr, r|
    tr.xpath("./td | ./th").inject(0) do |curr, td|
      maxcols_validate1(td, r, curr, cells2d, maxcols, mode)
    end
  end
  maxrows_validate(table, cells2d, tablechild, mode)
end
#maxrows_validate(table, cells2d, tablechild, mode) ⇒ Object
77 78 79 80 81 82 83 84 |
# File 'lib/metanorma/standoc/validate_table.rb', line 77

# Logs a "Table" entry when the rows of the occupancy grid do not all
# have the same number of occupied columns. The wording depends on
# +mode+.
#
# @param table [Object] table the log entry is attached to
# @param cells2d [Array<Hash>] occupancy grid, one hash per row
# @param tablechild [String] table section named in "thead_row" mode
# @param mode [String] validation mode
def maxrows_validate(table, cells2d, tablechild, mode)
  # More than one distinct width means some row differs from the first.
  return unless cells2d.map(&:size).uniq.size > 1

  problem = if mode == "thead_row"
              "cannot go outside #{tablechild}"
            else
              "are inconsistent"
            end
  @log.add("Table", table,
           "Table rows in table #{problem}: check rowspan", severity: 0)
end
#nested_asset_report(outer, inner, doc) ⇒ Object
85 86 87 88 89 90 91 |
# File 'lib/metanorma/standoc/validate.rb', line 85

# Logs a "Style" entry for an asset nested inside another asset, then
# reports any cross-reference to the inner asset. A figure nested in a
# figure is not reported.
#
# @param outer [Object] containing asset
# @param inner [Object] nested asset
# @param doc [Object] document, passed on to #nested_asset_xref_report
def nested_asset_report(outer, inner, doc)
  return if outer.name == "figure" && inner.name == "figure"

  msg = "There is an instance of #{inner.name} nested within #{outer.name}"
  @log.add("Style", inner, msg)
  nested_asset_xref_report(outer, inner, doc)
end
#nested_asset_validate(doc) ⇒ Object
63 64 65 66 |
# File 'lib/metanorma/standoc/validate.rb', line 63

# Runs both nesting checks: assets nested in assets or notes, then
# notes nested in notes or termnotes.
#
# @param doc [Object] document handed to each check
def nested_asset_validate(doc)
  nested_asset_validate_basic(doc)
  nested_note_validate(doc)
end
#nested_asset_validate_basic(doc) ⇒ Object
68 69 70 71 72 73 74 75 |
# File 'lib/metanorma/standoc/validate.rb', line 68

# Reports every example, figure, termnote, termexample or table found
# anywhere inside another element of those kinds or inside a note.
#
# @param doc [Object] document answering #xpath
def nested_asset_validate_basic(doc)
  assets = "//example | //figure | //termnote | //termexample | //table"
  nested = assets.gsub(%r{//}, ".//") # same set, relative to the outer node
  doc.xpath("#{assets} | //note").each do |outer|
    outer.xpath(nested).each do |inner|
      nested_asset_report(outer, inner, doc)
    end
  end
end
#nested_asset_xref_report(outer, inner, _doc) ⇒ Object
93 94 95 96 97 98 |
# File 'lib/metanorma/standoc/validate.rb', line 93

# Logs a "Style" entry when @doc_xrefs holds a cross-reference whose
# key is the id of the nested asset.
#
# @param outer [Object] containing asset
# @param inner [Object] nested asset
# @param _doc [Object] unused
def nested_asset_xref_report(outer, inner, _doc)
  xref = @doc_xrefs[inner["id"]]
  return unless xref

  msg = "There is a crossreference to an instance of #{inner.name} " \
        "nested within #{outer.name}: #{xref.to_xml}"
  @log.add("Style", xref, msg)
end
#nested_note_validate(doc) ⇒ Object
77 78 79 80 81 82 83 |
# File 'lib/metanorma/standoc/validate.rb', line 77

# Reports every note found anywhere inside a termnote or another note.
#
# @param doc [Object] document answering #xpath
def nested_note_validate(doc)
  doc.xpath("//termnote | //note").each do |outer|
    outer.xpath(".//note").each do |inner|
      nested_asset_report(outer, inner, doc)
    end
  end
end
#norm_ref_validate(doc) ⇒ Object
59 60 61 62 63 64 65 66 |
# File 'lib/metanorma/standoc/validate_section.rb', line 59

# Logs a "Bibliography" entry for each bibitem in a normative
# references section whose metanorma docidentifier is a bracketed
# number such as "[1]".
#
# @param doc [Object] document answering #xpath
def norm_ref_validate(doc)
  doc.xpath("//references[@normative = 'true']/bibitem").each do |bibitem|
    docid = bibitem.at("./docidentifier[@type = 'metanorma']")
    next unless docid && /^\[\d+\]$/.match?(docid.text)

    @log.add("Bibliography", bibitem,
             "Numeric reference in normative references", severity: 1)
  end
end
#png_validate(doc) ⇒ Object
166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/metanorma/standoc/validate.rb', line 166

# Passes the bytes of every image with mimetype image/png to
# #png_validate1. Data URIs are decoded; other sources are resolved
# with #expand_path and read from disk. Images with a URL src, or whose
# file cannot be resolved, are skipped.
#
# @param doc [Object] document answering #xpath
def png_validate(doc)
  doc.xpath("//image[@mimetype = 'image/png']").each do |i|
    Vectory::Utils::url?(i["src"]) and next
    decoded = if Vectory::Utils::datauri?(i["src"])
                Vectory::Utils::decode_datauri(i["src"])[:data]
              else
                # The listing had lost this call's name; without it the
                # raw src is read and a missing file raises.
                path = expand_path(i["src"]) or next
                File.binread(path)
              end
    png_validate1(i, decoded)
  end
end
#png_validate1(img, buffer) ⇒ Object
179 180 181 182 183 184 |
# File 'lib/metanorma/standoc/validate.rb', line 179

# Checks one PNG buffer with PngCheck and logs an "Images" entry,
# carrying the error's message, when the buffer is reported corrupt.
#
# @param img [Object] image node; the entry is attached to its parent
# @param buffer [String] raw image bytes
def png_validate1(img, buffer)
  PngCheck.check_buffer(buffer)
rescue PngCheck::CorruptPngError => e
  @log.add("Images", img.parent,
           "Corrupt PNG image detected: #{e.message}")
end
#preferred_validate(doc) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/metanorma/standoc/validate_term.rb', line 64

# Groups term elements by each of their preferred names, prefixing the
# name with "<domain> " when the term has a domain child, and passes
# the grouping to #preferred_validate_report.
#
# @param doc [Object] document answering #xpath
def preferred_validate(doc)
  by_name = doc.xpath("//term").each_with_object({}) do |term, acc|
    domain = term.at("./domain")&.text
    term.xpath("./preferred//name").each do |name|
      key = name.text
      key = "<#{domain}> #{key}" if domain
      (acc[key] ||= []) << term
    end
  end
  preferred_validate_report(by_name)
end
#preferred_validate_report(terms) ⇒ Object
77 78 79 80 81 82 83 84 |
# File 'lib/metanorma/standoc/validate_term.rb', line 77

# Logs a "Terms" entry for every designation shared by more than one
# term, listing the ids of all the terms involved.
#
# @param terms [Hash{String => Array}] designation mapped to its terms
def preferred_validate_report(terms)
  terms.each do |designation, occurrences|
    next unless occurrences.size > 1

    ids = occurrences.map { |term| term["id"] }.join(", ")
    msg = "Term #{designation} occurs twice as preferred designation: #{ids}"
    @log.add("Terms", occurrences.first, msg, severity: 1)
  end
end
#repeat_id_validate(doc) ⇒ Object
213 214 215 216 217 218 |
# File 'lib/metanorma/standoc/validate.rb', line 213

# Resets @doc_ids and checks every element carrying an id attribute
# for a repeated id.
#
# @param doc [Object] node answering #xpath
def repeat_id_validate(doc)
  @doc_ids = {}
  doc.xpath("//*[@id]").each { |elem| repeat_id_validate1(elem) }
end
#repeat_id_validate1(elem) ⇒ Object
204 205 206 207 208 209 210 211 |
# File 'lib/metanorma/standoc/validate.rb', line 204

# Logs an "Anchors" entry if the element's id is already in @doc_ids,
# then records the element's line number under that id.
#
# @param elem [Object] element answering #[] and #line
def repeat_id_validate1(elem)
  id = elem["id"]
  earlier_line = @doc_ids[id]
  if earlier_line
    msg = "Anchor #{id} has already been used at line #{earlier_line}"
    @log.add("Anchors", elem, msg, severity: 0)
  end
  @doc_ids[id] = elem.line
end
#schema_validate(doc, schema) ⇒ Object
100 101 102 103 104 105 106 107 108 |
# File 'lib/metanorma/standoc/validate.rb', line 100

# Validates the document against a schema through a temporary XML
# file. A Jing::Error is turned into a #clean_abort. The temporary file
# is always closed and unlinked afterwards.
#
# @param doc [Object] document to validate
# @param schema [String] schema path
def schema_validate(doc, schema)
  Tempfile.open(["tmp", ".xml"], encoding: "UTF-8") do |tmp|
    begin
      schema_validate1(tmp, doc, schema)
    rescue Jing::Error => e
      clean_abort("Jing failed with error: #{e}", doc)
    ensure
      tmp.close!
    end
  end
end
#schema_validate1(file, doc, schema) ⇒ Object
110 111 112 113 114 115 116 117 118 119 |
# File 'lib/metanorma/standoc/validate.rb', line 110

# Writes the document to +file+, closes it, validates it with Jing and
# logs each error under "Metanorma XML Syntax" with a zero-padded line
# number and the column. Emits "Syntax Valid!" via warn when there are
# no errors.
#
# @param file [Object] open writable file answering #path
# @param doc [Object] document serialised with #to_xml
# @param schema [String] schema path
def schema_validate1(file, doc, schema)
  file.write(to_xml(doc))
  file.close
  errors = Jing.new(schema, encoding: "UTF-8").validate(file.path)
  warn "Syntax Valid!" if errors.none?
  errors.each do |err|
    location = "XML Line #{format('%06d', err[:line])}:#{err[:column]}"
    @log.add("Metanorma XML Syntax", location, err[:message])
  end
end
#section_validate(doc) ⇒ Object
6 7 8 9 10 |
# File 'lib/metanorma/standoc/validate_section.rb', line 6

# Runs the section-level style checks on the document root: sourcecode
# callouts, hanging paragraphs, then asset titles.
#
# @param doc [Object] document answering #root
def section_validate(doc)
  %i[sourcecode_style hanging_para_style asset_style].each do |check|
    send(check, doc.root)
  end
end
#sourcecode_style(root) ⇒ Object
12 13 14 15 16 17 18 |
# File 'lib/metanorma/standoc/validate_section.rb', line 12

# For every sourcecode element, collects its callout and annotation
# child elements and passes them to #callouts_error.
#
# @param root [Object] node answering #xpath
def sourcecode_style(root)
  root.xpath("//sourcecode").each do |code|
    callouts = code.elements.select { |child| child.name == "callout" }
    annotations = code.elements.select { |child| child.name == "annotation" }
    callouts_error(code, callouts, annotations)
  end
end
#style_warning(node, msg, text = nil) ⇒ Object
28 29 30 31 32 |
# File 'lib/metanorma/standoc/validate_section.rb', line 28

# Logs a "Style" entry against a node. When +text+ is given it is
# appended to the message after ": ".
#
# @param node [Object] node the entry is attached to
# @param msg [String] warning message
# @param text [String, nil] optional detail
def style_warning(node, msg, text = nil)
  warning = text ? msg + ": #{text}" : msg
  @log.add("Style", node, warning)
end
#table_tracker_update(cells2d, row, curr, rowspan, colspan) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/metanorma/standoc/validate_table.rb', line 65

# Finds the first unoccupied column in +row+ at or after +curr+, then
# marks the rowspan x colspan rectangle starting there as occupied.
# Rows missing from the grid are created on demand.
#
# @param cells2d [Array<Hash>] occupancy grid, updated in place
# @param row [Integer] row index of the cell
# @param curr [Integer] column at which to start looking
# @param rowspan [Integer] number of rows the cell covers
# @param colspan [Integer] number of columns the cell covers
# @return [Integer] column in which the cell was placed
def table_tracker_update(cells2d, row, curr, rowspan, colspan)
  cells2d[row] ||= {}
  curr += 1 while cells2d[row][curr]
  rowspan.times do |dy|
    occupied = (cells2d[row + dy] ||= {})
    colspan.times { |dx| occupied[curr + dx] = 1 }
  end
  curr
end
#table_validate(doc) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 |
# File 'lib/metanorma/standoc/validate_table.rb', line 4

# Runs the table checks: empty tables; column counts of tables with a
# colgroup against the number of col elements; then, for tables using
# colspan or rowspan, row/column consistency; and for tables using
# rowspan, consistency within thead, tbody and tfoot.
#
# @param doc [Object] document answering #xpath
def table_validate(doc)
  empty_table_validate(doc)

  doc.xpath("//table[colgroup]").each do |table|
    declared_cols = table.xpath("./colgroup/col").size
    maxrowcols_validate(table, declared_cols)
  end

  spanning = doc.xpath("//table[.//*[@colspan] | .//*[@rowspan]]")
  spanning.each do |table|
    maxrowcols_validate(table, max_td_count(table), mode: "row_cols")
  end

  row_spanning = doc.xpath("//table[.//*[@rowspan]]")
  row_spanning.each do |table|
    maxrowcols_validate(table, max_td_count(table), mode: "thead_row")
  end
end
#validate(doc) ⇒ Object
198 199 200 201 202 |
# File 'lib/metanorma/standoc/validate.rb', line 198

# Entry point: runs the content validations, then validates a stripped
# copy of the document against isodoc-compile.rng, which is located
# next to this file.
#
# @param doc [Object] document to validate
def validate(doc)
  content_validate(doc)
  stripped = formattedstr_strip(doc.dup)
  schema = File.join(File.dirname(__FILE__), "isodoc-compile.rng")
  schema_validate(stripped, schema)
end
#xref_validate(doc) ⇒ Object
manually check for xref/@target, xref/@to integrity
221 222 223 224 225 226 227 228 229 |
# File 'lib/metanorma/standoc/validate.rb', line 221

# Indexes every xref target and to attribute by its text in
# @doc_xrefs, and logs an "Anchors" entry for each one whose text is
# not a key of @doc_ids.
#
# @param doc [Object] document answering #xpath
def xref_validate(doc)
  attrs = doc.xpath("//xref/@target | //xref/@to")
  @doc_xrefs = attrs.each_with_object({}) do |attr, index|
    index[attr.text] = attr
    next if @doc_ids[attr.text]

    @log.add("Anchors", attr.parent,
             "Crossreference target #{attr} is undefined", severity: 1)
  end
end