Module: Metanorma::Standoc::Validate

Included in:
Converter
Defined in:
lib/metanorma/standoc/validate.rb,
lib/metanorma/standoc/validate_term.rb,
lib/metanorma/standoc/validate_table.rb,
lib/metanorma/standoc/validate_section.rb

Constant Summary collapse

# Namespace URI bound to the "m" prefix in the //m:math query of math_validate.
MATHML_NS =
"http://www.w3.org/1998/Math/MathML".freeze
# Namespace URI bound to the "m" prefix in the //m:svg query of formattedstr_strip.
SVG_NS =
"http://www.w3.org/2000/svg".freeze
# XPath union selecting the elements whose descendants have their attributes
# removed by formattedstr_strip.
WILDCARD_ATTRS =
"//*[@format] | //stem | //bibdata//description | " \
"//formattedref | //bibdata//note | //bibdata/abstract | " \
"//bibitem/abstract | //bibitem/note | //metanorma-extension".freeze
# Message logged by image_toobig when an image src is longer than @dataurimaxsize.
TOO_BIG_IMG_ERR =
<<~ERR.freeze
  Image too large for Data URI encoding: disable Data URI encoding (`:data-uri-image: false`), or set `:data-uri-maxsize: 0`
ERR
# XPath, relative to a termsource element, read by iev_validate to obtain the
# clause locality text of the term source.
SOURCELOCALITY =
"./origin//locality[@type = 'clause']/" \
"referenceFrom".freeze

Instance Method Summary collapse

Instance Method Details

#asset_style(root) ⇒ Object



43
44
45
# File 'lib/metanorma/standoc/validate_section.rb', line 43

# Style checks for assets: currently delegates entirely to asset_title_style.
#
# @param root the node passed through to asset_title_style
def asset_style(root)
  asset_title_style(root)
end

#asset_title_style(root) ⇒ Object



34
35
36
37
38
39
40
41
# File 'lib/metanorma/standoc/validate_section.rb', line 34

# Issues a style warning for every figure that contains an image but has no
# name child, and for every table that has no name child.
#
# @param root node answering #xpath; queries are document-wide ("//...")
def asset_title_style(root)
  untitled_figures = root.xpath("//figure[image][not(name)]")
  untitled_figures.each { |fig| style_warning(fig, "Figure should have title", nil) }
  untitled_tables = root.xpath("//table[not(name)]")
  untitled_tables.each { |tbl| style_warning(tbl, "Table should have title", nil) }
end

#callouts_error(elem, callouts, annotations) ⇒ Object



20
21
22
23
24
25
26
# File 'lib/metanorma/standoc/validate_section.rb', line 20

# Logs a "Crossreferences" entry (severity 0) when a sourcecode element has
# annotations whose count differs from its callouts. Nothing is logged when
# there are no annotations at all.
#
# @param elem the element the log entry is attached to
# @param callouts [#size] callout elements
# @param annotations [#size, #empty?] annotation elements
def callouts_error(elem, callouts, annotations)
  return if annotations.empty? || callouts.size == annotations.size

  msg = "mismatch of callouts (#{callouts.size}) and annotations " \
        "(#{annotations.size})"
  @log.add("Crossreferences", elem, msg, severity: 0)
end

#concept_validate(doc, tag, refterm) ⇒ Object



36
37
38
39
40
41
42
43
# File 'lib/metanorma/standoc/validate_term.rb', line 36

# Logs an "Anchors" entry (severity 0) for every xref directly under a `tag`
# element whose target is not a key of @concept_ids.
#
# @param doc document answering #xpath
# @param tag [String] element name whose xref children are checked
# @param refterm [String] relative path handed to concept_validate_msg
def concept_validate(doc, tag, refterm)
  concept_validate_ids(doc) # populates @concept_ids
  doc.xpath("//#{tag}/xref").each do |xref|
    next if @concept_ids[xref["target"]]

    message = concept_validate_msg(doc, tag, refterm, xref)
    @log.add("Anchors", xref, message, severity: 0)
  end
end

#concept_validate_ids(doc) ⇒ Object



45
46
47
48
49
50
51
# File 'lib/metanorma/standoc/validate_term.rb', line 45

# Memoises two lookup tables keyed by element id (each value is true):
# @concept_ids for term elements and dt elements inside definitions, and
# @concept_terms_tags for terms elements. Tables that are already set are
# not rebuilt.
#
# @param doc document answering #xpath
# @return [nil]
def concept_validate_ids(doc)
  id_table = lambda do |path|
    doc.xpath(path).map { |node| [node["id"], true] }.to_h
  end
  @concept_ids ||= id_table.call("//term | //definitions//dt")
  @concept_terms_tags ||= id_table.call("//terms")
  nil
end

#concept_validate_msg(_doc, tag, refterm, xref) ⇒ Object



53
54
55
56
57
58
59
60
61
62
# File 'lib/metanorma/standoc/validate_term.rb', line 53

# Builds the log message for an xref that does not point to a term or symbol.
# When the target is a key of @concept_terms_tags, the message is stripped
# and a hint about subterms is appended; otherwise the message keeps its
# trailing newline.
#
# @param _doc unused
# @param tag [String] element name, capitalised at the start of the message
# @param refterm [String] path, relative to the xref's parent, of the label
# @param xref node answering #[] and #at
# @return [String]
def concept_validate_msg(_doc, tag, refterm, xref)
  target = xref["target"]
  label = xref.at("../#{refterm}")&.text
  msg = <<~LOG
    #{tag.capitalize} #{label} is pointing to #{target}, which is not a term or symbol
  LOG
  return msg unless @concept_terms_tags[target]

  "#{msg.strip}. Did you mean to point to a subterm?"
end

#content_validate(doc) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/metanorma/standoc/validate.rb', line 13

# Runs the content-level validators over doc in a fixed order, then aborts
# via clean_abort if @log reports any abort messages.
#
# The order matters: repeat_id_validate fills @doc_ids, which xref_validate
# reads; xref_validate fills @doc_xrefs, which the nested-asset checks read.
#
# @param doc parsed document answering #at, #root and #xpath
def content_validate(doc)
  # cached for later use; nil when the document declares no doctype
  @doctype = doc.at("//bibdata/ext/doctype")&.text
  repeat_id_validate(doc.root) # feeds xref_validate
  xref_validate(doc) # feeds nested_asset_validate
  nested_asset_validate(doc)
  section_validate(doc)
  norm_ref_validate(doc)
  iev_validate(doc.root)
  concept_validate(doc, "concept", "refterm")
  concept_validate(doc, "related", "preferred//name")
  preferred_validate(doc)
  table_validate(doc)
  requirement_validate(doc)
  image_validate(doc)
  math_validate(doc)
  # all abort messages are reported together, one per line
  fatalerrors = @log.abort_messages
  fatalerrors.empty? or
    clean_abort(fatalerrors.join("\n"), doc)
end

#empty_table_validate(doc) ⇒ Object



17
18
19
20
21
# File 'lib/metanorma/standoc/validate_table.rb', line 17

# Logs a "Table" entry (severity 0) for every table with no tr descendant.
#
# @param doc document answering #xpath
def empty_table_validate(doc)
  rowless = doc.xpath("//table[not(.//tr)]")
  rowless.each { |table| @log.add("Table", table, "Empty table", severity: 0) }
end

#expand_path(loc) ⇒ Object



159
160
161
162
163
164
# File 'lib/metanorma/standoc/validate.rb', line 159

# Resolves loc to an existing filesystem path, trying loc as given first and
# then loc joined onto @localdir.
#
# A nil or empty loc yields nil: joining an empty string onto @localdir
# would otherwise name the directory itself, which exists and would be
# reported as a found file, and a nil loc would raise inside File.join.
#
# @param loc [String, nil] path as written in the document
# @return [String, nil] the first candidate that exists, or nil
def expand_path(loc)
  return nil if loc.to_s.empty?

  [loc, File.join(@localdir, loc)].find { |candidate| File.exist?(candidate) }
end

#formattedstr_strip(doc) ⇒ Object

RelaxNG cannot cope well with wildcard attributes. So we strip any attributes from FormattedString instances (which can contain xs:any markup, and are signalled with @format) before validation.



131
132
133
134
135
136
137
138
139
140
141
# File 'lib/metanorma/standoc/validate.rb', line 131

# Removes every attribute from the element descendants of the nodes selected
# by WILDCARD_ATTRS, and replaces each SVG-namespaced svg element with an
# empty <svg/>. The document is modified in place and returned.
#
# @param doc document answering #xpath (validate passes a duplicate)
# @return the same doc, after modification
def formattedstr_strip(doc)
  doc.xpath(WILDCARD_ATTRS, "m" => SVG_NS).each do |n|
    # the selected node keeps its own attributes; only its child elements
    # and everything beneath them are traversed
    n.elements.each do |e|
      e.traverse do |e1|
        e1.element? and e1.each { |k, _v| e1.delete(k) } # rubocop:disable Style/HashEachMethods
      end
    end
  end
  doc.xpath("//m:svg", "m" => SVG_NS).each { |n| n.replace("<svg/>") }
  doc
end

#hanging_para_style(root) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/metanorma/standoc/validate_section.rb', line 47

# Issues a "Hanging paragraph in clause" style warning for each clause,
# annex, foreword, introduction or acknowledgements element that has a
# direct clause child and also has at least one child element that is
# neither a clause nor a title.
#
# @param root node answering #xpath
def hanging_para_style(root)
  containers = "//clause | //annex | //foreword | //introduction | " \
               "//acknowledgements"
  root.xpath(containers).each do |section|
    section.at("./clause") or next
    only_structural = section.elements.all? do |child|
      %w(clause title).include?(child.name)
    end
    only_structural and next

    style_warning(section, "Hanging paragraph in clause")
  end
end

#iev_validate(xmldoc) ⇒ Object



14
15
16
17
18
19
20
21
22
23
# File 'lib/metanorma/standoc/validate_term.rb', line 14

# Checks each term whose termsource cites IEC 60050 against the IEV
# database via iev_validate1. Does nothing when init_iev yields no database.
#
# A termsource is skipped unless its origin/@citeas starts with "IEC",
# followed by an ordinary or non-breaking space, followed by "60050-".
# It is also skipped when SOURCELOCALITY yields no text: the text of an
# empty node set is "", which is truthy, so without the explicit check a
# lookup would be attempted with an empty code.
#
# @param xmldoc node answering #xpath
def iev_validate(xmldoc)
  @iev = init_iev or return
  xmldoc.xpath("//term").each do |t|
    t.xpath(".//termsource").each do |src|
      citeas = src.at("./origin/@citeas")&.text
      /^IEC[ \u00A0]60050-/.match?(citeas.to_s) or next
      loc = src.xpath(SOURCELOCALITY).text
      loc.empty? and next
      iev_validate1(t, loc, xmldoc)
    end
  end
end

#iev_validate1(term, loc, xmldoc) ⇒ Object



25
26
27
28
29
30
31
32
33
34
# File 'lib/metanorma/standoc/validate_term.rb', line 25

# Compares a term's preferred names with the designation that @iev returns
# for loc, and logs a "Bibliography" entry (severity 1) when none of the
# names matches case-insensitively. Returns early when @iev has no entry.
#
# @param term node whose ./preferred//name descendants are compared
# @param loc [String] code looked up in @iev
# @param xmldoc node whose first //language text selects the lookup
#   language; "en" is used when there is none
def iev_validate1(term, loc, xmldoc)
  lang = xmldoc.at("//language")&.text || "en"
  iev = @iev.fetch(loc, lang) or return
  names = term.xpath("./preferred//name").map { |n| n.text&.downcase }
  message = %(Term "#{names[0]}" does not match ) +
            %(IEV #{loc} "#{iev}")
  names.include?(iev.downcase) or
    @log.add("Bibliography", term, message, severity: 1)
end

#image_exists(doc) ⇒ Object



149
150
151
152
153
154
155
156
157
# File 'lib/metanorma/standoc/validate.rb', line 149

# Logs an "Images" entry (severity 0) against the parent of every image
# whose src is not a URL, not a data URI, and cannot be resolved by
# expand_path.
#
# @param doc document answering #xpath
def image_exists(doc)
  doc.xpath("//image").each do |img|
    src = img["src"]
    next if Vectory::Utils::url?(src) ||
      Vectory::Utils::datauri?(src) ||
      expand_path(src)

    @log.add("Images", img.parent, "Image not found: #{src}", severity: 0)
  end
end

#image_toobig(doc) ⇒ Object



190
191
192
193
194
195
196
# File 'lib/metanorma/standoc/validate.rb', line 190

# Logs TOO_BIG_IMG_ERR (severity 0) against the parent of every image whose
# src string is longer than @dataurimaxsize. The check is skipped entirely
# when @dataurimaxsize is zero.
#
# @param doc document answering #xpath
def image_toobig(doc)
  limit = @dataurimaxsize
  return if limit.zero?

  doc.xpath("//image").each do |img|
    next unless img["src"].size > limit

    @log.add("Images", img.parent, TOO_BIG_IMG_ERR, severity: 0)
  end
end

#image_validate(doc) ⇒ Object



143
144
145
146
147
# File 'lib/metanorma/standoc/validate.rb', line 143

# Runs the three image checks in order: existence (image_exists), size
# limit (image_toobig), and PNG integrity (png_validate).
#
# @param doc document passed unchanged to each check
def image_validate(doc)
  image_exists(doc)
  image_toobig(doc)
  png_validate(doc)
end

#init_ievObject



7
8
9
10
11
12
# File 'lib/metanorma/standoc/validate_term.rb', line 7

# Returns the memoised Iev::Db instance, creating it from @iev_globalname
# and @iev_localname on first use. Returns nil, without creating anything,
# when @no_isobib is set.
#
# @return [Iev::Db, nil]
def init_iev
  return nil if @no_isobib

  @iev ||= Iev::Db.new(@iev_globalname, @iev_localname)
end

#math_validate(doc) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/metanorma/standoc/validate.rb', line 35

# Round-trips every MathML math element through Plurimath and reports any
# StandardError raised via math_validate_error. An element whose parent
# carries validate="false" is not parsed; that attribute is removed from
# the parent instead.
#
# @param doc document answering #xpath
def math_validate(doc)
  doc.xpath("//m:math", "m" => MATHML_NS).each do |m|
    holder = m.parent
    if holder["validate"] == "false"
      holder.delete("validate")
      next
    end
    # math is still nil in the rescue below if sanitising itself raised
    math = mathml_sanitise(m.dup)
    Plurimath::Math.parse(math, "mathml").to_mathml
  rescue StandardError => e
    math_validate_error(math, m, e)
  end
end

#math_validate_error(math, elem, error) ⇒ Object



53
54
55
56
57
58
59
60
61
# File 'lib/metanorma/standoc/validate.rb', line 53

# Logs a "Maths" entry (severity 0) for an invalid MathML element. The
# decoded content of any asciimath or latexmath sibling found under the
# element's parent is appended to the message.
#
# @param math the sanitised MathML that failed, interpolated into the message
# @param elem the math element; the log entry is attached to it
# @param error the error raised, interpolated into the message
def math_validate_error(math, elem, error)
  container = elem.parent
  ascii = container.at("./asciimath")
  latex = container.at("./latexmath")
  orig = [
    ascii && "\n\tAsciimath original: #{@c.decode(ascii.children.to_xml)}",
    latex && "\n\tLatexmath original: #{@c.decode(latex.children.to_xml)}",
  ].compact.join
  @log.add("Maths", elem,
           "Invalid MathML: #{math}\n #{error}#{orig}", severity: 0)
end

#mathml_sanitise(math) ⇒ Object



48
49
50
51
# File 'lib/metanorma/standoc/validate.rb', line 48

# Serialises math as US-ASCII XML, then removes default-namespace
# declarations and any namespace prefix on opening and closing tags.
#
# @param math node answering #to_xml(encoding:)
# @return [String]
def mathml_sanitise(math)
  xml = math.to_xml(encoding: "US-ASCII")
  no_default_ns = xml.gsub(/ xmlns=["'][^"']+["']/, "")
  no_open_prefix = no_default_ns.gsub(%r{<[^:/>]+:}, "<")
  no_open_prefix.gsub(%r{</[^:/>]+:}, "</")
end

#max_td_count(table) ⇒ Object



23
24
25
26
27
28
29
30
# File 'lib/metanorma/standoc/validate_table.rb', line 23

# Returns the largest number of td/th children found in any tr that is a
# direct child of table, or 0 when there is no such tr.
#
# NOTE(review): only tr elements directly under table are counted; rows
# nested under thead/tbody/tfoot would not be seen. Confirm this is
# intended.
#
# @param table node answering #xpath
# @return [Integer]
def max_td_count(table)
  widths = table.xpath("./tr").map { |tr| tr.xpath("./td | ./th").size }
  widths.max || 0
end

#maxcols_check(col, maxcols, tcell) ⇒ Object

If maxcols or maxrows is negative, it is not checked.



87
88
89
90
91
92
# File 'lib/metanorma/standoc/validate_table.rb', line 87

# Logs a "Table" entry (severity 0) against tcell when col exceeds maxcols.
# No check is made unless maxcols is positive.
#
# @param col [Integer] column reached by the cell
# @param maxcols [Integer] column limit; zero or negative disables the check
# @param tcell the cell the log entry is attached to
def maxcols_check(col, maxcols, tcell)
  return unless maxcols.positive? && col > maxcols

  @log.add("Table", tcell, "Table exceeds maximum number of columns "\
                           "defined (#{maxcols})", severity: 0)
end

#maxcols_validate1(tcell, row, curr, cells2d, maxcols, mode) ⇒ Object

This code does not actually do anything, since Asciidoctor refuses to generate a table with an inconsistent column count.



57
58
59
60
61
62
63
# File 'lib/metanorma/standoc/validate_table.rb', line 57

# Registers one table cell in the occupancy tracker and returns the column
# index following it. In "row_cols" mode the last column occupied by the
# cell is also checked against maxcols.
#
# @param tcell cell node (may be nil); rowspan and colspan default to 1
# @param row [Integer] index of the cell's row
# @param curr [Integer] first candidate column for the cell
# @param cells2d [Array<Hash>] occupancy tracker, updated in place
# @param maxcols [Integer] column limit forwarded to maxcols_check
# @param mode [String] "row_cols" enables the column check
# @return [Integer] the next candidate column in this row
def maxcols_validate1(tcell, row, curr, cells2d, maxcols, mode)
  rowspan, colspan = %w(rowspan colspan).map do |name|
    tcell&.attr(name)&.to_i || 1
  end
  start = table_tracker_update(cells2d, row, curr, rowspan, colspan)
  mode == "row_cols" and maxcols_check(start + colspan - 1, maxcols, tcell)
  start + colspan
end

#maxrowcols_validate(table, maxcols, mode: "row_cols") ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/metanorma/standoc/validate_table.rb', line 32

# Dispatches table span validation by mode: "row_cols" validates all rows
# of the table's children together; "thead_row" validates thead, tbody and
# tfoot separately. Any other mode does nothing.
#
# @param table table node
# @param maxcols [Integer] column limit forwarded to maxrowcols_validate0
# @param mode [String] "row_cols" (default) or "thead_row"
def maxrowcols_validate(table, maxcols, mode: "row_cols")
  if mode == "row_cols"
    maxrowcols_validate0(table, maxcols, "*", mode)
  elsif mode == "thead_row"
    %w{thead tbody tfoot}.each do |section|
      maxrowcols_validate0(table, maxcols, section, mode)
    end
  end
end

#maxrowcols_validate0(table, maxcols, tablechild, mode) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
# File 'lib/metanorma/standoc/validate_table.rb', line 43

# Walks the rows found at ./<tablechild>/tr, feeding every td/th through
# maxcols_validate1 to build an occupancy tracker with one hash per row,
# then passes the tracker to maxrows_validate.
#
# @param table table node answering #xpath
# @param maxcols [Integer] column limit
# @param tablechild [String] child element name, or "*" for any
# @param mode [String] forwarded to maxcols_validate1 and maxrows_validate
def maxrowcols_validate0(table, maxcols, tablechild, mode)
  rows = table.xpath("./#{tablechild}/tr")
  cells2d = Array.new(rows.size) { {} }
  rows.each_with_index do |tr, r|
    # thread the running column index through the cells of the row
    tr.xpath("./td | ./th").inject(0) do |curr, td|
      maxcols_validate1(td, r, curr, cells2d, maxcols, mode)
    end
  end
  maxrows_validate(table, cells2d, tablechild, mode)
end

#maxrows_validate(table, cells2d, tablechild, mode) ⇒ Object



77
78
79
80
81
82
83
84
# File 'lib/metanorma/standoc/validate_table.rb', line 77

# Logs a "Table" entry (severity 0) when the rows of the occupancy tracker
# do not all record the same number of occupied columns. The wording
# depends on mode.
#
# @param table the table the log entry is attached to
# @param cells2d [Array<Hash>] one hash of occupied columns per row
# @param tablechild [String] section name used in the "thead_row" wording
# @param mode [String] "thead_row" selects the section-specific wording
def maxrows_validate(table, cells2d, tablechild, mode)
  detail = if mode == "thead_row"
             "cannot go outside #{tablechild}"
           else
             "are inconsistent"
           end
  width = cells2d.first&.size
  cells2d.all? { |row| row.size == width } and return

  @log.add("Table", table, "Table rows in table #{detail}: check rowspan",
           severity: 0)
end

#nested_asset_report(outer, inner, doc) ⇒ Object



85
86
87
88
89
90
91
# File 'lib/metanorma/standoc/validate.rb', line 85

# Logs a "Style" entry for an asset nested inside another asset, then
# reports any cross-reference to the nested asset. A figure nested in a
# figure is not reported.
#
# @param outer the enclosing asset node
# @param inner the nested asset node; the log entry is attached to it
# @param doc forwarded to nested_asset_xref_report
def nested_asset_report(outer, inner, doc)
  return if [outer.name, inner.name] == %w(figure figure)

  message =
    "There is an instance of #{inner.name} nested within #{outer.name}"
  @log.add("Style", inner, message)
  nested_asset_xref_report(outer, inner, doc)
end

#nested_asset_validate(doc) ⇒ Object



63
64
65
66
# File 'lib/metanorma/standoc/validate.rb', line 63

# Runs both nesting checks: assets nested in assets or notes
# (nested_asset_validate_basic), then notes nested in notes or termnotes
# (nested_note_validate).
#
# @param doc document passed unchanged to each check
def nested_asset_validate(doc)
  nested_asset_validate_basic(doc)
  nested_note_validate(doc)
end

#nested_asset_validate_basic(doc) ⇒ Object



68
69
70
71
72
73
74
75
# File 'lib/metanorma/standoc/validate.rb', line 68

# Reports every example, figure, termnote, termexample or table found as a
# descendant of one of those same elements or of a note.
#
# @param doc document answering #xpath
def nested_asset_validate_basic(doc)
  assets = "//example | //figure | //termnote | //termexample | //table"
  # same element list, made relative to the enclosing asset
  nested = assets.gsub(%r{//}, ".//")
  doc.xpath("#{assets} | //note").each do |outer|
    outer.xpath(nested).each { |inner| nested_asset_report(outer, inner, doc) }
  end
end

#nested_asset_xref_report(outer, inner, _doc) ⇒ Object



93
94
95
96
97
98
# File 'lib/metanorma/standoc/validate.rb', line 93

# Logs a "Style" entry when @doc_xrefs holds a cross-reference to the
# nested asset's id; does nothing otherwise. The entry is attached to the
# recorded cross-reference.
#
# @param outer the enclosing asset node
# @param inner the nested asset node, looked up by its id attribute
# @param _doc unused
def nested_asset_xref_report(outer, inner, _doc)
  xref = @doc_xrefs[inner["id"]]
  return unless xref

  message = "There is a crossreference to an instance of #{inner.name} " \
            "nested within #{outer.name}: #{xref.to_xml}"
  @log.add("Style", xref, message)
end

#nested_note_validate(doc) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/metanorma/standoc/validate.rb', line 77

# Reports every note found as a descendant of a termnote or of another note.
#
# @param doc document answering #xpath
def nested_note_validate(doc)
  doc.xpath("//termnote | //note").each do |outer|
    outer.xpath(".//note").each { |inner| nested_asset_report(outer, inner, doc) }
  end
end

#norm_ref_validate(doc) ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/metanorma/standoc/validate_section.rb', line 59

# Logs a "Bibliography" entry (severity 1) for every bibitem in a normative
# references section whose metanorma-type docidentifier is a bracketed
# number such as "[1]".
#
# @param doc document answering #xpath
def norm_ref_validate(doc)
  doc.xpath("//references[@normative = 'true']/bibitem").each do |bib|
    docid = bib.at("./docidentifier[@type = 'metanorma']")
    next unless docid && /^\[\d+\]$/.match?(docid.text)

    @log.add("Bibliography", bib,
             "Numeric reference in normative references", severity: 1)
  end
end

#png_validate(doc) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/metanorma/standoc/validate.rb', line 166

# Passes the bytes of every image with mimetype image/png to png_validate1.
# Images whose src is a URL are skipped; data URIs are decoded; other srcs
# are resolved with expand_path and skipped when no file is found.
#
# @param doc document answering #xpath
def png_validate(doc)
  doc.xpath("//image[@mimetype = 'image/png']").each do |img|
    src = img["src"]
    next if Vectory::Utils::url?(src)

    if Vectory::Utils::datauri?(src)
      png_validate1(img, Vectory::Utils::decode_datauri(src)[:data])
    elsif (path = expand_path(src))
      png_validate1(img, File.binread(path))
    end
  end
end

#png_validate1(img, buffer) ⇒ Object



179
180
181
182
183
184
# File 'lib/metanorma/standoc/validate.rb', line 179

# Checks a PNG byte buffer with PngCheck and, when it reports corruption,
# logs an "Images" entry against the image's parent element.
#
# @param img image node; the log entry is attached to img.parent
# @param buffer PNG data handed to PngCheck.check_buffer
def png_validate1(img, buffer)
  PngCheck.check_buffer(buffer)
rescue PngCheck::CorruptPngError => e
  # only PngCheck::CorruptPngError is handled here; no severity is passed
  @log.add("Images", img.parent,
           "Corrupt PNG image detected: #{e.message}")
end

#preferred_validate(doc) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/metanorma/standoc/validate_term.rb', line 64

# Finds preferred designations that occur more than once across all terms
# and hands them to preferred_validate_report. A designation is qualified
# as "<domain> name" when its term has a domain child. The first term seen
# for each designation is recorded as its location; every later occurrence
# adds the designation to the duplicates list.
#
# @param doc document answering #xpath
def preferred_validate(doc)
  duplicates = []
  first_seen = {}
  doc.xpath("//term").each do |term|
    domain = term.at("./domain")&.text
    term.xpath("./preferred//name").each do |name|
      label = domain ? "<#{domain}> #{name.text}" : name.text
      if first_seen[label]
        duplicates << label
      else
        first_seen[label] = term
      end
    end
  end
  preferred_validate_report(duplicates, first_seen)
end

#preferred_validate_report(terms, locations) ⇒ Object



77
78
79
80
81
82
# File 'lib/metanorma/standoc/validate_term.rb', line 77

# Logs one "Terms" entry (severity 1) per duplicated preferred designation,
# attached to the location recorded for that designation.
#
# @param terms [Array<String>] duplicated designations
# @param locations [Hash] designation => node the entry is attached to
def preferred_validate_report(terms, locations)
  terms.each do |designation|
    @log.add("Terms", locations[designation],
             "Term #{designation} occurs twice as preferred designation",
             severity: 1)
  end
end

#repeat_id_validate(doc) ⇒ Object



213
214
215
216
217
218
# File 'lib/metanorma/standoc/validate.rb', line 213

# Resets @doc_ids and runs repeat_id_validate1 over every element that has
# an id attribute.
#
# @param doc node answering #xpath (content_validate passes the root)
def repeat_id_validate(doc)
  @doc_ids = {}
  doc.xpath("//*[@id]").each { |elem| repeat_id_validate1(elem) }
end

#repeat_id_validate1(elem) ⇒ Object



204
205
206
207
208
209
210
211
# File 'lib/metanorma/standoc/validate.rb', line 204

# Records the line on which an id is used in @doc_ids. If the id was
# already recorded, an "Anchors" entry (severity 0) quoting the previously
# recorded line is logged first.
#
# @param elem element answering #[] and #line
def repeat_id_validate1(elem)
  id = elem["id"]
  earlier_line = @doc_ids[id]
  earlier_line and
    @log.add("Anchors", elem,
             "Anchor #{id} has already been " \
             "used at line #{earlier_line}", severity: 0)
  @doc_ids[id] = elem.line
end

#schema_validate(doc, schema) ⇒ Object



100
101
102
103
104
105
106
107
108
# File 'lib/metanorma/standoc/validate.rb', line 100

# Validates doc against schema through a temporary XML file.
#
# @param doc document serialised by schema_validate1
# @param schema schema location handed to schema_validate1
def schema_validate(doc, schema)
  Tempfile.open(["tmp", ".xml"], encoding: "UTF-8") do |f|
    schema_validate1(f, doc, schema)
  rescue Jing::Error => e
    # a failure of the validator itself aborts processing
    clean_abort("Jing failed with error: #{e}", doc)
  ensure
    # close and remove the temporary file whatever happened above
    f.close!
  end
end

#schema_validate1(file, doc, schema) ⇒ Object



110
111
112
113
114
115
116
117
118
119
# File 'lib/metanorma/standoc/validate.rb', line 110

# Writes the serialised doc to file, closes it, validates it with Jing
# against schema, and logs each reported error under "Metanorma XML Syntax"
# with a zero-padded line number and the column as its location. Prints
# "Syntax Valid!" via warn when no errors are reported.
#
# @param file writable file object answering #write, #close and #path
# @param doc document serialised with to_xml
# @param schema schema location handed to Jing.new
def schema_validate1(file, doc, schema)
  file.write(to_xml(doc))
  file.close
  validator = Jing.new(schema, encoding: "UTF-8")
  errors = validator.validate(file.path)
  errors.none? && warn("Syntax Valid!")
  errors.each do |e|
    location = "XML Line #{'%06d' % e[:line]}:#{e[:column]}"
    @log.add("Metanorma XML Syntax", location, e[:message])
  end
end

#section_validate(doc) ⇒ Object



6
7
8
9
10
# File 'lib/metanorma/standoc/validate_section.rb', line 6

# Runs the section-level style checks on the document root: sourcecode
# callouts, hanging paragraphs, then asset titles.
#
# @param doc document answering #root
def section_validate(doc)
  sourcecode_style(doc.root)
  hanging_para_style(doc.root)
  asset_style(doc.root)
end

#sourcecode_style(root) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/metanorma/standoc/validate_section.rb', line 12

# For every sourcecode element, collects its callout and annotation child
# elements and passes both lists to callouts_error.
#
# @param root node answering #xpath
def sourcecode_style(root)
  root.xpath("//sourcecode").each do |code|
    children = code.elements.group_by(&:name)
    callouts_error(code, children.fetch("callout", []),
                   children.fetch("annotation", []))
  end
end

#style_warning(node, msg, text = nil) ⇒ Object



28
29
30
31
32
# File 'lib/metanorma/standoc/validate_section.rb', line 28

# Logs a "Style" entry for node. When text is given, it is appended to the
# message after ": ".
#
# @param node the node the log entry is attached to
# @param msg [String] warning message
# @param text [String, nil] optional detail
def style_warning(node, msg, text = nil)
  @log.add("Style", node, text ? "#{msg}: #{text}" : msg)
end

#table_tracker_update(cells2d, row, curr, rowspan, colspan) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/metanorma/standoc/validate_table.rb', line 65

# Places a cell in the occupancy tracker: advances curr past columns that
# are already occupied in the given row, then marks the rowspan x colspan
# block starting there as occupied, creating row hashes as needed.
#
# @param cells2d [Array<Hash>] per-row hashes of occupied columns, updated
#   in place
# @param row [Integer] row index of the cell
# @param curr [Integer] first candidate column
# @param rowspan [Integer] number of rows covered
# @param colspan [Integer] number of columns covered
# @return [Integer] the column at which the cell was placed
def table_tracker_update(cells2d, row, curr, rowspan, colspan)
  cells2d[row] ||= {}
  curr += 1 while cells2d[row][curr]
  rowspan.times do |dy|
    occupied = (cells2d[row + dy] ||= {})
    colspan.times { |dx| occupied[curr + dx] = 1 }
  end
  curr
end

#table_validate(doc) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
# File 'lib/metanorma/standoc/validate_table.rb', line 4

# Runs the table checks: empty tables first, then tables with a colgroup
# against their declared column count, then tables containing colspan or
# rowspan in "row_cols" mode, then tables containing rowspan in
# "thead_row" mode. The last two use max_td_count as the column limit.
#
# @param doc document answering #xpath
def table_validate(doc)
  empty_table_validate(doc)
  doc.xpath("//table[colgroup]").each do |table|
    declared_cols = table.xpath("./colgroup/col").size
    maxrowcols_validate(table, declared_cols)
  end
  spanning = doc.xpath("//table[.//*[@colspan] | .//*[@rowspan]]")
  spanning.each do |table|
    maxrowcols_validate(table, max_td_count(table), mode: "row_cols")
  end
  row_spanning = doc.xpath("//table[.//*[@rowspan]]")
  row_spanning.each do |table|
    maxrowcols_validate(table, max_td_count(table), mode: "thead_row")
  end
end

#validate(doc) ⇒ Object



198
199
200
201
202
# File 'lib/metanorma/standoc/validate.rb', line 198

# Runs content validation on doc, then schema validation on a duplicate of
# doc that has been passed through formattedstr_strip, so the stripping
# does not touch the original. The schema is isodoc-compile.rng in the
# directory of this source file.
#
# @param doc parsed document
def validate(doc)
  content_validate(doc)
  schema_validate(formattedstr_strip(doc.dup),
                  File.join(File.dirname(__FILE__), "isodoc-compile.rng"))
end

#xref_validate(doc) ⇒ Object

Manually checks the integrity of xref/@target and xref/@to.



221
222
223
224
225
226
227
228
229
# File 'lib/metanorma/standoc/validate.rb', line 221

# Indexes every xref target and to attribute by its text in @doc_xrefs, and
# logs an "Anchors" entry (severity 1) against the xref element for each
# value that is not a key of @doc_ids. When the same value occurs more than
# once, the last attribute seen is the one kept.
#
# @param doc document answering #xpath
# @return [Hash] the new @doc_xrefs
def xref_validate(doc)
  index = {}
  doc.xpath("//xref/@target | //xref/@to").each do |attr|
    anchor = attr.text
    index[anchor] = attr
    next if @doc_ids[anchor]

    @log.add("Anchors", attr.parent,
             "Crossreference target #{attr} is undefined", severity: 1)
  end
  @doc_xrefs = index
end