Class: Metanorma::Standoc::Cleanup::SpansToBibitem

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/metanorma/standoc/spans_to_bibitem.rb,
lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb

Constant Summary

Constants included from Utils

Utils::SUBCLAUSE_XPATH

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

adoc2xml, #asciimath_key, #attr_code, #csv_split, #dl_to_attrs, #dl_to_elems, #document_ns_attributes, #grkletters, #isodoc, #kv_parse, #link_unwrap, #noko, #quoted_csv_split, #refid?, #term_expr, #to_xml, #wrap_in_para, #xml_encode

Constructor Details

#initialize(bib) ⇒ SpansToBibitem

Returns a new instance of SpansToBibitem.



11
12
13
14
15
16
17
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 11

# Builds the converter state for one bibliographic item.
#
# Spans under the item's formattedref are extracted and preprocessed first;
# the item's existing docidentifier elements are then extracted and
# preprocessed separately, and the two docid lists are combined through
# override_docids.
#
# @param bib the bibitem node to read from (it is modified by the extraction
#   helpers)
def initialize(bib)
  @bib = bib
  @err = []
  @spans = spans_preprocess(extract_spans(bib))
  preexisting = spans_preprocess(extract_docid(bib))[:docid]
  @spans[:docid] = override_docids(preexisting, @spans[:docid])
end

Instance Attribute Details

#err ⇒ Object (readonly)

Returns the value of attribute err.



9
10
11
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 9

# Reader for the error records accumulated in @err.
#
# @return the current value of @err
def err = @err

#out ⇒ Object (readonly)

Returns the value of attribute out.



9
10
11
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 9

# Reader for the converted bibitem stored in @out (set by #convert).
#
# @return the current value of @out
def out = @out

Instance Method Details

#convert ⇒ Object



28
29
30
31
32
33
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 28

# Serialises the preprocessed spans into a <bibitem> element and stores its
# root node in @out. When the spans carry a :type, it is copied onto the
# element's "type" attribute.
#
# @return [self] so that calls can be chained (e.g. `convert.out`)
def convert
  inner = spans_to_bibitem(@spans)
  @out = Nokogiri::XML("<bibitem>#{inner}</bibitem>").root
  @out["type"] = @spans[:type] if @spans[:type]
  self
end

#empty_span_hash ⇒ Object



27
28
29
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 27

# Returns a fresh accumulator for preprocessed spans: empty arrays for the
# list-valued slots and empty hashes for :extent and :in.
#
# @return [Hash] a new hash (with new containers) on every call
def empty_span_hash
  lists = %i[contrib docid uri date].to_h { |slot| [slot, []] }
  lists.merge(extent: {}, in: {})
end

#extract_docid(bib) ⇒ Object



20
21
22
23
24
25
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 20

# Collects the item's direct docidentifier children as span-like records.
#
# Each docidentifier is also removed from the item, unless the item has a
# direct title child.
#
# @param bib the bibitem node
# @return [Array<Hash>] one `{ key: "docid", type:, val: }` hash per
#   docidentifier, in document order
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |id|
    entry = { key: "docid", type: id["type"], val: id.text }
    id.remove unless bib.at("./title")
    entry
  end
end

#extract_spans(bib) ⇒ Object



5
6
7
8
9
10
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 5

# Walks every span under the item's formattedref and hands each one to
# extract_spans1, skipping spans that have a span ancestor at the moment
# they are visited.
#
# The ancestor check is done per node, immediately before that node is
# processed, because extract_spans1 modifies the tree as it goes.
#
# @param bib the bibitem node
# @return [Array<Hash>] the records accumulated by extract_spans1
def extract_spans(bib)
  collected = []
  bib.xpath("./formattedref//span").each do |node|
    next if node.at("./ancestor::span")

    extract_spans1(node, collected)
  end
  collected
end

#extract_spans1(span, acc) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 12

# Records one span and then strips its markup from the tree.
#
# The span's class attribute is split on the first "." into a key and an
# optional type; the serialised children become the value. A span whose
# class is exactly "type" is removed outright; any other span is replaced
# by its children.
#
# @param span the span node (must have a class attribute)
# @param acc [Array<Hash>] accumulator that receives `{ key:, type:, val: }`
def extract_spans1(span, acc)
  key, type = span["class"].split(".", 2)
  acc << { key: key, type: type, val: span.children.to_xml }
  if span["class"] == "type"
    span.remove
  else
    span.replace(span.children)
  end
end

#host_rearrange(ret) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 86

# Completes the host ("in") record once a host title has been seen.
#
# Without a host title the hash is returned untouched. Otherwise the host
# record is padded with the empty-span defaults plus a default type of
# "misc" (existing host keys win), the top-level :series is moved into the
# host, and a top-level type starting with "in" gives the host that type
# with the prefix stripped.
#
# @param ret [Hash] preprocessed spans; modified in place
# @return [Hash] the same hash
def host_rearrange(ret)
  host = ret[:in]
  return ret unless host[:title]

  defaults = empty_span_hash.merge(type: "misc")
  defaults.each { |slot, value| host[slot] = value unless host.key?(slot) }
  # Series information belongs to the host, not to the contained item.
  host[:series] = ret.delete(:series)
  type = ret[:type]
  host[:type] = type.sub(/^in/, "") if /^in/.match?(type)
  ret
end

#multiple_givennames?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


118
119
120
121
122
123
124
125
126
127
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 118

# Tells whether the span adds a further given name / initials to the last
# contributor, which already has one.
#
# When it does and the existing value is stored as formatted initials, that
# value is moved to :givenname so the caller can append to it.
#
# @param span [Hash] span record (:key is inspected)
# @param contrib [Array<Hash>] contributors so far; the last one may be
#   modified
# @return [Boolean]
def multiple_givennames?(span, contrib)
  return false unless %w(formatted-initials givenname).include?(span[:key])

  person = contrib[-1]
  initials = person[:"formatted-initials"]
  return false unless initials || person[:givenname]

  person[:givenname] = person.delete(:"formatted-initials") if initials
  true
end

#override_docids(old, new) ⇒ Object



19
20
21
22
23
24
25
26
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 19

# Merges two lists of document identifiers, letting `new` take precedence:
# an entry from `old` is appended only when its :type did not occur in
# `new` as originally passed.
#
# @param old [Array<Hash>] identifiers already present on the item
# @param new [Array<Hash>] identifiers that override; appended to in place
# @return [Array<Hash>] the `new` array itself
def override_docids(old, new)
  overridden = new.map { |id| id[:type] }
  additions = old.reject { |id| overridden.include?(id[:type]) }
  new.concat(additions)
end

#span_preprocess1(span, ret) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 37

# Files one extracted span into the accumulator hash according to its key.
#
# * uri / docid: appended (after link unwrapping) to the matching list
# * date: appended to :date, type defaulting to "published"
# * pages / volume / issue: appended under :extent
# * pubplace, title, type, series, edition, version: stored as scalars
# * note: stored with its type
# * in_title: stored as the host title
# * publisher, surname, initials, givenname, formatted-initials, fullname,
#   organization: folded into :contrib
# * in_* contributor keys: the "in_" prefix is stripped from span[:key]
#   (in place) and the span is folded into the host's :contrib
# * anything else: an "unrecognised key" record is added to @err
#
# @param span [Hash] `{ key:, type:, val: }`; :key may be modified
# @param ret [Hash] accumulator; modified in place
def span_preprocess1(span, ret)
  key = span[:key]
  case key
  when "uri", "docid"
    unwrapped = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
    ret[key.to_sym] << { type: span[:type], val: unwrapped }
  when "date"
    ret[:date] << { type: span[:type] || "published", val: span[:val] }
  when "pages", "volume", "issue"
    (ret[:extent][key.to_sym] ||= []) << span[:val]
  when "pubplace", "title", "type", "series", "edition", "version"
    ret[key.to_sym] = span[:val]
  when "note"
    ret[:note] = { type: span[:type], val: span[:val] }
  when "in_title"
    ret[:in][:title] = span[:val]
  when "publisher"
    ret[:contrib] << { role: "publisher", entity: "organization",
                       name: span[:val] }
  when "surname", "initials", "givenname", "formatted-initials"
    ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
  when "fullname"
    ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
  when "organization"
    ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
  when "in_surname", "in_initials", "in_givenname",
       "in_formatted-initials", "in_fullname", "in_organization"
    ret[:in][:contrib] ||= []
    # Strip the prefix in place so the shared contributor helpers see the
    # plain key.
    key.sub!(/^in_/, "")
    host_contrib = ret[:in][:contrib]
    ret[:in][:contrib] =
      case key
      when "fullname" then spans_preprocess_fullname(span, host_contrib)
      when "organization" then spans_preprocess_org(span, host_contrib)
      else spans_preprocess_contrib(span, host_contrib)
      end
  else
    @err << { msg: "unrecognised key '#{span[:key]}' in " \
                   "`span:#{span[:key]}[#{span[:val]}]`" }
  end
end

#span_to_contrib(span, title) ⇒ Object



122
123
124
125
126
127
128
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 122

# Renders one contributor record as a <contributor> element: an
# <organization> when the entity is "organization", otherwise whatever
# span_to_person produces.
#
# @param span [Hash] contributor record (:role, :entity, :name, ...)
# @param title title of the item, passed on to span_to_person
# @return [String] XML markup
def span_to_contrib(span, title)
  entity =
    case span[:entity]
    when "organization"
      "<organization><name>#{span[:name]}</name></organization>"
    else
      span_to_person(span, title)
    end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end

#span_to_date(span) ⇒ Object



104
105
106
107
108
109
110
111
112
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 104

# Renders a date record as a <date> element.
#
# A value containing a hyphen or en dash immediately followed by four
# digits is treated as a range and split there (once) into <from>/<to>;
# any other value is emitted as a single <on>.
#
# @param span [Hash] `{ type:, val: }`; the type attribute is omitted when
#   :type is nil
# @return [String] XML markup
def span_to_date(span)
  text = span[:val]
  separator = /[-–](?=\d{4})/
  content =
    if separator.match?(text)
      bounds = text.split(separator, 2)
      "<from>#{bounds[0]}</from><to>#{bounds[1]}</to>"
    else
      "<on>#{text}</on>"
    end
  attribute = span[:type] ? " type='#{span[:type]}'" : ""
  "<date#{attribute}>#{content}</date>"
end

#span_to_docid(span, key) ⇒ Object



97
98
99
100
101
102
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 97

# Wraps an identifier-like record in an element named `key`, adding a type
# attribute only when the record has a :type.
#
# @param span [Hash] `{ type:, val: }`
# @param key [String] element name (e.g. "uri", "docidentifier")
# @return [String] XML markup
def span_to_docid(span, key)
  attribute = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{attribute}>#{span[:val]}</#{key}>"
end

#span_to_extent(span, key) ⇒ Object



88
89
90
91
92
93
94
95
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 88

# Renders one extent value as a <locality> element. The value is split on
# hyphens / en dashes: the first piece becomes <referenceFrom>, the second
# (when there is one) becomes <referenceTo>; further pieces are ignored.
#
# @param span [String] the raw extent value, e.g. "10-20"
# @param key [String] locality type, e.g. "page"
# @return [String] XML markup
def span_to_extent(span, key)
  from, to = span.split(/[-–]/)
  upper = to ? "<referenceTo>#{to}</referenceTo>" : ""
  "<locality type='#{key}'>" \
    "<referenceFrom>#{from}</referenceFrom>#{upper}</locality>"
end

#span_to_person(span, title) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 137

# Renders a person contributor as <person><name>…</name></person>.
#
# The record is validated first (validate_span_to_person). Formatted
# initials, when present, are emitted instead of forenames; otherwise each
# given name becomes one <forename>.
#
# @param span [Hash] contributor record
# @param title title of the item, used by the validation message
# @return [String] XML markup
def span_to_person(span, title)
  validate_span_to_person(span, title)
  initials = span[:"formatted-initials"]
  given =
    if initials
      "<formatted-initials>#{initials}</formatted-initials>"
    else
      Array(span[:givenname]).map { |name| "<forename>#{name}</forename>" }.join
    end
  "<person><name>#{given}<surname>#{span[:surname]}</surname></name>" \
    "</person>"
end

#spans_preprocess(spans) ⇒ Object



31
32
33
34
35
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 31

# Folds a list of extracted spans into a single accumulator hash and then
# applies host_rearrange to it.
#
# @param spans [Array<Hash>] records as produced by the extract_* helpers
# @return [Hash] the result of host_rearrange
def spans_preprocess(spans)
  filled = spans.each_with_object(empty_span_hash) do |span, acc|
    span_preprocess1(span, acc)
  end
  host_rearrange(filled)
end

#spans_preprocess_contrib(span, contrib) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 100

# Folds one name-part span (surname, given name, initials) into the list of
# person contributors.
#
# An "initials" key is normalised to "formatted-initials" on the span
# itself. A new person entry is started when
# spans_preprocess_new_contrib? says so; the value is then either appended
# to the person's given names (multiple_givennames?) or stored under the
# span's key.
#
# @param span [Hash] `{ key:, type:, val: }`; :key may be rewritten
# @param contrib [Array<Hash>] contributors so far; modified in place
# @return [Array<Hash>] the same `contrib` array
def spans_preprocess_contrib(span, contrib)
  span[:key] = "formatted-initials" if span[:key] == "initials"
  if spans_preprocess_new_contrib?(span, contrib)
    contrib << { role: span[:type] || "author", entity: "person" }
  end
  person = contrib[-1]
  if multiple_givennames?(span, contrib)
    person[:givenname] = [person[:givenname], span[:val]].flatten
  else
    person[span[:key].to_sym] = span[:val]
  end
  contrib
end

#spans_preprocess_fullname(span, contrib) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 129

# Splits a full name into a surname (the last space-separated token) and
# either formatted initials or given names, and appends the resulting
# person to `contrib`.
#
# A space is inserted after any "." that is directly followed by a letter,
# so "A.B. Smith" tokenises as "A.", "B.", "Smith". When every token before
# the surname ends in ".", they are stored joined as formatted initials;
# otherwise they are stored as an array of given names.
#
# @param span [Hash] `{ type:, val: }`; role defaults to "author"
# @param contrib [Array<Hash>] contributors so far; modified in place
# @return [Array<Hash>] the same `contrib` array
def spans_preprocess_fullname(span, contrib)
  tokens = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
  *leading, surname = tokens
  person = { role: span[:type] || "author", entity: "person",
             surname: surname }
  if !leading.empty? && leading.all? { |part| /\.$/.match?(part) }
    person[:"formatted-initials"] = leading.join(" ")
  else
    person[:givenname] = leading
  end
  contrib.push(person)
end

#spans_preprocess_new_contrib?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


112
113
114
115
116
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 112

# Decides whether the span starts a new person entry: true-ish when there
# is no contributor yet, when a surname arrives and the last contributor
# already has one, or when the span's role (default "author") differs from
# the last contributor's.
#
# @param span [Hash] `{ key:, type: }`
# @param contrib [Array<Hash>] contributors so far
# @return a truthy or falsy value (not necessarily `true`/`false`)
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  last = contrib[-1]
  (span[:key] == "surname" && last[:surname]) ||
    last[:role] != (span[:type] || "author")
end

#spans_preprocess_org(span, contrib) ⇒ Object



141
142
143
144
145
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 141

# Appends an organisation contributor built from the span; the role
# defaults to "author" when the span has no :type.
#
# @param span [Hash] `{ type:, val: }`
# @param contrib [Array<Hash>] contributors so far; modified in place
# @return [Array<Hash>] the same `contrib` array
def spans_preprocess_org(span, contrib)
  organization = { role: span[:type] || "author", entity: "organization",
                   name: span[:val] }
  contrib.push(organization)
end

#spans_to_bibitem(spans) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 35

# Serialises a preprocessed span hash as the inner XML of a bibitem.
#
# Pieces are emitted in a fixed order: title, uri/docidentifier/date,
# contributors, edition/version/note, series, place, host relation, extent.
#
# @param spans [Hash] preprocessed spans
# @return [String] XML markup (without the enclosing <bibitem>)
def spans_to_bibitem(spans)
  pieces = []
  pieces << "<title>#{spans[:title]}</title>" if spans[:title]
  pieces << spans_to_bibitem_docid(spans)
  pieces << spans_to_contribs(spans)
  pieces << spans_to_bibitem_edn(spans)
  pieces << spans_to_series(spans)
  pieces << "<place>#{spans[:pubplace]}</place>" if spans[:pubplace]
  pieces << spans_to_bibitem_host(spans)
  pieces << spans_to_bibitem_extent(spans[:extent])
  pieces.join
end

#spans_to_bibitem_docid(spans) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 61

# Serialises the URIs, document identifiers and dates of an item, in that
# order.
#
# @param spans [Hash] preprocessed spans; :uri, :docid and :date must be
#   enumerable
# @return [String] XML markup
def spans_to_bibitem_docid(spans)
  uris = spans[:uri].map { |uri| span_to_docid(uri, "uri") }
  ids = spans[:docid].map { |id| span_to_docid(id, "docidentifier") }
  dates = spans[:date].map { |date| span_to_date(date) }
  [*uris, *ids, *dates].join
end

#spans_to_bibitem_edn(spans) ⇒ Object



69
70
71
72
73
74
75
76
77
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 69

# Serialises edition, version and note, each only when present.
#
# The note is written with a type attribute, and an empty attribute
# (`type=''`) is then dropped from the opening tag.
#
# @param spans [Hash] preprocessed spans
# @return [String] XML markup, "" when none of the three is present
def spans_to_bibitem_edn(spans)
  pieces = []
  pieces << "<edition>#{spans[:edition]}</edition>" if spans[:edition]
  pieces << "<version>#{spans[:version]}</version>" if spans[:version]
  if (note = spans[:note])
    markup = "<note type='#{note[:type]}'>#{note[:val]}</note>"
    pieces << markup.sub(/<note type=''>/, "<note>")
  end
  pieces.join
end

#spans_to_bibitem_extent(spans) ⇒ Object



79
80
81
82
83
84
85
86
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 79

# Serialises volume, issue and page values (in that order) as localities
# inside one <extent> element.
#
# @param spans [Hash] the extent hash: optional :volume, :issue and :pages
#   lists
# @return [String] XML markup, "" when nothing was rendered
def spans_to_bibitem_extent(spans)
  locality_types = { volume: "volume", issue: "issue", pages: "page" }
  rendered = locality_types.flat_map do |slot, type|
    Array(spans[slot]).map { |value| span_to_extent(value, type) }
  end
  body = rendered.join
  body.empty? ? "" : "<extent>#{body}</extent>"
end

#spans_to_bibitem_host(spans) ⇒ Object



53
54
55
56
57
58
59
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 53

# Serialises the host ("in") record as an includedIn relation wrapping a
# nested bibitem.
#
# The host's :type goes on the nested bibitem's type attribute and is
# withheld from the hash passed to spans_to_bibitem. It is withheld by
# passing a copy without :type, so the caller's spans are left intact and
# serialising the same spans again yields the same markup.
#
# @param spans [Hash] preprocessed spans; spans[:in] is the host record
# @return [String] XML markup, "" when the host record is empty
def spans_to_bibitem_host(spans)
  host = spans[:in]
  return "" if host.empty?

  body = spans_to_bibitem(host.reject { |slot, _| slot == :type })
  "<relation type='includedIn'><bibitem type='#{host[:type]}'>" \
    "#{body}</bibitem></relation>"
end

#spans_to_contribs(spans) ⇒ Object



114
115
116
117
118
119
120
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 114

# Serialises every contributor of the item, in order, passing the item's
# title along to span_to_contrib.
#
# @param spans [Hash] preprocessed spans; :contrib must be enumerable
# @return [String] XML markup
def spans_to_contribs(spans)
  title = spans[:title]
  spans[:contrib].map { |person| span_to_contrib(person, title) }.join
end

#spans_to_series(spans) ⇒ Object



48
49
50
51
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 48

# Serialises the series title, if there is one.
#
# @param spans [Hash] preprocessed spans
# @return [String] `<series>` markup, or "" when :series is absent
def spans_to_series(spans)
  series = spans[:series]
  series ? "<series><title>#{series}</title></series>" : ""
end

#validate_span_to_person(span, title) ⇒ Object



130
131
132
133
134
135
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 130

# Records a fatal error in @err when a person record has no surname.
#
# @param span [Hash] contributor record
# @param title title of the item, quoted in the message
# @return [nil] when the surname is present; otherwise the result of
#   appending to @err
def validate_span_to_person(span, title)
  return if span[:surname]

  @err << { msg: "Missing surname: issue with bibliographic markup " \
                 "in \"#{title}\": #{span}",
            fatal: true }
end