Class: RelatonBipm::BipmSiBrochureParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_bipm/bipm_si_brochure_parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_fetcher) ⇒ BipmSiBrochureParser

Create new parser

Parameters:



8
9
10
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 8

# Create new parser
#
# Only a weak reference to the fetcher is stored, so this parser does not
# keep the fetcher alive on its own.
#
# @param data_fetcher [Object] fetcher the parser reports to; elsewhere in this
#   class it is asked for `output`, `ext`, `index2` and `write_file`
def initialize(data_fetcher)
  fetcher_ref = WeakRef.new(data_fetcher)
  @data_fetcher = fetcher_ref
end

Class Method Details

.parse(data_fetcher) ⇒ Object

Parse documents from SI brochure dataset and write them to YAML files

Parameters:



17
18
19
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 17

# Build a parser for the given fetcher and run it.
#
# @param data_fetcher [Object] fetcher handed to the constructor
#
# @return [Object] whatever the instance-level #parse returns
def self.parse(data_fetcher)
  parser = new(data_fetcher)
  parser.parse
end

Instance Method Details

#deep_merge(hash1, hash2) ⇒ Hash

Deep merge two hashes

Parameters:

  • hash1 (Hash)
  • hash2 (Hash)

Returns:

  • (Hash)

    Merged hash



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 86

# Deep merge two hashes
#
# For keys present in both hashes: nested hashes are merged recursively,
# arrays are concatenated and de-duplicated case-insensitively (the first
# occurrence wins), and for anything else the value from hash2 is used unless
# it is nil/false, in which case the value from hash1 is kept.
#
# @param hash1 [Hash]
# @param hash2 [Hash]
#
# @return [Hash] Merged hash (a new hash; the top-level arguments are not modified)
def deep_merge(hash1, hash2) # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
  hash1.merge(hash2) do |_key, current, incoming|
    next deep_merge(current, incoming) if current.is_a?(Hash) && incoming.is_a?(Hash)

    if current.is_a?(Array) && incoming.is_a?(Array)
      combined = current + incoming
      # Compare entries by their downcased form so "Foo" and "foo" collapse.
      next combined.uniq { |entry| downcase_all(entry) }
    end

    incoming || current
  end
end

#downcase_all(content) ⇒ Array, ...

Downcase all values in hash or array

Parameters:

  • content (Array, Hash, String)

    hash, array or string

Returns:

  • (Array, Hash, String)

    hash, array or string with downcased values



105
106
107
108
109
110
111
112
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 105

# Downcase all values in hash or array
#
# Hash keys are left untouched; only values are processed, recursively.
# Anything that is not a Hash, Array or String is returned as is.
#
# @param content [Array, Hash, String] hash, array or string
#
# @return [Array, Hash, String] hash, array or string with downcased values
def downcase_all(content)
  return content.transform_values { |value| downcase_all(value) } if content.is_a?(Hash)
  return content.map { |element| downcase_all(element) } if content.is_a?(Array)
  return content.downcase if content.is_a?(String)

  content
end

#fix_si_brochure_id(hash) ⇒ void

This method returns an undefined value.

Update ID of SI brochure

Parameters:

  • hash (Hash)

    hash of bibitem



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 62

# Update ID of SI brochure
#
# Every docid of type "BIPM" whose id contains "BIPM Brochure" is flagged as
# primary and rewritten to "BIPM SI Brochure ...". The primary English id then
# drives two more fields: a docnumber equal to "Brochure" is replaced by that
# id without its "BIPM " prefix, and hash["id"] becomes that id with commas and
# whitespace removed. The hash and the strings inside it are modified in place.
#
# @param hash [Hash] hash of bibitem (string keys: "docid", "docnumber", "id")
#
# @return [void]
#
# @raise [NoMethodError] if hash["docid"] has no primary entry with language "en"
def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  hash["docid"].each do |id|
    next unless id["type"] == "BIPM" && id["id"].match?(/BIPM Brochure/i)

    id["primary"] = true
    id["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
  end

  num = hash["docid"].detect { |id| id["primary"] && id["language"] == "en" }["id"]
  # docnumber may be absent (#parse falls back to another key when it is nil),
  # so only rewrite it when present. The block form inserts the replacement
  # literally, so backslashes in the identifier are not read as back-references.
  hash["docnumber"]&.sub!(/^Brochure$/i) { num.sub(/^BIPM\s/, "") }
  hash["id"] = num.gsub(/[,\s]/, "")
end

#parse ⇒ Object

Parse SI brochure and write them to YAML files



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 24

# Parse SI brochure and write them to YAML files
#
# For every `*.rxl` file under bipm-si-brochure/_site/documents: reads the
# `/bibdata` element, converts it to a hash, normalises the brochure IDs,
# registers the output path in the fetcher's index and writes the item through
# the fetcher. When the output file already exists its content is loaded and
# deep-merged with the freshly parsed data.
def parse # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  Dir["bipm-si-brochure/_site/documents/*.rxl"].each do |path|
    puts "Parsing #{path}"
    bibdata = Nokogiri::XML(File.read(path)).at("/bibdata")
    parsed = RelatonBipm::XMLParser.from_xml(bibdata.to_xml).to_hash
    fix_si_brochure_id(parsed)

    # English and French sources (-en / -fr suffix) share one output file.
    basename = File.join(@data_fetcher.output, File.basename(path).sub(/(?:-(?:en|fr))?\.rxl$/, ""))
    outfile = "#{basename}.#{@data_fetcher.ext}"
    index_key = parsed["docnumber"] || basename
    @data_fetcher.index2.add_or_update(Id.new.parse(index_key).to_hash, outfile)

    already_saved = File.exist?(outfile)
    if already_saved
      stored = YAML.load_file(outfile)
      fix_si_brochure_id(stored)
      parsed = deep_merge(parsed, stored)
    end

    item = RelatonBipm::BipmBibliographicItem.from_hash(**parsed)
    # Only a first-time write asks write_file for duplicate warnings; merging
    # into an existing file is expected.
    @data_fetcher.write_file(outfile, item, warn_duplicate: !already_saved)
    puts "Saved to #{outfile}"
  end
end