Module: DS::Extractor::TeiXml::ClassMethods

Included in:
DS::Extractor::TeiXml
Defined in:
lib/ds/extractor/tei_xml_extractor.rb

Constant Summary collapse

SIMPLE_NOTE_XPATH =

Notes

'/TEI/teiHeader/fileDesc/notesStmt/note[not(@type)]/text()'
BINDING_XPATH =
'/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/bindingDesc/binding/p/text()'
LAYOUT_XPATH =
'/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/objectDesc/layoutDesc/layout/text()'
SCRIPT_XPATH =
'/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/scriptDesc/scriptNote/text()'
DECO_XPATH =
'/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/decoDesc/decoNote[not(@n)]/text()'
RESOURCE_XPATH =
'/TEI/teiHeader/fileDesc/notesStmt/note[@type = "relatedResource"]/text()'
PROVENANCE_XPATH =
'/TEI/teiHeader/fileDesc/sourceDesc/msDesc/history/provenance/text()'
WHITESPACE_RE =
%r{\s+}
MEDIAL_PIPE_RE =

match pipes

%r{\s*\|\s*}

Instance Method Summary collapse

Instance Method Details

#build_notes(xml, xpath, prefix: nil) ⇒ Array<String>

Clean the note text and optionally prepend a prefix. The prefix is prepended as:

"#{prefix}: Note text"

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI xml

  • xpath (String)

    the xpath for the note(s)

  • prefix (String) (defaults to: nil)

    value to prepend to the note; default: nil

Returns:

  • (Array<String>)


599
600
601
602
603
604
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 599

# Build note strings from the text found at +xpath+, optionally labelled.
#
# @param xml [Nokogiri::XML::Node] the TEI xml
# @param xpath [String] the xpath for the note(s)
# @param prefix [String, nil] label placed before each note as "#{prefix}: ";
#   a blank prefix adds nothing
# @return [Array<String>] one string per value returned by
#   #extract_normalized_strings
def build_notes xml, xpath, prefix: nil
  label = ''
  label = "#{prefix}: " unless prefix.blank?

  extract_normalized_strings(xml, xpath).map do |note_text|
    "#{label}#{note_text}"
  end
end

#extract_acknowledgments(record) ⇒ Array<String>

Extracts acknowledgments from the TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (Array<String>)

    an array of acknowledgments extracted from the record



663
664
665
666
667
668
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 663

# Build acknowledgment strings for the record: one "Role: Name" entry per
# name found for ACKNOWLEDGMENT_RESPS, followed by the funder entries.
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [Array<String>] resp-based acknowledgments, then the values of
#   #extract_funder
def extract_acknowledgments record
  contributors = extract_resps(record, *ACKNOWLEDGMENT_RESPS)
  credited = contributors.map do |person|
    "#{person.role.capitalize}: #{person.as_recorded}"
  end
  credited + extract_funder(record)
end

#extract_all_subjects(xml) ⇒ Object



384
385
386
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 384

# Return all subjects for the record. This is a straight delegation to
# #extract_subjects; the argument is passed through unchanged.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Object] whatever #extract_subjects returns for +xml+
def extract_all_subjects xml
  extract_subjects(xml)
end

#extract_all_subjects_as_recorded(xml) ⇒ Array<String>

Extracts all subjects from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array<String>)

    the extracted subjects



380
381
382
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 380

# Return all subjects as recorded. This is a straight delegation to
# #extract_subjects_as_recorded.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<String>] the result of #extract_subjects_as_recorded
def extract_all_subjects_as_recorded xml
  extract_subjects_as_recorded(xml)
end

#extract_artists(xml) ⇒ Array<DS::Extractor::Name>

Extracts artists from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<DS::Extractor::Name>)

    the extracted artists



186
187
188
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 186

# Return the names from respStmts matching RESP_ARTIST.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array] the result of #extract_resps for RESP_ARTIST
def extract_artists xml
  extract_resps xml, RESP_ARTIST
end

#extract_artists_as_recorded(xml) ⇒ Array<String>

Extracts artists as recorded from the given record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted artists as recorded



170
171
172
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 170

# Return the +as_recorded+ value of each artist from #extract_artists.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String>] the artists as recorded
def extract_artists_as_recorded xml
  artists = extract_artists(xml)
  artists.map { |artist| artist.as_recorded }
end

#extract_artists_as_recorded_agr(xml) ⇒ Array<String>

Extracts artists as recorded with vernacular form from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted artists as recorded with vernacular form



178
179
180
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 178

# Return the +vernacular+ value of each artist from #extract_artists.
# Entries are nil where an artist has no vernacular form.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String, nil>] the artists' vernacular forms
def extract_artists_as_recorded_agr xml
  artists = extract_artists(xml)
  artists.map { |artist| artist.vernacular }
end

#extract_associated_agents(xml) ⇒ Array

Extracts associated agents from the given XML record.

NB: Associated agents are not extracted from TEI XML. This method returns an empty array.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array)

    an empty array



245
246
247
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 245

# Associated agents are not extracted from TEI XML, so the argument is
# ignored and the result is always empty.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML (unused)
# @return [Array] a new empty array
def extract_associated_agents xml
  []
end

#extract_authors(xml) ⇒ Array<DS::Extractor::Name>

Extracts authors from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the XML record to extract authors from

Returns:



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 39

# Extract the authors listed under the record's msContents/msItem elements.
#
# Each +author+ element yields one DS::Extractor::Name with role 'author'.
# The recorded name comes from the first non-vernacular +name+/+persName+
# child when there is one, otherwise from the text of the +author+ element
# itself. Authors whose text mentions "Free Library of Philadelphia" are
# skipped.
#
# @param xml [Nokogiri::XML::Node] the XML record to extract authors from
# @return [Array<DS::Extractor::Name>] one Name per retained author element
def extract_authors xml
  author_nodes = xml.xpath('//msContents/msItem/author').reject { |node|
    node.text.match?(/Free Library of Philadelphia/)
  }

  author_nodes.map { |node|
    name_node = node.at_xpath('(name|persName)[not(@type = "vernacular")]')
    vern_name = node.at_xpath('(persName|name)[@type = "vernacular"]')
    prenormal = name_node ? name_node.text : node.text

    DS::Extractor::Name.new(
      as_recorded: DS::Util.normalize_string(prenormal),
      # When a name child exists its @ref is used even if it is absent; the
      # author's own @ref is only consulted for bare author elements.
      ref:         name_node ? name_node['ref'] : node['ref'],
      role:        'author',
      vernacular:  vern_name && DS::Util.normalize_string(vern_name.text)
    )
  }
end

#extract_authors_as_recorded(xml) ⇒ Array<String>

Extract authors as recorded from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    a TEI XML record

Returns:

  • (Array<String>)

    list of authors as recorded



69
70
71
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 69

# Return the +as_recorded+ value of each author from #extract_authors.
#
# @param xml [Nokogiri::XML::Node] a TEI XML record
# @return [Array<String>] list of authors as recorded
def extract_authors_as_recorded xml
  authors = extract_authors(xml)
  authors.map { |author| author.as_recorded }
end

#extract_authors_as_recorded_agr(xml) ⇒ Array<String>

Extracts authors as recorded with vernacular form from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    a TEI XML record

Returns:

  • (Array<String>)

    the extracted authors as recorded with vernacular form



77
78
79
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 77

# Return the +vernacular+ value of each author from #extract_authors.
# Entries are nil where an author has no vernacular form.
#
# @param xml [Nokogiri::XML::Node] a TEI XML record
# @return [Array<String, nil>] the authors' vernacular forms
def extract_authors_as_recorded_agr xml
  authors = extract_authors(xml)
  authors.map { |author| author.vernacular }
end

#extract_cataloging_convention(record) ⇒ Object

SOURCE METADATA



27
28
29
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 27

# Return the cataloging convention label for TEI sources. The record is
# not inspected; the value is always the same.
#
# @param record [Nokogiri::XML::Node] the TEI XML record (unused)
# @return [String] the literal 'tei-xml'
def extract_cataloging_convention record
  'tei-xml'
end

#extract_date_range(record, range_sep:) ⇒ Array<String>

Extracts and formats date ranges as recorded in the given TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

  • range_sep (String)

    the separator for the date range

Returns:

  • (Array<String>)

    an array of formatted date ranges



459
460
461
462
463
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 459

# Format the @notBefore/@notAfter values of every origDate as a range.
#
# Attribute values are converted with String#to_i, sorted numerically and
# joined with +range_sep+. An origDate with only one of the attributes
# yields a single number; one with neither yields an empty string.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @param range_sep [String] the separator for the date range
# @return [Array<String>] one formatted range per origDate element
def extract_date_range record, range_sep:
  record.xpath('//origDate').map do |orig_date|
    years = orig_date.xpath('@notBefore|@notAfter').map { |attr| attr.text.to_i }
    years.sort.join(range_sep)
  end
end

#extract_former_owners(xml) ⇒ Array<DS::Extractor::Name>

Extracts former owners from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<DS::Extractor::Name>)

    the extracted former owners



234
235
236
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 234

# Return the names from respStmts matching RESP_FORMER_OWNER.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array] the result of #extract_resps for RESP_FORMER_OWNER
def extract_former_owners xml
  extract_resps xml, RESP_FORMER_OWNER
end

#extract_former_owners_as_recorded(xml) ⇒ Array<String>

Extracts former owners as recorded from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted former owners as recorded



218
219
220
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 218

# Return the +as_recorded+ value of each former owner from
# #extract_former_owners.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String>] the former owners as recorded
def extract_former_owners_as_recorded xml
  owners = extract_former_owners(xml)
  owners.map { |owner| owner.as_recorded }
end

#extract_former_owners_as_recorded_agr(xml) ⇒ Array<String>

Extracts former owners as recorded with vernacular form from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted former owners as recorded with vernacular form



226
227
228
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 226

# Return the +vernacular+ value of each former owner from
# #extract_former_owners. Entries are nil where no vernacular form exists.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String, nil>] the former owners' vernacular forms
def extract_former_owners_as_recorded_agr xml
  owners = extract_former_owners(xml)
  owners.map { |owner| owner.vernacular }
end

#extract_funder(record) ⇒ Array<String>

Extracts the funder information from the TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (Array<String>)

    an array of funders extracted from the record



654
655
656
657
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 654

# Return the titleStmt funders, each labelled "Funder: <name>".
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [Array<String>] one labelled string per funder element
def extract_funder record
  funders = extract_normalized_strings(record, '/TEI/teiHeader/fileDesc/titleStmt/funder')
  funders.map do |funder|
    "Funder: #{funder}"
  end
end

#extract_genres(xml) ⇒ Array<DS::Extractor::Genre>

Extracts genres from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:



357
358
359
360
361
362
363
364
365
366
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 357

# Build a DS::Extractor::Genre for every term under the record's
# keywords[@n="form/genre"] element.
#
# The term text is normalized for +as_recorded+, the vocabulary is always
# 'openn-form/genre', and the term's @target is the source authority URI.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<DS::Extractor::Genre>] one Genre per term
def extract_genres xml
  terms = xml.xpath('/TEI/teiHeader/profileDesc/textClass/keywords[@n="form/genre"]/term')
  terms.map do |term|
    DS::Extractor::Genre.new(
      as_recorded:          DS::Util.normalize_string(term.text),
      vocab:                'openn-form/genre',
      source_authority_uri: term['target']
    )
  end
end

#extract_genres_as_recorded(xml) ⇒ Array<String>

Extracts genres from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array<String>)

    the extracted genres



349
350
351
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 349

# Return the +as_recorded+ value of each genre from #extract_genres.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<String>] the extracted genres
def extract_genres_as_recorded xml
  genres = extract_genres(xml)
  genres.map { |genre| genre.as_recorded }
end

#extract_holding_institution(record) ⇒ String

Extracts the holding institution from the given record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (String)

    the extracted holding institution



614
615
616
617
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 614

# Return the holding institution: the first msIdentifier institution or
# repository element in the document, normalized.
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [String, nil] the institution, or nil when nothing matches
def extract_holding_institution record
  institutions = extract_normalized_strings(
    record, '(//msIdentifier/institution|//msIdentifier/repository)[1]'
  )
  institutions.first
end

#extract_holding_institution_id_nummber(record) ⇒ String

Extracts the holding institution id number from the given record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (String)

    the extracted holding institution id number



623
624
625
626
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 623

# Return the holding institution's id number: the first
# altIdentifier[@type="bibid"]/idno value under msIdentifier, normalized.
#
# NOTE: the method name is spelled "nummber"; the spelling is part of the
# public interface and is kept as is.
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [String, nil] the id number, or nil when nothing matches
def extract_holding_institution_id_nummber record
  bibids = extract_normalized_strings(
    record,
    '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/msIdentifier/altIdentifier[@type="bibid"]/idno'
  )
  bibids.first
end

#extract_language_codes(xml, separator: '|') ⇒ Array

Extract the ISO language codes from the textLang attributes @mainLang and @otherLangs. The result contains one collection of codes per textLang element; the separator argument is accepted but not used.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI xml

Returns:

  • (Array)


287
288
289
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 287

# Return the +codes+ of every language from #extract_languages.
#
# The result has one entry per language (each entry is that language's
# collection of codes); the entries are not joined into a string.
#
# @param xml [Nokogiri::XML::Node] the TEI xml
# @param separator [String] accepted for interface compatibility; not used
# @return [Array] the +codes+ value of each extracted language
def extract_language_codes xml, separator: '|'
  languages = extract_languages(xml)
  languages.map { |language| language.codes }
end

#extract_languages(record) ⇒ Array<DS::Extractor::Language>

Extracts the languages from the given TEI XML record using the specified xpath. Each language is mapped to a Language object containing the language as recorded and its ISO codes.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 296

# Build a DS::Extractor::Language for every msContents/textLang element.
#
# The codes are the whitespace-separated values of @mainLang followed by
# those of @otherLangs, de-duplicated in that order. +as_recorded+ is the
# element's text when present, otherwise the codes joined with '|'.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [Array<DS::Extractor::Language>] one Language per textLang element
def extract_languages record
  xpath = '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/msContents/textLang'
  record.xpath(xpath).map { |text_lang|
    # TEI names the secondary-language attribute @otherLangs; the @otherLang
    # spelling this method used to read is kept as a fallback.
    other_langs = text_lang['otherLangs'] || text_lang['otherLang']

    # String#split drops blanks, so a missing @mainLang adds nothing
    # (rather than a nil entry) to the set.
    codes = Set.new
    codes.merge text_lang['mainLang'].to_s.split
    codes.merge other_langs.to_s.split

    as_recorded = text_lang.text.present? ? text_lang.text : codes.to_a.join('|')

    DS::Extractor::Language.new as_recorded: as_recorded, codes: codes
  }
end

#extract_languages_as_recorded(xml, separator: '|') ⇒ Array<String>

Extracts the languages as recorded from the given XML with an optional separator.

Parameters:

  • xml (Nokogiri::XML::Node)

    the XML node containing language information

  • separator (String) (defaults to: '|')

    the separator to use when multiple languages are extracted

Returns:

  • (Array<String>)

    the extracted languages as recorded



277
278
279
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 277

# Return the +as_recorded+ value of each language from #extract_languages.
#
# @param xml [Nokogiri::XML::Node] the XML node containing language
#   information
# @param separator [String] accepted for interface compatibility; not used
# @return [Array<String>] the extracted languages as recorded
def extract_languages_as_recorded xml, separator: '|'
  languages = extract_languages(xml)
  languages.map { |language| language.as_recorded }
end

#extract_link_to_record(record) ⇒ String

Extracts the link to the record from the given record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (String)

    the extracted link to the record



641
642
643
644
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 641

# Return the link to the record: the first normalized idno value found
# under an altIdentifier[@type="resource"] element.
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [String, nil] the link, or nil when nothing matches
def extract_link_to_record record
  links = extract_normalized_strings(record, '//altIdentifier[@type="resource"][1]/idno')
  links.first
end

#extract_material_as_recorded(record) ⇒ String

Extracts the material as recorded from the given TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (String)

    the extracted material as recorded



257
258
259
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 257

# Return the +as_recorded+ value of the first material from
# #extract_materials.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [String, nil] the first material, or nil when there is none
def extract_material_as_recorded record
  recorded = extract_materials(record).map { |material| material.as_recorded }
  recorded.first
end

#extract_materials(record) ⇒ Array<DS::Extractor::Material>

Extracts materials from the given TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:



265
266
267
268
269
270
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 265

# Build a DS::Extractor::Material for every supportDesc/support/p element,
# using its normalized text as +as_recorded+.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [Array<DS::Extractor::Material>] one Material per matching element
def extract_materials record
  support_xpath = '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/objectDesc/supportDesc/support/p'
  extract_normalized_strings(record, support_xpath).map do |as_recorded|
    DS::Extractor::Material.new(as_recorded: as_recorded)
  end
end

#extract_normalized_strings(record, xpath) ⇒ Array<String>

Extracts normalized strings from the given record based on the provided xpath.

Parameters:

  • record (Nokogiri::XML::Node)

    the record to extract normalized strings from

  • xpath (String)

    the xpath to specify the location of the strings in the record

Returns:

  • (Array<String>)

    an array of normalized strings extracted from the record



679
680
681
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 679

# Return the text of every node matching +xpath+, passed through
# DS::Util.normalize_string.
#
# @param record [Nokogiri::XML::Node] the record to search
# @param xpath [String] the xpath locating the strings in the record
# @return [Array<String>] one normalized string per matching node
def extract_normalized_strings record, xpath
  nodes = record.xpath(xpath)
  nodes.map do |node|
    DS::Util.normalize_string(node.text)
  end
end

#extract_notes(xml) ⇒ Array<String>

Create an array of notes. Physical description notes, like Binding, and Layout are mapped as prefixed notes as with TEI:

Binding: The binding note.
Layout: The layout note.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI xml

Returns:

  • (Array<String>)


572
573
574
575
576
577
578
579
580
581
582
583
584
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 572

# Collect all notes for the record.
#
# Unprefixed notes (SIMPLE_NOTE_XPATH) come first, followed by the prefixed
# note groups in this fixed order: Binding, Layout, Script, Decoration,
# Related resource, Provenance. Each group is produced by #build_notes.
#
# @param xml [Nokogiri::XML::Node] the TEI xml
# @return [Array<String>] all notes, in the order described above
def extract_notes xml
  prefixed_sources = [
    [BINDING_XPATH,    "Binding"],
    [LAYOUT_XPATH,     "Layout"],
    [SCRIPT_XPATH,     "Script"],
    [DECO_XPATH,       "Decoration"],
    [RESOURCE_XPATH,   "Related resource"],
    [PROVENANCE_XPATH, "Provenance"]
  ]

  simple_notes   = build_notes(xml, SIMPLE_NOTE_XPATH)
  prefixed_notes = prefixed_sources.flat_map do |xpath, label|
    build_notes(xml, xpath, prefix: label)
  end

  simple_notes + prefixed_notes
end

#extract_physical_description(xml) ⇒ Array<String>

Physical description

Return the extent and support concatenated, as a single-element array; e.g., ["Extent: 120 leaves; parchment"].

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI xml

Returns:

  • (Array<String>)


541
542
543
544
545
546
547
548
549
550
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 541

# Build the physical description from the first extent and the first
# support value of the record.
#
# The extent, when present, is labelled "Extent: ..."; the non-blank parts
# are joined with '; '. String#capitalize is applied to the joined string,
# so everything after its first character is lowercased. The description is
# returned wrapped in an array, and is an empty string when both parts are
# blank.
#
# @param xml [Nokogiri::XML::Node] the TEI xml
# @return [Array<String>] a single-element array holding the description
def extract_physical_description xml
  extent_xpath  = '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/objectDesc/supportDesc/extent/text()'
  support_xpath = '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/physDesc/objectDesc/supportDesc/support/p/text()'

  extent = extract_normalized_strings(xml, extent_xpath).first
  extent = "Extent: #{extent}" unless extent.blank?
  support = extract_normalized_strings(xml, support_xpath).first

  parts = [extent, support].reject { |part| part.blank? }
  [parts.join('; ').capitalize]
end

#extract_places(record) ⇒ Array<DS::Extractor::Place>

Extracts places from the given TEI XML record as recorded.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:



417
418
419
420
421
422
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 417

# Build a DS::Extractor::Place for every origPlace element in the document,
# using its normalized text as +as_recorded+.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [Array<DS::Extractor::Place>] one Place per origPlace element
def extract_places record
  place_names = extract_normalized_strings(record, '//origPlace')
  place_names.map do |place_name|
    DS::Extractor::Place.new(as_recorded: place_name)
  end
end

#extract_production_date_as_recorded(xml, range_sep: '-') ⇒ Array<String>

Extracts the date of production from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

  • range_sep (String) (defaults to: '-')

    the separator for the date range

Returns:

  • (Array<String>)

    the extracted dates of production as recorded



450
451
452
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 450

# Return the production dates as recorded. This delegates to
# #extract_date_range, passing the separator through.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @param range_sep [String] the separator for the date range
# @return [Array<String>] the result of #extract_date_range
def extract_production_date_as_recorded xml, range_sep: '-'
  extract_date_range xml, range_sep: range_sep
end

#extract_production_places_as_recorded(record) ⇒ Array<String>

Extracts the places of production from the given TEI XML record as recorded.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array<String>)

    the extracted places of production as recorded



409
410
411
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 409

# Return the +as_recorded+ value of each place from #extract_places.
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [Array<String>] the places of production as recorded
def extract_production_places_as_recorded record
  places = extract_places(record)
  places.map { |place| place.as_recorded }
end

#extract_recon_genres(record) ⇒ Array<Array>

Extracts genre terms from the given TEI XML record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array<Array>)

    an array of arrays containing value, vocabulary, and number for each term



320
321
322
323
324
325
326
327
328
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 320

# Return one reconciliation row per term under keywords[@n="form/genre"].
#
# Each row is [normalized term text, 'openn-form/genre', term @target].
#
# @param record [Nokogiri::XML::Node] the TEI XML record
# @return [Array<Array>] value, vocabulary and number for each term
def extract_recon_genres record
  terms = record.xpath('/TEI/teiHeader/profileDesc/textClass/keywords[@n="form/genre"]/term')
  terms.map do |term|
    [DS::Util.normalize_string(term.text), 'openn-form/genre', term['target']]
  end
end

#extract_recon_names(xml) ⇒ Array<Array>

Returns all names (authors, and names with the resps former owner, scribe, and artist) as a two-dimensional array, with each row having these values:

* name as recorded
* role (author, former owner, etc.)
* name in vernacular script
* ref (authority URL)

All missing values are returned as nil:

[
  ["Horace", "author", nil, "https://viaf.org/viaf/100227522/"],
  ["Hodossy, Imre", "former owner", nil, nil],
  ["Jān Sipār Khān ibn Rustamdilkhān, -1701?", "former owner", "جان سپار خان بن رستمدلخان،", nil]
]

Parameters:

  • xml (Nokogiri::XML::NodeSet)

    the parsed TEI XML

Returns:



157
158
159
160
161
162
163
164
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 157

# Return reconciliation rows for all names in the record: the authors first,
# then the names found for MS_CREATOR_RESPS. Each row is the +to_a+ form of
# the corresponding name object.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<Array>] one row per name
def extract_recon_names xml
  author_rows  = extract_authors(xml).map { |name| name.to_a }
  creator_rows = extract_resps(xml, *MS_CREATOR_RESPS).map { |name| name.to_a }
  author_rows + creator_rows
end

#extract_recon_places(xml) ⇒ Array<Array>

Extract the places of production for reconciliation CSV output.

Returns a two-dimensional array, each row is a place; and each row has one column: place name; for example:

[["Austria"],
 ["Germany"],
 ["France (?)"]]

Parameters:

  • xml (Nokogiri::XML::Node)

    a <TEI> node

Returns:

  • (Array<Array>)

    an array of arrays of values



436
437
438
439
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 436

# Return the places of production as reconciliation rows: one
# single-column row per normalized origPlace text node.
#
# @param xml [Nokogiri::XML::Node] a <TEI> node
# @return [Array<Array>] an array of one-element arrays of place names
def extract_recon_places xml
  place_names = extract_normalized_strings(xml, '//origPlace/text()')
  place_names.map do |place_name|
    [place_name]
  end
end

#extract_recon_subjects(xml) ⇒ Array

Extracts subject terms from the given TEI XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array)

    an array containing value, subfield codes, vocabulary, and number for each term



334
335
336
337
338
339
340
341
342
343
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 334

# Return one reconciliation row per term under the record's "subjects" and
# "keywords" keywords elements.
#
# Each row is [normalized term text, nil, "openn-<parent @n>", term
# @target]. The second column (subfield codes) is always nil.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<Array>] value, subfield codes, vocabulary and number for
#   each term
def extract_recon_subjects xml
  terms = xml.xpath('/TEI/teiHeader/profileDesc/textClass/keywords[@n="subjects" or @n="keywords"]/term')
  terms.map { |term|
    [
      DS::Util.normalize_string(term.text),
      nil,
      "openn-#{term.parent['n']}",
      term['target']
    ]
  }
end

#extract_recon_titles(xml) ⇒ Array<Array>

Extracts the titles from the given TEI record to an array of titles.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI record

Returns:

  • (Array<Array>)

    list of titles converted to arrays



529
530
531
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 529

# Return the titles from #extract_titles as reconciliation rows, using each
# title's +to_a+ form.
#
# @param xml [Nokogiri::XML::Node] the TEI record
# @return [Array<Array>] list of titles converted to arrays
def extract_recon_titles xml
  titles = extract_titles(xml)
  titles.map(&:to_a)
end

#extract_resps(xml, *resp_names) ⇒ Array<Name>

Find all respStmts for the given resps (e.g., ‘artist’) and return the values as Name instances.

Parameters:

  • xml (Nokogiri::XML::NodeSet)

    the parsed TEI XML

Returns:



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 88

# Find every respStmt whose resp text contains one of +resp_names+ and
# return each as a DS::Extractor::Name.
#
# Matching is a substring test against the resp text with ASCII letters
# lowercased; each requested name is stripped and lowercased first. The
# Name's role is the statement's own resp text, lowercased and stripped.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @param resp_names [Array<#to_s>] the resp values to look for
# @return [Array<DS::Extractor::Name>] one Name per matching respStmt
def extract_resps xml, *resp_names
  # respStmt comes in several shapes; for example:
  #
  #    <respStmt>
  #      <resp>former owner</resp>
  #      <persName type="authority">Jamālī, Yūsuf ibn Shaykh Muḥammad</persName>
  #      <persName type="vernacular">يوسف بن شيخ محمد الجمالي.</persName>
  #    </respStmt>
  #
  #    <respStmt>
  #      <resp>former owner</resp>
  #      <persName type="authority">Jamālī, Yūsuf ibn Shaykh Muḥammad</persName>
  #    </respStmt>
  #
  #    <respStmt>
  #      <resp>former owner</resp>
  #      <persName>Jamālī, Yūsuf ibn Shaykh Muḥammad</persName>
  #    </respStmt>
  #
  #    <respStmt>
  #      <resp>former owner</resp>
  #      <name>Jamālī, Yūsuf ibn Shaykh Muḥammad</name>
  #    </respStmt>
  #
  # One XPath predicate per requested resp, OR-ed together below.
  predicates = resp_names.map do |resp_name|
    wanted = resp_name.to_s.strip.downcase
    "contains(translate(./resp/text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '#{wanted}')"
  end

  xml.xpath("//respStmt[#{predicates.join(' or ')}]").map do |resp_stmt|
    auth_name = resp_stmt.at_xpath('(persName|name)[not(@type = "vernacular")]')
    vern_name = resp_stmt.at_xpath('(persName|name)[@type = "vernacular"]')
    role      = resp_stmt.at_xpath('resp/text()').to_s.downcase.strip

    # Name parts stay nil when the corresponding element is missing.
    DS::Extractor::Name.new(
      as_recorded: auth_name && DS::Util.normalize_string(auth_name.text),
      ref:         auth_name && auth_name['ref'],
      role:        role,
      vernacular:  vern_name && DS::Util.normalize_string(vern_name.text)
    )
  end
end

#extract_scribes(xml) ⇒ Array<DS::Extractor::Name>

Extracts scribes from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<DS::Extractor::Name>)

    the extracted scribes



210
211
212
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 210

# Return the names from respStmts matching RESP_SCRIBE.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array] the result of #extract_resps for RESP_SCRIBE
def extract_scribes xml
  extract_resps xml, RESP_SCRIBE
end

#extract_scribes_as_recorded(xml) ⇒ Array<String>

Extracts scribes as recorded from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted scribes as recorded



194
195
196
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 194

# Return the +as_recorded+ value of each scribe from #extract_scribes.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String>] the scribes as recorded
def extract_scribes_as_recorded xml
  scribes = extract_scribes(xml)
  scribes.map { |scribe| scribe.as_recorded }
end

#extract_scribes_as_recorded_agr(xml) ⇒ Array<String>

Extracts scribes as recorded with vernacular form from the given XML record.

Parameters:

  • xml (Nokogiri::XML::Node)

    the parsed TEI XML

Returns:

  • (Array<String>)

    the extracted scribes as recorded with vernacular form



202
203
204
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 202

# Return the +vernacular+ value of each scribe from #extract_scribes.
# Entries are nil where a scribe has no vernacular form.
#
# @param xml [Nokogiri::XML::Node] the parsed TEI XML
# @return [Array<String, nil>] the scribes' vernacular forms
def extract_scribes_as_recorded_agr xml
  scribes = extract_scribes(xml)
  scribes.map { |scribe| scribe.vernacular }
end

#extract_shelfmark(record) ⇒ String

Extracts the shelfmark from the given record.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI xml representing a TEI XML record

Returns:

  • (String)

    the extracted shelfmark



632
633
634
635
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 632

# Return the shelfmark: the first normalized msIdentifier
# idno[@type="call-number"] value.
#
# @param record [Nokogiri::XML::Node] the TEI xml record
# @return [String, nil] the shelfmark, or nil when nothing matches
def extract_shelfmark record
  call_numbers = extract_normalized_strings(
    record,
    '/TEI/teiHeader/fileDesc/sourceDesc/msDesc/msIdentifier/idno[@type="call-number"]'
  )
  call_numbers.first
end

#extract_subjects(xml) ⇒ Array<DS::Extractor::Subject>

Extracts subjects from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:



392
393
394
395
396
397
398
399
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 392

# Build a DS::Extractor::Subject for every term under the record's
# "subjects" and "keywords" keywords elements.
#
# +as_recorded+ is the normalized term text; +vocab+ is "openn-" followed
# by the parent element's @n value.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<DS::Extractor::Subject>] one Subject per term
def extract_subjects xml
  terms = xml.xpath('/TEI/teiHeader/profileDesc/textClass/keywords[@n="subjects" or @n="keywords"]/term')
  terms.map do |term|
    DS::Extractor::Subject.new(
      as_recorded: DS::Util.normalize_string(term.text),
      vocab:       "openn-#{term.parent['n']}"
    )
  end
end

#extract_subjects_as_recorded(xml) ⇒ Array<String>

Extracts subjects from the given TEI XML record as recorded.

Parameters:

  • xml (Nokogiri::XML::Node)

    the TEI XML record

Returns:

  • (Array<String>)

    the extracted subjects



372
373
374
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 372

# Return the +as_recorded+ value of each subject from #extract_subjects.
#
# @param xml [Nokogiri::XML::Node] the TEI XML record
# @return [Array<String>] the extracted subjects
def extract_subjects_as_recorded xml
  subjects = extract_subjects(xml)
  subjects.map { |subject| subject.as_recorded }
end

#extract_titles(record) ⇒ Array<Title>

Return an array of Title instances equal in number to the number of non-vernacular titles.

This is a bit of a hack. Titles are listed serially, and Roman-character and vernacular-script titles are not paired. Thus:

<msItem>
  <title>Qaṭr al-nadā wa-ball al-ṣadā.</title>
  <title type="vernacular">قطر الندا وبل الصدا</title>
  <title>Second title</title>
  <author>
     <!-- ... -->
</msItem>

We assume that, when there is a vernacular title, it follows its Roman equivalent. This script runs through all <title> elements and creates a Title struct for each title where

@type != 'vernacular'

When @type is ‘vernacular’, it sets the vernacular value of the previous Title instance to that value.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI record

Returns:



495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 495

# Return one DS::Extractor::Title per non-vernacular <title> of the first
# msItem.
#
# Titles are read in document order. A title whose @type is 'vernacular' is
# assumed to follow its Roman-character equivalent and is stored as the
# +vernacular+ of the most recently created Title. A vernacular title that
# appears before any other title has nothing to attach to and is skipped.
#
# @param record [Nokogiri::XML::Node] the TEI record
# @return [Array<DS::Extractor::Title>] the titles, in document order
def extract_titles record
  titles = []
  record.xpath('//msItem[1]/title').each do |title|
    text = DS::Util.normalize_string title.text
    if title[:type] == 'vernacular'
      # Guard against a leading vernacular title: titles.last would be nil
      # and the assignment would raise NoMethodError.
      titles.last.vernacular = text unless titles.empty?
    else
      titles << DS::Extractor::Title.new(as_recorded: text)
    end
  end
  titles
end

#extract_titles_as_recorded(record) ⇒ Array<String>

Extracts the titles from the given TEI record as recorded.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI record

Returns:

  • (Array<String>)

    list of titles as recorded



513
514
515
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 513

# Return the +as_recorded+ value of each title from #extract_titles.
#
# @param record [Nokogiri::XML::Node] the TEI record
# @return [Array<String>] list of titles as recorded
def extract_titles_as_recorded record
  titles = extract_titles(record)
  titles.map(&:as_recorded)
end

#extract_titles_as_recorded_agr(record) ⇒ Array<String>

Extracts the titles from the given TEI record as recorded in the vernacular language.

Parameters:

  • record (Nokogiri::XML::Node)

    the TEI record

Returns:

  • (Array<String>)

    list of titles in the vernacular language as recorded



521
522
523
# File 'lib/ds/extractor/tei_xml_extractor.rb', line 521

# Return the +vernacular+ value of each title from #extract_titles.
# Entries are nil where a title has no vernacular form.
#
# @param record [Nokogiri::XML::Node] the TEI record
# @return [Array<String, nil>] the titles' vernacular forms
def extract_titles_as_recorded_agr record
  titles = extract_titles(record)
  titles.map(&:vernacular)
end