30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
# File 'lib/bolognese/readers/schema_org_reader.rb', line 30

# Reads a schema.org JSON-LD document and maps it onto the metadata hash
# returned to the caller.
#
# @param string [String, nil] the schema.org JSON document. When blank, an
#   empty document is used and only the options contribute to the result.
# @param options [Hash] extra options. `:doi` takes precedence over the
#   document's `@id` as the first identifier. Every option except `:doi`,
#   `:id`, `:url`, `:sandbox` and `:validate` is merged over the result.
# @return [Hash] `{ "errors" => errors }` when `jsonlint` reports errors for
#   `string`, otherwise the metadata hash built below.
def read_schema_org(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate))
  meta = string.present? ? Maremma.from_json(string) : {}

  # The primary identifier (explicit :doi option, else "@id") comes first,
  # followed by everything listed under "identifier". Entries that are
  # neither a String nor a Hash map to nil and are dropped by `compact`.
  identifiers = ([options[:doi] || meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
    r = normalize_id(r) if r.is_a?(String)
    if r.is_a?(String) && r.start_with?("https://doi.org")
      { "identifierType" => "DOI", "identifier" => r }
    elsif r.is_a?(String)
      { "identifierType" => "URL", "identifier" => r }
    elsif r.is_a?(Hash)
      { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
    end
  end.compact.uniq
  id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)

  schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
  types = {
    "resourceTypeGeneral" => resource_type_general,
    "resourceType" => meta.fetch("additionalType", nil),
    "schemaOrg" => schema_org,
    "citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
    "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
    "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
  }.compact

  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
  publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)

  # Datasets take their container from "includedInDataCatalog"; every other
  # type looks under the "Periodical" key.
  is_dataset = schema_org == "Dataset"
  ct = is_dataset ? "includedInDataCatalog" : "Periodical"
  container_meta = meta.fetch(ct, nil)
  container = if container_meta.present?
    url = parse_attributes(from_schema_org(container_meta), content: "url", first: true)
    {
      "type" => is_dataset ? "DataRepository" : "Periodical",
      "title" => parse_attributes(from_schema_org(container_meta), content: "name", first: true),
      "identifier" => url,
      "identifierType" => url.present? ? "URL" : nil,
      "volume" => meta.fetch("volumeNumber", nil),
      "issue" => meta.fetch("issueNumber", nil),
      "firstPage" => meta.fetch("pageStart", nil),
      "lastPage" => meta.fetch("pageEnd", nil)
    }.compact
  else
    {}
  end

  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
    Array.wrap(schema_org_is_part_of(meta)) +
    Array.wrap(schema_org_has_part(meta)) +
    Array.wrap(schema_org_is_previous_version_of(meta)) +
    Array.wrap(schema_org_is_new_version_of(meta)) +
    Array.wrap(schema_org_references(meta)) +
    Array.wrap(schema_org_is_referenced_by(meta)) +
    Array.wrap(schema_org_is_supplement_to(meta)) +
    Array.wrap(schema_org_is_supplemented_by(meta))

  license = meta.fetch("license", nil)
  rights_list = {
    "rightsUri" => parse_attributes(license, content: "id", first: true),
    "rights" => parse_attributes(license, content: "name", first: true)
  }

  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
    if fr["@id"].present?
      {
        "funderName" => fr["name"],
        "funderIdentifier" => fr["@id"],
        "funderIdentifierType" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : "Other"
      }.compact
    else
      { "funderName" => fr["name"] }.compact
    end
  end

  date_published = meta.fetch("datePublished", nil)
  date_created = meta.fetch("dateCreated", nil)
  date_modified = meta.fetch("dateModified", nil)
  dates = []
  dates << { "date" => date_published, "dateType" => "Issued" } if date_published.present?
  dates << { "date" => date_created, "dateType" => "Created" } if date_created.present?
  dates << { "date" => date_modified, "dateType" => "Updated" } if date_modified.present?
  # Coerce to String before slicing: a numeric year in the JSON would
  # otherwise hit Integer#[] and yield a bit slice instead of the year.
  publication_year = date_published.to_s[0..3] if date_published.present?

  state = meta.present? || read_options.present? ? "findable" : "not_found"

  # NOTE(review): entries are assumed to be Hashes (schema.org Place);
  # a plain-string spatialCoverage would raise on `dig` -- confirm with callers.
  geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
    if gl.dig("geo", "box")
      # The box string is split on spaces into south, west, north, east.
      s, w, n, e = gl.dig("geo", "box").split(" ", 4)
      geo_location_box = {
        "westBoundLongitude" => w,
        "eastBoundLongitude" => e,
        "southBoundLatitude" => s,
        "northBoundLatitude" => n
      }.compact.presence
    else
      geo_location_box = nil
    end
    geo_location_point = { "pointLongitude" => gl.dig("geo", "longitude"), "pointLatitude" => gl.dig("geo", "latitude") }.compact.presence
    {
      "geoLocationPlace" => gl.dig("geo", "address"),
      "geoLocationPoint" => geo_location_point,
      "geoLocationBox" => geo_location_box
    }.compact
  end

  # "keywords" may be one comma-separated string or an array of strings.
  # Wrapping first keeps the single-string result unchanged while no longer
  # stringifying a whole array into its inspect form.
  subjects = Array.wrap(meta.fetch("keywords", nil)).flat_map { |k| k.to_s.split(", ") }.map do |subject|
    { "subject" => subject }
  end

  # Read the correctly spelled "contentSize" first; the misspelled
  # "contenSize" key is still honoured for documents that relied on it.
  content_size = meta.fetch("contentSize", nil) || meta.fetch("contenSize", nil)

  { "id" => id,
    "types" => types,
    "doi" => validate_doi(id),
    "identifiers" => identifiers,
    "url" => normalize_id(meta.fetch("url", nil)),
    "content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
    "sizes" => Array.wrap(content_size).presence,
    "formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)),
    "titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : nil,
    "creators" => creators,
    "contributors" => contributors,
    "publisher" => publisher,
    "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
    "container" => container,
    "related_identifiers" => related_identifiers,
    "publication_year" => publication_year,
    "dates" => dates,
    "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
    "rights_list" => rights_list,
    "version_info" => meta.fetch("version", nil).to_s.presence,
    "subjects" => subjects,
    "state" => state,
    "schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
    "funding_references" => funding_references,
    "geo_locations" => geo_locations
  }.merge(read_options)
end
|