Class: Puree::XMLExtractor::Dataset

Inherits:
Resource show all
Defined in:
lib/puree/xml_extractor/dataset.rb

Overview

Dataset XML extractor.

Instance Method Summary collapse

Methods inherited from Resource

#created, #get_data?, #locale, #modified, #uuid, #xpath_query

Methods inherited from Base

#xpath_query_for_multi_value, #xpath_query_for_single_value

Constructor Details

#initialize(xml:) ⇒ Dataset

Returns a new instance of Dataset.



9
10
11
12
# File 'lib/puree/xml_extractor/dataset.rb', line 9

# Creates the extractor and marks it as handling the :dataset resource type.
# @param xml forwarded unchanged to the parent initializer (its type is not visible here)
def initialize(xml:)
  super(xml: xml)
  @resource_type = :dataset
end

Instance Method Details

#access ⇒ String?

Open access permission

Returns:

  • (String, nil)


16
17
18
# File 'lib/puree/xml_extractor/dataset.rb', line 16

# Open access permission.
# @return [String, nil]
def access
  xpath_query_for_single_value('/openAccessPermission/term/localizedString')
end

#associated ⇒ Array<Puree::Model::RelatedContentHeader>

Combines projects and publications



22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/puree/xml_extractor/dataset.rb', line 22

# Combines projects and publications.
# Builds one header per related-content node and keeps only the first
# header seen for each uuid.
# @return [Array<Puree::Model::RelatedContentHeader>]
def associated
  headers = xpath_query('/associatedContent/relatedContent').map do |node|
    header = Puree::Model::RelatedContentHeader.new
    header.type = node.xpath('typeClassification').text.strip
    header.title = node.xpath('title').text.strip
    header.uuid = node.attr('uuid').strip
    header
  end
  headers.uniq(&:uuid)
end

#available ⇒ Time?

Date made available

Returns:

  • (Time, nil)


37
38
39
# File 'lib/puree/xml_extractor/dataset.rb', line 37

# Date made available.
# @return [Time, nil]
def available
  date_parts = temporal_date('dateMadeAvailable')
  Puree::Util::Date.hash_to_time(date_parts)
end

#description ⇒ String?

Returns:

  • (String, nil)


42
43
44
# File 'lib/puree/xml_extractor/dataset.rb', line 42

# Description text taken from the classification-defined description field.
# @return [String, nil]
def description
  xpath_query_for_single_value('/descriptions/classificationDefinedField/value/localizedString')
end

#doi ⇒ String?

Digital Object Identifier

Returns:

  • (String, nil)


48
49
50
# File 'lib/puree/xml_extractor/dataset.rb', line 48

# Digital Object Identifier.
# @return [String, nil]
def doi
  xpath_query_for_single_value('/doi')
end

#files ⇒ Array<Puree::Model::File>

Supporting files

Returns:



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/puree/xml_extractor/dataset.rb', line 54

# Supporting files.
# Builds one file model per document node, attaches a licence when the
# licence model reports it holds data, and keeps the first file per url.
# @return [Array<Puree::Model::File>]
def files
  documents = xpath_query('/documents/document').map do |node|
    file = Puree::Model::File.new
    file.name = node.xpath('fileName').text.strip
    file.mime = node.xpath('mimeType').text.strip
    file.size = node.xpath('size').text.strip.to_i
    file.url = node.xpath('url').text.strip
    # Not extracted (left disabled, as in the original):
    # doc['createdDate'] = d.xpath('createdDate').text.strip
    # doc['visibleOnPortalDate'] = d.xpath('visibleOnPortalDate').text.strip
    # doc['limitedVisibility'] = d.xpath('limitedVisibility').text.strip
    licence_nodes = node.xpath('documentLicense')
    unless licence_nodes.empty?
      licence = Puree::Model::CopyrightLicense.new
      licence.name = licence_nodes.xpath('term/localizedString').text.strip
      licence.url = licence_nodes.xpath('description/localizedString').text.strip
      file.license = licence if licence.data?
    end
    file
  end
  documents.uniq(&:url)
end

#keywords ⇒ Array<String>

Returns:

  • (Array<String>)


79
80
81
82
83
# File 'lib/puree/xml_extractor/dataset.rb', line 79

# Free-text keywords, whitespace-trimmed and de-duplicated.
# @return [Array<String>]
def keywords
  keyword_nodes = xpath_query('/keywordGroups/keywordGroup/keyword/userDefinedKeyword/freeKeyword')
  keyword_nodes.map { |node| node.text.strip }.uniq
end

#legal_conditions ⇒ Array<Puree::Model::LegalCondition>

Returns:



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/puree/xml_extractor/dataset.rb', line 86

# Legal conditions, one model per node, keeping the first entry per name.
# @return [Array<Puree::Model::LegalCondition>]
def legal_conditions
  conditions = xpath_query('/legalConditions/legalCondition').map do |node|
    condition = Puree::Model::LegalCondition.new
    condition.name = node.xpath('typeClassification/term/localizedString').text.strip
    condition.description = node.xpath('description').text.strip
    condition
  end
  conditions.uniq(&:name)
end

#links ⇒ Array<Puree::Model::Link>

Returns:



99
100
101
102
103
104
105
106
107
108
109
# File 'lib/puree/xml_extractor/dataset.rb', line 99

# Links, one model per node, keeping the first entry per url.
# @return [Array<Puree::Model::Link>]
def links
  link_models = xpath_query('/links/link').map do |node|
    link = Puree::Model::Link.new
    link.description = node.xpath('description').text.strip
    link.url = node.xpath('url').text.strip
    link
  end
  link_models.uniq(&:url)
end

#organisations ⇒ Array<Puree::Model::OrganisationHeader>



112
113
114
115
# File 'lib/puree/xml_extractor/dataset.rb', line 112

# Organisations, built by the shared multi-header helper from the
# organisation nodes.
# @return [Array<Puree::Model::OrganisationHeader>]
def organisations
  organisation_nodes = xpath_query('/organisations/organisation')
  Puree::XMLExtractor::Shared.organisation_multi_header(organisation_nodes)
end

#owner ⇒ Puree::Model::OrganisationHeader?



118
119
120
121
# File 'lib/puree/xml_extractor/dataset.rb', line 118

# Managing organisation, built by the shared header helper from the
# managedBy node.
# @return [Puree::Model::OrganisationHeader, nil]
def owner
  managed_by_nodes = xpath_query('/managedBy')
  Puree::XMLExtractor::Shared.organisation_header(managed_by_nodes)
end

#persons_external ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



129
130
131
# File 'lib/puree/xml_extractor/dataset.rb', line 129

# Persons looked up with the 'external' type.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_external
  persons('external')
end

#persons_internal ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



124
125
126
# File 'lib/puree/xml_extractor/dataset.rb', line 124

# Persons looked up with the 'internal' type.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_internal
  persons('internal')
end

#persons_other ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



134
135
136
# File 'lib/puree/xml_extractor/dataset.rb', line 134

# Persons looked up with the 'other' type.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_other
  persons('other')
end

#production ⇒ Puree::Model::TemporalRange?

Date of data production

Returns:



140
141
142
# File 'lib/puree/xml_extractor/dataset.rb', line 140

# Date of data production.
# @return [Puree::Model::TemporalRange, nil]
def production
  start_field = 'dateOfDataProduction'
  end_field = 'endDateOfDataProduction'
  temporal_range(start_field, end_field)
end

#projects ⇒ Array<Puree::Model::RelatedContentHeader>



145
146
147
# File 'lib/puree/xml_extractor/dataset.rb', line 145

# Associated content of type 'Research', with duplicates removed.
# @return [Array<Puree::Model::RelatedContentHeader>]
def projects
  research_items = associated_type('Research')
  research_items.uniq
end

#publications ⇒ Array<Puree::Model::RelatedContentHeader>



150
151
152
153
154
155
156
157
158
# File 'lib/puree/xml_extractor/dataset.rb', line 150

# Associated content whose type is anything other than 'Research'.
# @return [Array<Puree::Model::RelatedContentHeader>]
def publications
  associated.reject { |item| item.type == 'Research' }
end

#publisher ⇒ String?

Returns:

  • (String, nil)


161
162
163
# File 'lib/puree/xml_extractor/dataset.rb', line 161

# Publisher name.
# @return [String, nil]
def publisher
  xpath_query_for_single_value('/publisher/name')
end

#spatial_places ⇒ Array<String>

Returns:

  • (Array<String>)


166
167
168
169
170
171
172
173
174
# File 'lib/puree/xml_extractor/dataset.rb', line 166

# Place names, whitespace-trimmed and de-duplicated.
# @return [Array<String>]
def spatial_places
  # Data from free-form text box
  place_nodes = xpath_query('/geographicalCoverage/localizedString')
  place_nodes.map { |node| node.text.strip }.uniq
end

#spatial_point ⇒ Puree::Model::SpatialPoint?

Spatial coverage point

Returns:



178
179
180
181
182
183
184
185
186
187
188
# File 'lib/puree/xml_extractor/dataset.rb', line 178

# Spatial coverage point.
# Reads the point text as "latitude,longitude" and converts both parts to
# floats.
# @return [Puree::Model::SpatialPoint, nil] nil when there is no point node
#   or the text does not contain both comma-separated parts
def spatial_point
  xpath_result = xpath_query '/geoLocation/point'
  return nil if xpath_result[0].nil?

  latitude, longitude = xpath_result.text.split(',')
  # Without both parts there is nothing meaningful to return; previously a
  # missing comma raised NoMethodError on nil.
  return nil if latitude.nil? || longitude.nil?

  point = Puree::Model::SpatialPoint.new
  point.latitude = latitude.strip.to_f
  point.longitude = longitude.strip.to_f
  # Return the populated point. The original ended with a bare `nil`, so the
  # method always returned nil and the point was discarded.
  point
end

#temporal ⇒ Puree::Model::TemporalRange?

Temporal coverage

Returns:



197
198
199
# File 'lib/puree/xml_extractor/dataset.rb', line 197

# Temporal coverage.
# @return [Puree::Model::TemporalRange, nil]
def temporal
  start_field = 'temporalCoverageStartDate'
  end_field = 'temporalCoverageEndDate'
  temporal_range(start_field, end_field)
end

#title ⇒ String?

Returns:

  • (String, nil)


202
203
204
# File 'lib/puree/xml_extractor/dataset.rb', line 202

# Title of the dataset.
# @return [String, nil]
def title
  xpath_query_for_single_value('/title/localizedString')
end