Class: Puree::XMLExtractor::Dataset

Inherits:
Resource
Includes:
DescriptionMixin, KeywordMixin, OrganisationalUnitMixin, OwnerMixin, PersonMixin, PublisherMixin, ResearchOutputMixin, TitleMixin, WorkflowMixin
Defined in:
lib/puree/xml_extractor/dataset.rb

Overview

Dataset XML extractor.

Instance Method Summary

Methods included from TitleMixin

#title

Methods included from WorkflowMixin

#workflow

Methods included from ResearchOutputMixin

#research_outputs

Methods included from PublisherMixin

#publisher

Methods included from OwnerMixin

#owner

Methods included from OrganisationalUnitMixin

#organisational_units

Methods included from DescriptionMixin

#description

Methods inherited from Resource

#created_at, #created_by, #id, #model, #modified_at, #modified_by, #previous_uuids, #uuid

Methods inherited from Base

#xpath_query_for_multi_value, #xpath_query_for_single_value

Constructor Details

#initialize(xml) ⇒ Dataset

Returns a new instance of Dataset.



18
19
20
21
# File 'lib/puree/xml_extractor/dataset.rb', line 18

# Returns a new instance of Dataset.
#
# Forwards +xml+ to the parent initializer, then calls +setup_model+ with
# +:dataset+.
#
# @param xml the value handed unchanged to the parent initializer
def initialize(xml)
  super(xml)
  setup_model(:dataset)
end

Instance Method Details

#available ⇒ Time?

Date made available

Returns:

  • (Time, nil)


25
26
27
# File 'lib/puree/xml_extractor/dataset.rb', line 25

# Date made available.
#
# Fetches the 'publicationDate' temporal date and converts it with
# Puree::Util::Date.hash_to_time.
#
# @return [Time, nil]
def available
  publication_date = temporal_date('publicationDate')
  Puree::Util::Date.hash_to_time(publication_date)
end

#doi ⇒ String?

Digital Object Identifier

Returns:

  • (String, nil)


31
32
33
# File 'lib/puree/xml_extractor/dataset.rb', line 31

# Digital Object Identifier.
#
# Delegates to +xpath_query_for_single_value+ with the '/doi' path.
#
# @return [String, nil]
def doi
  xpath_query_for_single_value('/doi')
end

#files ⇒ Array<Puree::Model::File>

Supporting files

Returns:

  • (Array<Puree::Model::File>)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/puree/xml_extractor/dataset.rb', line 37

# Supporting files.
#
# Builds one Puree::Model::File per '/documents/document' node, taking the
# name from 'title' and the URL from 'url'. When the node has a
# 'documentLicense' child, a Puree::Model::CopyrightLicense is built from its
# 'term/text' and attached only if the license reports data. Entries whose URL
# repeats an earlier one are dropped.
#
# @return [Array<Puree::Model::File>]
def files
  collected = xpath_query('/documents/document').map do |node|
    file = Puree::Model::File.new
    file.name = node.xpath('title').text.strip
    file.url = node.xpath('url').text.strip
    # Disabled extractions, retained for reference:
    # file.mime = node.xpath('mimeType').text.strip
    # file.size = node.xpath('size').text.strip.to_i
    # file['createdDate'] = node.xpath('createdDate').text.strip
    # file['visibleOnPortalDate'] = node.xpath('visibleOnPortalDate').text.strip
    # file['limitedVisibility'] = node.xpath('limitedVisibility').text.strip

    license_node = node.xpath('documentLicense').first
    if license_node
      copyright = Puree::Model::CopyrightLicense.new
      copyright.name = license_node.xpath('term/text').text.strip
      # Disabled alternatives, retained for reference:
      # copyright.name = license_node.xpath('term/localizedString').text.strip
      # copyright.url = license_node.xpath('description/localizedString').text.strip
      file.license = copyright if copyright.data?
    end

    file
  end

  # Keep the first file seen for each URL.
  collected.uniq(&:url)
end

#keywords ⇒ Array<String>

Returns:

  • (Array<String>)


63
64
65
# File 'lib/puree/xml_extractor/dataset.rb', line 63

# Keywords obtained by passing the 'User-Defined Keywords' group name to
# +keyword_group+.
#
# @return [Array<String>]
def keywords
  group_name = 'User-Defined Keywords'
  keyword_group(group_name)
end

#persons_external ⇒ Array<Puree::Model::EndeavourPerson>

Returns:

  • (Array<Puree::Model::EndeavourPerson>)


73
74
75
# File 'lib/puree/xml_extractor/dataset.rb', line 73

# People selected by passing 'external' and the person-association path to
# +persons+.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_external
  association_path = '/personAssociations/personAssociation'
  persons('external', association_path)
end

#persons_internal ⇒ Array<Puree::Model::EndeavourPerson>

Returns:

  • (Array<Puree::Model::EndeavourPerson>)


68
69
70
# File 'lib/puree/xml_extractor/dataset.rb', line 68

# People selected by passing 'internal' and the person-association path to
# +persons+.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_internal
  association_path = '/personAssociations/personAssociation'
  persons('internal', association_path)
end

#persons_other ⇒ Array<Puree::Model::EndeavourPerson>

Returns:

  • (Array<Puree::Model::EndeavourPerson>)


78
79
80
# File 'lib/puree/xml_extractor/dataset.rb', line 78

# People selected by passing 'other' and the person-association path to
# +persons+.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_other
  association_path = '/personAssociations/personAssociation'
  persons('other', association_path)
end

#production ⇒ Puree::Model::TemporalRange?

Date of data production

Returns:

  • (Puree::Model::TemporalRange, nil)


84
85
86
# File 'lib/puree/xml_extractor/dataset.rb', line 84

# Date of data production.
#
# Passes the start and end date paths under 'dataProductionPeriod' to
# +temporal_range+.
#
# @return [Puree::Model::TemporalRange, nil]
def production
  start_path = 'dataProductionPeriod/startDate'
  end_path = 'dataProductionPeriod/endDate'
  temporal_range(start_path, end_path)
end

#spatial_places ⇒ Array<String>

Returns:

  • (Array<String>)


89
90
91
92
93
94
95
96
97
# File 'lib/puree/xml_extractor/dataset.rb', line 89

# Places taken from the '/geographicalCoverage' nodes.
#
# Each node's text is stripped of surrounding whitespace and duplicate values
# are removed.
#
# @return [Array<String>]
def spatial_places
  # The source values come from a free-form text box.
  xpath_query('/geographicalCoverage').map { |node| node.text.strip }.uniq
end

#spatial_point ⇒ Puree::Model::SpatialPoint?

Spatial coverage point

Returns:

  • (Puree::Model::SpatialPoint, nil)


101
102
103
104
105
106
107
108
109
110
# File 'lib/puree/xml_extractor/dataset.rb', line 101

# Spatial coverage point.
#
# Reads the first '/geoLocation/point' node and parses its text as a
# comma-separated pair: latitude first, longitude second.
#
# @return [Puree::Model::SpatialPoint, nil] nil when there is no point node,
#   or when the text does not hold both a latitude and a longitude
def spatial_point
  # Only the first node is read: joining the text of several nodes would run
  # one pair into the next and corrupt the coordinates.
  node = xpath_query('/geoLocation/point').first
  return unless node

  latitude, longitude = node.text.split(',').map(&:strip)
  # A blank or comma-less value would otherwise become a point at 0.0, 0.0,
  # because to_f turns a missing or empty component into 0.0.
  return if latitude.nil? || longitude.nil? || latitude.empty? || longitude.empty?

  point = Puree::Model::SpatialPoint.new
  point.latitude = latitude.to_f
  point.longitude = longitude.to_f
  point
end

#temporal ⇒ Puree::Model::TemporalRange?

Temporal coverage

Returns:

  • (Puree::Model::TemporalRange, nil)


114
115
116
# File 'lib/puree/xml_extractor/dataset.rb', line 114

# Temporal coverage.
#
# Passes the start and end date paths under 'temporalCoveragePeriod' to
# +temporal_range+.
#
# @return [Puree::Model::TemporalRange, nil]
def temporal
  start_path = 'temporalCoveragePeriod/startDate'
  end_path = 'temporalCoveragePeriod/endDate'
  temporal_range(start_path, end_path)
end