Class: Nesstar::Api::CatalogApi

Inherits:
Object
  • Object
show all
Defined in:
lib/nesstar-api.rb

Instance Method Summary collapse

Instance Method Details

#add_datasetObject



221
222
223
# File 'lib/nesstar-api.rb', line 221

# Placeholder with an empty body: it takes no arguments, performs no work
# and therefore returns nil.
# NOTE(review): presumably intended to add a dataset to a catalog - confirm
# the intended behaviour before relying on it.
def add_dataset

end

#get_catalog(url, catalog) ⇒ Object

Get a hash mapping catalogs to their child catalogs and datasets from a Nesstar instance. The children of each catalog are contained in an Array object. Inputs are a URL to the Nesstar server, e.g. http://nesstar.somewhere.com (the scheme is required because the host is read from the parsed URL), and a catalog id, e.g. myCatalog. If there appears to be no parent catalog for something then the key will be listed as ‘none’.

Returns a Hash of catalogs to children



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/nesstar-api.rb', line 74

# Fetches the browser listing of a catalog's children from a Nesstar server
# and collects the result into a Hash.
#
# url     - address of the Nesstar server; it is parsed with URI.parse and
#           its host and port are used to build the request
# catalog - id of the catalog whose children are listed
#
# Returns the Hash that was handed to parse_out_datasets (defined elsewhere
# in this file), which is expected to fill it in.
def get_catalog(url, catalog)
  children_by_catalog = {}
  server = URI.parse(url)

  # Path and query for the Nesstar browser; the "url" parameter carries the
  # escaped address of the catalog's "@children" property.
  listing_path = [
    '/browser/browser?action=LIST&path=ROOT',
    CGI.escape('|Properties|children'),
    "&url=",
    CGI.escape('http://'),
    CGI.escape(server.host + ':' + server.port.to_s),
    CGI.escape("/obj/fCatalog/" + catalog + "@children")
  ].join

  listing = Net::HTTP.get(server.merge(listing_path))
  page = Nokogiri::HTML(listing)

  parse_out_datasets(page, server.host, catalog, children_by_catalog)

  children_by_catalog
end

#get_catalog_information(uri, catalog_id) ⇒ Object

Return a catalog object populated with information about the catalog, given a Nesstar URL, e.g. http://nesstar.somewhere.com, and a catalog id, e.g. myCatalog.

Returns a Nesstar::Catalog object



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/nesstar-api.rb', line 129

# Downloads the description of a catalog and wraps it in a Nesstar::Catalog.
#
# uri        - address of the Nesstar server, parsed with URI.parse
# catalog_id - id of the catalog; appended to "/obj/fCatalog/"
#
# The response body is read through Zlib::GzipReader, so it has to be gzip
# data. The label and description are taken from the first s:label and
# s:comment elements of the document and are left unset when the document
# has none.
#
# Returns the populated Nesstar::Catalog.
def get_catalog_information(uri, catalog_id)
  location = URI.parse(uri).merge("/obj/fCatalog/" + catalog_id)
  compressed = Net::HTTP.get(location)
  rdf = Nokogiri::XML(Zlib::GzipReader.new(StringIO.new(compressed)).read)

  first_label = rdf.xpath('//s:label')[0]
  first_comment = rdf.xpath('//s:comment')[0]

  catalog = Nesstar::Catalog.new
  catalog.nesstar_id = catalog_id
  catalog.nesstar_uri = uri
  catalog.label = first_label.content.strip if first_label
  catalog.description = first_comment.content.strip if first_comment
  catalog
end

#get_childrenObject



217
218
219
# File 'lib/nesstar-api.rb', line 217

# Placeholder with an empty body: it takes no arguments, performs no work
# and therefore returns nil.
# NOTE(review): presumably intended to return the children of a catalog -
# confirm the intended behaviour before relying on it.
def get_children

end

#get_ddi(uri, dataset) ⇒ Object

Get the DDI XML for a Nesstar dataset, given a Nesstar URL, e.g. http://nesstar.somewhere.com, and a dataset id.

Returns the raw DDI XML string



114
115
116
117
118
119
120
121
122
# File 'lib/nesstar-api.rb', line 114

# Downloads the DDI document of a dataset from a Nesstar server.
#
# uri     - address of the Nesstar server, parsed with URI.parse
# dataset - id of the dataset; appended to "/obj/fStudy/"
#
# The response body is read through Zlib::GzipReader, so it has to be gzip
# data.
#
# Returns the decompressed response body as a String.
def get_ddi(uri, dataset)
  study_address = URI.parse(uri).merge("/obj/fStudy/" + dataset)
  # The query string names the GetDDI method to invoke on the study object.
  request_address = study_address.merge('?http://www.nesstar.org/rdf/method=http://www.nesstar.org/rdf/Dataset/GetDDI')
  compressed = Net::HTTP.get(request_address)
  Zlib::GzipReader.new(StringIO.new(compressed)).read
end

#get_nodes(url, catalog) ⇒ Object

Given a Nesstar URL, e.g. http://nesstar.somewhere.com, and a catalog id, e.g. myCatalog, return a RubyTree node object containing the hierarchy.

Returns a RubyTree Node



95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/nesstar-api.rb', line 95

# Fetches the browser listing of a catalog's children from a Nesstar server
# and collects the result under a tree node.
#
# url     - address of the Nesstar server; it is parsed with URI.parse and
#           its host and port are used to build the request
# catalog - id of the catalog; also used as the name of the root node
#
# Returns the root Tree::TreeNode that was handed to dataset_tree (defined
# elsewhere in this file), which is expected to attach the children.
def get_nodes(url, catalog)
  tree_root = Tree::TreeNode.new(catalog, "Catalog Content")
  server = URI.parse(url)

  # Path and query for the Nesstar browser; the "url" parameter carries the
  # escaped address of the catalog's "@children" property.
  listing_path = [
    '/browser/browser?action=LIST&path=ROOT',
    CGI.escape('|Properties|children'),
    "&url=",
    CGI.escape('http://'),
    CGI.escape(server.host + ':' + server.port.to_s),
    CGI.escape("/obj/fCatalog/" + catalog + "@children")
  ].join

  listing = Net::HTTP.get(server.merge(listing_path))
  page = Nokogiri::HTML(listing)

  dataset_tree(page, server.host, catalog, tree_root)

  tree_root
end

#get_simple_study_information(uri, dataset_id) ⇒ Object

Get basic information about the dataset only, given a Nesstar URL, e.g. http://nesstar.somewhere.com, and a dataset id. No information about the variables that a study (i.e. dataset) contains is returned.

Returns a Nesstar::Study object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/nesstar-api.rb', line 151

# Downloads the description of a dataset and wraps it in a Nesstar::Study
# without any variable-level information.
#
# uri        - address of the Nesstar server, parsed with URI.parse
# dataset_id - id of the dataset; appended to "/obj/fStudy/"
#
# The response body is read through Zlib::GzipReader, so it has to be gzip
# data. The title and abstract are taken from the first s:label and
# s:comment elements of the document and are left unset when the document
# has none.
#
# Returns the populated Nesstar::Study.
def get_simple_study_information(uri, dataset_id)
  location = URI.parse(uri).merge("/obj/fStudy/" + dataset_id)
  compressed = Net::HTTP.get(location)
  rdf = Nokogiri::XML(Zlib::GzipReader.new(StringIO.new(compressed)).read)

  first_label = rdf.xpath('//s:label')[0]
  first_comment = rdf.xpath('//s:comment')[0]

  study = Nesstar::Study.new
  study.nesstar_id = dataset_id
  study.nesstar_uri = uri
  study.title = first_label.content.strip if first_label
  study.abstract = first_comment.content.strip if first_comment
  study
end

#get_study_information(uri, dataset_id) ⇒ Object

Get information about the dataset and its variables, given a Nesstar URL, e.g. http://nesstar.somewhere.com, and a dataset id. The study will contain variable-level information if available.

Returns a Nesstar::Study object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/nesstar-api.rb', line 174

# Builds a Nesstar::Study, including variable-level information, from the
# DDI document of a dataset.
#
# uri        - address of the Nesstar server as a String; it is passed on to
#              get_ddi, which downloads and decompresses the DDI
# dataset_id - id of the dataset (study) on that server
#
# The DDI is parsed with LibXML. The first stdyDscr element must exist, as
# must the title and IDNo elements; a missing one raises NoMethodError on
# nil, as before. The sampling procedure is only set when a sampProc element
# is present.
#
# Returns a Nesstar::Study with nesstar_id, nesstar_uri, abstract, title, id,
# dates, sampling_procedure and variables assigned.
def get_study_information(uri, dataset_id)
  # Reuse the shared download helper instead of repeating the request code.
  xml = get_ddi(uri, dataset_id)

  study = Nesstar::Study.new
  study.nesstar_id = dataset_id
  study.nesstar_uri = uri

  doc = LibXML::XML::Parser.string(xml).parse
  study_node = doc.find('//stdyDscr')[0]

  # All abstract elements are concatenated into a single string.
  study.abstract = study_node.find('//abstract').map { |ab| ab.content.strip }.join.strip
  study.title = study_node.find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
  study.id = study_node.find('//IDNo')[0].first.content.strip

  # Start and finish dates for the study: one StudyDate per collDate element,
  # taken from its "event" and "date" attributes.
  dates = study_node.find('//sumDscr/collDate').map do |coll_date|
    attrs = coll_date.attributes
    study_date = Nesstar::StudyDate.new
    study_date.type = attrs.get_attribute('event').value.strip
    study_date.date = attrs.get_attribute('date').value.strip
    study_date
  end
  study.dates = dates

  # Look the element up once rather than running the same XPath twice.
  sampling = study_node.find('//sampProc')[0]
  study.sampling_procedure = sampling.first.content.strip unless sampling.nil?
  # study.weight = studynodes[0].find('//sampProc')[0].first.content

  study.variables = get_variable_information(doc)
  study
end

#parse_surveys_from_nodes(node, surveys_hash, survey_types_hash) ⇒ Object

Given a tree of datasets and catalogs, contained in a RubyTree Node object, figure out which are the survey types and which are the surveys, and link them to their child datasets. This is to match the MethodBox (www.methodbox.org) data model: basically, a dataset belongs to a survey, which has a survey type.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/nesstar-api.rb', line 28

# Walks a tree of catalogs and datasets and records, for every dataset, the
# survey it belongs to and that survey's survey type.
#
# node              - tree node whose children are visited; nodes need to
#                     respond to children, name and parent
# surveys_hash      - Hash filled with survey name => Array of dataset names
# survey_types_hash - Hash filled with survey type name => Array of survey
#                     names (each survey listed once); surveys whose parent
#                     has no parent are filed under the key 'none'
#
# A child whose name contains 'fStudy' is treated as a dataset, its parent as
# the survey and the survey's parent as the survey type. Any other child is
# treated as a catalog and descended into.
#
# Both hashes are modified in place. Returns the result of iterating over
# node.children.
def parse_surveys_from_nodes(node, surveys_hash, survey_types_hash)
  node.children.each do |child|
    unless child.name.index('fStudy')
      # Not a dataset, so it is a catalog: keep going downwards.
      parse_surveys_from_nodes(child, surveys_hash, survey_types_hash)
      next
    end

    survey = child.parent
    surveys_hash[survey.name] = [] unless surveys_hash.key?(survey.name)
    surveys_hash[survey.name] << child.name

    survey_type = survey.parent
    type_key = survey_type ? survey_type.name : 'none'
    survey_types_hash[type_key] = [] unless survey_types_hash.key?(type_key)
    listed_surveys = survey_types_hash[type_key]
    listed_surveys << survey.name unless listed_surveys.include?(survey.name)
  end
end