Class: TaxPub

Inherits:
Object
  • Object
show all
Defined in:
lib/taxpub.rb,
lib/taxpub/utils.rb,
lib/taxpub/version.rb,
lib/taxpub/reference.rb,
lib/taxpub/validator.rb,
lib/taxpub/exceptions.rb

Defined Under Namespace

Classes: Error, InvalidParameterValueError, InvalidTypeError, Reference, Utils, Validator

Constant Summary collapse

VERSION =
"0.1.1"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ TaxPub

Returns a new instance of TaxPub.



12
13
14
15
# File 'lib/taxpub.rb', line 12

# Create a new instance with nothing configured and nothing parsed yet.
def initialize
  @parameters = {} # filled in later by #url= and #file_path=
  @doc = {}        # placeholder; #parse overwrites it with the parsed document
end

Class Method Details

.version ⇒ Object



4
5
6
# File 'lib/taxpub/version.rb', line 4

# Report the library version.
#
# Returns the VERSION constant.
def self.version
  VERSION
end

Instance Method Details

#abstract ⇒ Object

Get the abstract



113
114
115
116
117
118
# File 'lib/taxpub.rb', line 113

# Get the abstract.
#
# Validates @doc first, then collects the text of every node matching
# article-meta/abstract and hands it to Utils.clean_text.
#
# Returns the result of Utils.clean_text.
def abstract
  Validator.validate_nokogiri(@doc)
  abstract_nodes = @doc.xpath("//*/article-meta/abstract")
  Utils.clean_text(abstract_nodes.text)
end

#authors ⇒ Object

Get the authors



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/taxpub.rb', line 133

# Get the authors.
#
# Validates @doc, then builds one Hash per contrib element whose
# contrib-type is 'author'.
#
# Returns an Array of Hashes with the keys :given, :surname, :fullname,
# :email, :affiliations and :orcid.
def authors
  Validator.validate_nokogiri(@doc)
  @doc.xpath("//*/contrib[@contrib-type='author']").map do |contrib|
    # Each xref rid is looked up document-wide as the id of an aff element.
    affiliations = contrib.xpath("xref/@rid").map do |rid|
      Utils.clean_text(@doc.xpath("//*/aff[@id='#{rid}']/addr-line").text)
    end
    given = Utils.clean_text(contrib.xpath("name/given-names").text)
    surname = Utils.clean_text(contrib.xpath("name/surname").text)
    {
      given: given,
      surname: surname,
      fullname: [given, surname].join(" "),
      email: contrib.xpath("email").text,
      affiliations: affiliations,
      orcid: contrib.xpath("uri[@content-type='orcid']").text
    }
  end
end

#conference ⇒ Object

Get the conference metadata



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/taxpub.rb', line 172

# Get the conference metadata.
#
# Validates @doc, then reads the conference element plus the session and
# presenter text found elsewhere in the document.
#
# Returns an empty Hash when the document has no conference element.
# Otherwise returns a Hash with the keys :date, :name, :acronym, :location,
# :theme, :session and :presenter. A conference child element that is not
# present yields the cleaned form of an empty string rather than raising.
def conference
  Validator.validate_nokogiri(@doc)
  conf = @doc.xpath("//*/conference")
  return {} if conf.empty?

  # at_xpath returns nil when nothing matches, so calling .text on it
  # directly would raise NoMethodError for any absent child element.
  child_text = lambda do |name|
    node = conf.at_xpath(name)
    Utils.clean_text(node ? node.text : "")
  end

  session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
  presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
  {
    date: child_text.call("conf-date"),
    name: child_text.call("conf-name"),
    acronym: child_text.call("conf-acronym"),
    location: child_text.call("conf-loc"),
    theme: child_text.call("conf-theme"),
    session: Utils.clean_text(@doc.xpath(session_xpath).text),
    presenter: Utils.clean_text(@doc.xpath(presenter_xpath).text)
  }
end

#content ⇒ Object

Get the raw text content of the Nokogiri document



86
87
88
89
# File 'lib/taxpub.rb', line 86

# Get the raw text content of the parsed document.
#
# Validates @doc, then passes the document's full text through
# Utils.clean_text and returns the result.
def content
  Validator.validate_nokogiri(@doc)
  raw_text = @doc.text
  Utils.clean_text(raw_text)
end

#corresponding_author ⇒ Object

Get the corresponding author



162
163
164
165
166
167
# File 'lib/taxpub.rb', line 162

# Get the corresponding author.
#
# Validates @doc, reads the paragraph text of the author-notes footnote
# whose fn-type is 'corresp', cleans it, removes every occurrence of the
# "Corresponding author: " label and drops one trailing period.
#
# Returns the remaining String.
def corresponding_author
  Validator.validate_nokogiri(@doc)
  note = @doc.xpath("//*/author-notes/fn[@fn-type='corresp']/p").text
  Utils.clean_text(note)
       .gsub("Corresponding author: ", "")
       .chomp(".")
end

#doc ⇒ Object

View the parsed Nokogiri document



73
74
75
# File 'lib/taxpub.rb', line 73

# View the parsed Nokogiri document.
#
# Returns whatever is currently stored in @doc: the empty Hash assigned by
# #initialize until #parse has replaced it.
def doc
  @doc
end

#doi ⇒ Object

Get the DOI



94
95
96
97
98
# File 'lib/taxpub.rb', line 94

# Get the DOI.
#
# Validates @doc, reads the text of the article-id element whose
# pub-id-type is 'doi', and returns it after passing it through
# Utils.expand_doi.
def doi
  Validator.validate_nokogiri(@doc)
  raw_doi = @doc.xpath("//*/article-meta/article-id[@pub-id-type='doi']").text
  Utils.expand_doi(raw_doi)
end

#figures ⇒ Object

Get the figures



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/taxpub.rb', line 229

# Get the figures.
#
# Validates @doc, then builds one Hash per fig element.
#
# Returns an Array of Hashes shaped like
#   { label: String, caption: String, graphic: { href: String, id: String } }
# When a fig has no graphic child, or the graphic lacks the href or id
# attribute, the corresponding value is an empty String instead of the
# lookup raising NoMethodError.
def figures
  Validator.validate_nokogiri(@doc)
  @doc.xpath("//*/fig").map do |fig|
    graphic = fig.xpath("graphic")
    # attribute returns nil when there is no graphic node or the attribute
    # is missing, so guard before asking for its text.
    attr_text = lambda do |name|
      attr = graphic.attribute(name)
      attr ? attr.text : ""
    end
    {
      label: Utils.clean_text(fig.xpath("label").text),
      caption: Utils.clean_text(fig.xpath("caption").text),
      graphic: {
        href: attr_text.call("href"),
        id: attr_text.call("id")
      }
    }
  end
end

#file_path ⇒ Object



53
54
55
# File 'lib/taxpub.rb', line 53

# View the path of the file set through #file_path=.
#
# Returns the path of the object stored under @parameters[:file], or nil
# when nothing is stored there (or the stored object has no #path).
def file_path
  file = @parameters[:file]
  # Explicit check instead of a blanket `rescue nil`, which would also hide
  # unrelated errors.
  file.path if file.respond_to?(:path)
end

#file_path=(file_path) ⇒ Object

Set a file path for a TaxPub XML file

Example

instance.file_path = "/Users/jane/Desktop/taxpub.xml"


48
49
50
51
# File 'lib/taxpub.rb', line 48

# Set a file path for a TaxPub XML file.
#
# file_path - path handed to Validator.validate_type (with the type name
#             'File') and then opened read-only.
#
# Stores the opened File under @parameters[:file].
#
# NOTE(review): the File opened here is not closed in this method, and
# #parse reopens the path on its own. Confirm whether the handle needs to
# stay open.
def file_path=(file_path)
  Validator.validate_type(file_path, 'File')
  handle = File.new(file_path, "r")
  @parameters[:file] = handle
end

#keywords ⇒ Object

Get the keywords



123
124
125
126
127
128
# File 'lib/taxpub.rb', line 123

# Get the keywords.
#
# Validates @doc, then returns an Array holding the cleaned text of every
# kwd element found under article-meta/kwd-group.
def keywords
  Validator.validate_nokogiri(@doc)
  kwd_nodes = @doc.xpath("//*/article-meta/kwd-group/kwd")
  kwd_nodes.map { |kwd| Utils.clean_text(kwd.text) }
end

#occurrences ⇒ Object

Get occurrences with dwc keys



211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/taxpub.rb', line 211

# Get occurrences with dwc keys.
#
# Validates @doc, then turns every list-item of a list whose list-content
# is 'occurrences' into a Hash. Each named-content descendant contributes
# one entry: the key is its content-type attribute with "dwc:" removed
# (as a Symbol), the value is the element's text.
#
# Returns an Array of those Hashes.
def occurrences
  Validator.validate_nokogiri(@doc)
  @doc.xpath("//*/list[@list-content='occurrences']/list-item").map do |item|
    item.xpath("*/named-content").each_with_object({}) do |term, record|
      key = term.attributes["content-type"].text.gsub(/dwc\:/, "")
      record[key.to_sym] = term.text
    end
  end
end

#params ⇒ Object

View the built parameters



20
21
22
# File 'lib/taxpub.rb', line 20

# View the built parameters.
#
# Returns the @parameters Hash itself (not a copy); #url= and #file_path=
# write the :url and :file entries into it.
def params
  @parameters
end

#parse ⇒ Object

Build the Nokogiri document



60
61
62
63
64
65
66
67
68
# File 'lib/taxpub.rb', line 60

# Build the Nokogiri document.
#
# Reads XML from #url when one is set, otherwise from #file_path, parses it
# with Nokogiri::XML and stores the result in @doc. When neither source is
# set, @doc is left untouched and goes straight to validation.
#
# Returns the document after Validator.validate_nokogiri has accepted it.
def parse
  if url
    # Open through the URI object rather than bare Kernel#open: since
    # Ruby 3.0 Kernel#open no longer fetches URLs, and it treats a string
    # starting with "|" as a command to run. The block form also closes the
    # stream once parsing is done. Relies on open-uri being loaded, which
    # the previous open(url) call needed as well.
    @doc = URI.parse(url).open { |io| Nokogiri::XML(io) }
  elsif file_path
    @doc = File.open(file_path) { |f| Nokogiri::XML(f) }
  end
  Validator.validate_nokogiri(@doc)
  @doc
end

#references ⇒ Object

Get the cited references



249
250
251
252
253
# File 'lib/taxpub.rb', line 249

# Get the cited references.
#
# Validates @doc, then hands every ref element under ref-list to
# Reference.parse.
#
# Returns an Array of whatever Reference.parse returns for each element.
def references
  Validator.validate_nokogiri(@doc)
  ref_nodes = @doc.xpath("//*/ref-list/ref")
  ref_nodes.map { |ref| Reference.parse(ref) }
end

#scientific_names(hsh = {}) ⇒ Object

Get the taxa

Attributes

  • hsh - Hash { with_ranks: true } for scientific names returned with ranks as keys



199
200
201
202
203
204
205
# File 'lib/taxpub.rb', line 199

# Get the taxa.
#
# hsh - Hash of options. A truthy :with_ranks returns the result of
#       scientific_names_with_ranks unchanged; otherwise each entry's
#       values are joined with a single space.
#
# Returns the ranked entries, or an Array of joined name Strings.
def scientific_names(hsh = {})
  return scientific_names_with_ranks if hsh[:with_ranks]

  scientific_names_with_ranks.map { |ranked| ranked.values.join(" ") }
end

#title ⇒ Object

Get the title



103
104
105
106
107
108
# File 'lib/taxpub.rb', line 103

# Get the title.
#
# Validates @doc, then returns the text of the article-title element under
# article-meta/title-group after passing it through Utils.clean_text.
def title
  Validator.validate_nokogiri(@doc)
  title_nodes = @doc.xpath("//*/article-meta/title-group/article-title")
  Utils.clean_text(title_nodes.text)
end

#type ⇒ Object



77
78
79
80
81
# File 'lib/taxpub.rb', line 77

# Get the article type.
#
# Validates @doc, then returns the text of the article-type attribute on
# the root article element, exactly as stored (no cleaning is applied).
def type
  Validator.validate_nokogiri(@doc)
  @doc.xpath("/article/@article-type").text
end

#url ⇒ Object



37
38
39
# File 'lib/taxpub.rb', line 37

# View the URL set through #url=.
#
# Returns the value stored under @parameters[:url], or nil when no truthy
# value is stored there.
def url
  stored = @parameters[:url]
  stored ? stored : nil
end

#url=(url) ⇒ Object

Specify a remote TaxPub URL. The source must be an XML file.

Example

instance.url = "https://tdwgproceedings.pensoft.net/article/15141/download/xml/"


32
33
34
35
# File 'lib/taxpub.rb', line 32

# Specify a remote TaxPub URL.
#
# url - value checked by Validator.validate_url before being stored.
#
# Stores the value under @parameters[:url].
def url=(url)
  Validator.validate_url(url)
  @parameters.store(:url, url)
end