Class: TaxPub
- Inherits: Object
- Inheritance chain:
  - Object
  - TaxPub
- Defined in:
- lib/taxpub.rb,
lib/taxpub/utils.rb,
lib/taxpub/version.rb,
lib/taxpub/reference.rb,
lib/taxpub/validator.rb,
lib/taxpub/exceptions.rb
Defined Under Namespace
Classes: Error, InvalidParameterValueError, InvalidTypeError, Reference, Utils, Validator
Constant Summary collapse
- VERSION =
"0.1.1"
Class Method Summary collapse
Instance Method Summary collapse
-
#abstract ⇒ Object
Get the abstract.
-
#authors ⇒ Object
Get the authors.
-
#conference ⇒ Object
Get the conference metadata.
-
#content ⇒ Object
Get the raw text content of the Nokogiri document.
-
#corresponding_author ⇒ Object
Get the corresponding author.
-
#doc ⇒ Object
View the parsed Nokogiri document.
-
#doi ⇒ Object
Get the DOI.
-
#figures ⇒ Object
Get the figures.
- #file_path ⇒ Object
-
#file_path=(file_path) ⇒ Object
Set a file path for a TaxPub XML file.
-
#initialize ⇒ TaxPub
constructor
A new instance of TaxPub.
-
#keywords ⇒ Object
Get the keywords.
-
#occurrences ⇒ Object
Get occurrences with dwc keys.
-
#params ⇒ Object
View the built parameters.
-
#parse ⇒ Object
Build the Nokogiri document.
-
#references ⇒ Object
Get the cited references.
-
#scientific_names(hsh = {}) ⇒ Object
Get the taxa.
-
#title ⇒ Object
Get the title.
- #type ⇒ Object
- #url ⇒ Object
-
#url=(url) ⇒ Object
Specify a remote TaxPub URL. The source must be an XML file.
Constructor Details
#initialize ⇒ TaxPub
Returns a new instance of TaxPub.
12 13 14 15 |
# File 'lib/taxpub.rb', line 12 def initialize @parameters = {} @doc = {} end |
Class Method Details
.version ⇒ Object
4 5 6 |
# File 'lib/taxpub/version.rb', line 4 def self.version VERSION end |
Instance Method Details
#abstract ⇒ Object
Get the abstract
113 114 115 116 117 118 |
# File 'lib/taxpub.rb', line 113 def abstract Validator.validate_nokogiri(@doc) xpath = "//*/article-meta/abstract" a = @doc.xpath(xpath).text Utils.clean_text(a) end |
#authors ⇒ Object
Get the authors
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/taxpub.rb', line 133 def authors Validator.validate_nokogiri(@doc) data = [] xpath = "//*/contrib[@contrib-type='author']" @doc.xpath(xpath).each do |author| affiliations = [] author.xpath("xref/@rid").each do |rid| xpath = "//*/aff[@id='#{rid}']/addr-line" affiliations << Utils.clean_text(@doc.xpath(xpath).text) end orcid = author.xpath("uri[@content-type='orcid']").text given = Utils.clean_text(author.xpath("name/given-names").text) surname = Utils.clean_text(author.xpath("name/surname").text) data << { given: given, surname: surname, fullname: [given, surname].join(" "), email: author.xpath("email").text, affiliations: affiliations, orcid: orcid } end data end |
#conference ⇒ Object
Get the conference metadata
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/taxpub.rb', line 172 def conference Validator.validate_nokogiri(@doc) xpath = "//*/conference" conf = @doc.xpath(xpath) return {} if conf.empty? session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject" session = Utils.clean_text(@doc.xpath(session_xpath).text) presenter_xpath = "//*/sec[@sec-type='Presenting author']/p" presenter = Utils.clean_text(@doc.xpath(presenter_xpath).text) { date: Utils.clean_text(conf.at_xpath("conf-date").text), name: Utils.clean_text(conf.at_xpath("conf-name").text), acronym: Utils.clean_text(conf.at_xpath("conf-acronym").text), location: Utils.clean_text(conf.at_xpath("conf-loc").text), theme: Utils.clean_text(conf.at_xpath("conf-theme").text), session: session, presenter: presenter } end |
#content ⇒ Object
Get the raw text content of the Nokogiri document
86 87 88 89 |
# File 'lib/taxpub.rb', line 86 def content Validator.validate_nokogiri(@doc) Utils.clean_text(@doc.text) end |
#corresponding_author ⇒ Object
Get the corresponding author
162 163 164 165 166 167 |
# File 'lib/taxpub.rb', line 162 def corresponding_author Validator.validate_nokogiri(@doc) xpath = "//*/author-notes/fn[@fn-type='corresp']/p" author_string = Utils.clean_text(@doc.xpath(xpath).text) author_string.gsub("Corresponding author: ", "").chomp(".") end |
#doc ⇒ Object
View the parsed Nokogiri document
73 74 75 |
# File 'lib/taxpub.rb', line 73 def doc @doc end |
#doi ⇒ Object
Get the DOI
94 95 96 97 98 |
# File 'lib/taxpub.rb', line 94 def doi Validator.validate_nokogiri(@doc) xpath = "//*/article-meta/article-id[@pub-id-type='doi']" Utils.(@doc.xpath(xpath).text) end |
#figures ⇒ Object
Get the figures
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
# File 'lib/taxpub.rb', line 229 def figures Validator.validate_nokogiri(@doc) data = [] xpath = "//*/fig" @doc.xpath(xpath).each do |fig| data << { label: Utils.clean_text(fig.xpath("label").text), caption: Utils.clean_text(fig.xpath("caption").text), graphic: { href: fig.xpath("graphic").attribute("href").text, id: fig.xpath("graphic").attribute("id").text } } end data end |
#file_path ⇒ Object
53 54 55 |
# File 'lib/taxpub.rb', line 53 def file_path @parameters[:file].path rescue nil end |
#file_path=(file_path) ⇒ Object
Set a file path for a TaxPub XML file
Example
instance.file_path = "/Users/jane/Desktop/taxpub.xml"
48 49 50 51 |
# File 'lib/taxpub.rb', line 48 def file_path=(file_path) Validator.validate_type(file_path, 'File') @parameters[:file] = File.new(file_path, "r") end |
#keywords ⇒ Object
Get the keywords
123 124 125 126 127 128 |
# File 'lib/taxpub.rb', line 123 def keywords Validator.validate_nokogiri(@doc) xpath = "//*/article-meta/kwd-group/kwd" @doc.xpath(xpath) .map{|a| Utils.clean_text(a.text)} end |
#occurrences ⇒ Object
Get occurrences with dwc keys
211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/taxpub.rb', line 211 def occurrences Validator.validate_nokogiri(@doc) data = [] xpath = "//*/list[@list-content='occurrences']/list-item" @doc.xpath(xpath).each do |occ| obj = {} occ.xpath("*/named-content").each do |dwc| prefix = dwc.attributes["content-type"].text.gsub(/dwc\:/, "") obj[prefix.to_sym] = dwc.text end data << obj end data end |
#params ⇒ Object
View the built parameters
20 21 22 |
# File 'lib/taxpub.rb', line 20 def params @parameters end |
#parse ⇒ Object
Build the Nokogiri document
60 61 62 63 64 65 66 67 68 |
# File 'lib/taxpub.rb', line 60 def parse if url @doc = Nokogiri::XML(open(url)) elsif file_path @doc = File.open(file_path) { |f| Nokogiri::XML(f) } end Validator.validate_nokogiri(@doc) @doc end |
#references ⇒ Object
Get the cited references
249 250 251 252 253 |
# File 'lib/taxpub.rb', line 249 def references Validator.validate_nokogiri(@doc) xpath = "//*/ref-list/ref" @doc.xpath(xpath).map{ |r| Reference.parse(r) } end |
#scientific_names(hsh = {}) ⇒ Object
Get the taxa
Attributes
- hsh - Hash; pass { with_ranks: true } to have the scientific names returned with ranks as keys
199 200 201 202 203 204 205 |
# File 'lib/taxpub.rb', line 199 def scientific_names(hsh = {}) if hsh[:with_ranks] scientific_names_with_ranks else scientific_names_with_ranks.map{ |s| s.values.join(" ") } end end |
#title ⇒ Object
Get the title
103 104 105 106 107 108 |
# File 'lib/taxpub.rb', line 103 def title Validator.validate_nokogiri(@doc) xpath = "//*/article-meta/title-group/article-title" t = @doc.xpath(xpath).text Utils.clean_text(t) end |
#type ⇒ Object
77 78 79 80 81 |
# File 'lib/taxpub.rb', line 77 def type Validator.validate_nokogiri(@doc) xpath = "/article/@article-type" @doc.xpath(xpath).text end |
#url ⇒ Object
37 38 39 |
# File 'lib/taxpub.rb', line 37 def url @parameters[:url] || nil end |
#url=(url) ⇒ Object
Specify a remote TaxPub URL. The source must be an XML file.
Example
instance.url = "https://tdwgproceedings.pensoft.net/article/15141/download/xml/"
32 33 34 35 |
# File 'lib/taxpub.rb', line 32 def url=(url) Validator.validate_url(url) @parameters[:url] = url end |