Class: PlantToxicity::Scraper
- Inherits:
-
Object
- Object
- PlantToxicity::Scraper
- Defined in:
- lib/plant_toxicity/scraper.rb
Constant Summary collapse
- URL_BASE =
"https://www.aspca.org"
Class Method Summary collapse
Class Method Details
.get_list(letter) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/plant_toxicity/scraper.rb', line 5

# Collects every plant listed on the ASPCA plant index under one letter.
#
# Finds the index link whose text equals the upper-cased letter, gathers the
# URLs of all result pages for that letter, and builds one
# PlantToxicity::Plant (link text, absolute URL) per plant link found.
#
# @param letter [#to_s] letter to look up; compared upper-cased with the link text
# @return [Array<PlantToxicity::Plant>] plants in page order, or an empty
#   array when the index has no link for the letter
def self.get_list(letter)
  # Parse each URL at most once: the first and last pager pages are needed
  # both for pagination and for their plant links. The block form of
  # URI.open closes the downloaded handle as soon as it has been parsed.
  documents = Hash.new do |cache, url|
    cache[url] = URI.open(url) { |io| Nokogiri::HTML(io) }
  end
  wanted = letter.to_s.upcase

  index = documents[URL_BASE + "/pet-care/animal-poison-control/toxic-and-non-toxic-plants"]
  letter_link = index.search("div.view-content span.views-summary a").find { |link| link.text == wanted }
  return [] unless letter_link

  pages = [URL_BASE + letter_link["href"]]
  documents[pages.first].search("li.pager-item").each do |item|
    link = item.at("a")
    pages << URL_BASE + link["href"] if link && link["href"]
  end

  # "Next" links are only followed when the first page advertised further
  # pages, continuing from the last page collected so far.
  if pages.length > 1
    loop do
      next_item = documents[pages.last].search("ul.pager li").find { |item| item["class"] == "pager-next" }
      next_link = next_item&.at("a")
      break unless next_link && next_link["href"]

      next_url = URL_BASE + next_link["href"]
      # A pager that points back at a page already collected would loop forever.
      break if pages.include?(next_url)

      pages << next_url
    end
  end

  pages.flat_map do |page|
    documents[page].search("div.views-field-title a").map do |plant|
      PlantToxicity::Plant.new(plant.text, URL_BASE + plant["href"])
    end
  end
end
.get_plant_details(plant) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/plant_toxicity/scraper.rb', line 29

# Fills in a plant's detail attributes from the page at +plant.url+.
#
# Every labelled field on the page whose label is one of the five known
# labels has its value stripped and assigned to the matching writer on
# +plant+. Fields with any other label are ignored, and attributes whose
# label does not appear on the page are left untouched.
#
# @param plant [Object] must respond to +url+ and to the writers
#   +scientific_name=+, +other_names=+, +toxicity=+, +non_toxicity=+ and
#   +clinical_signs=+
# @return [Object] the collection of field nodes that was iterated
def self.get_plant_details(plant)
  attributes_by_label = {
    "Scientific Name:" => :scientific_name,
    "Additional Common Names:" => :other_names,
    "Toxicity:" => :toxicity,
    "Non-Toxicity:" => :non_toxicity,
    "Clinical Signs:" => :clinical_signs
  }

  # The block form of URI.open closes the downloaded handle after parsing.
  page = URI.open(plant.url) { |io| Nokogiri::HTML(io) }
  page.search("div.pane-entity-field div.field-items").each do |field|
    # Strip the label so surrounding whitespace in the markup cannot hide a field.
    label = field.search("span.label-inline-format-label").text.strip
    attribute = attributes_by_label[label]
    next unless attribute

    plant.public_send("#{attribute}=", field.search("span.values").text.strip)
  end
end