Class: RichterCatalogue::Scraper
- Inherits: Object
  - Object
    - RichterCatalogue::Scraper
- Defined in:
- lib/richter_catalogue/scraper.rb
Class Method Summary
- .artist_page(artist_url) ⇒ Object
- .painting_page(painting_url) ⇒ Object
- .subject_page(subject_url) ⇒ Object
- .subjects_page(subjects_url) ⇒ Object
Class Method Details
.artist_page(artist_url) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/richter_catalogue/scraper.rb', line 3
#
# Fetches the page at +artist_url+ and extracts the artist's profile fields.
#
# @param artist_url [String] address of the page to fetch
# @return [Hash] with the keys :name, :age, :nationality, :movement,
#   :education and :artist_url (the URL is echoed back unchanged)
def self.artist_page(artist_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0 and can run a subprocess when
  # the argument starts with "|". URI.open avoids both, and the block form
  # closes the handle as soon as the document has been parsed.
  doc = URI.open(artist_url) { |page| Nokogiri::HTML(page) }

  education_link = "div#mw-content-text table tr:nth-of-type(5) td:first-of-type a.mw-redirect"
  schools = [doc.css(education_link).text] + doc.css("#{education_link} ~ a").map(&:text)
  schools = schools.reject(&:empty?)

  {
    name: doc.css("span.fn").text,
    # Drops the characters "(", ")", "a", "g", "e" and spaces: "(age 84)" becomes "84".
    age: doc.css("span.ForceAgeToShow").text.delete("(age )"),
    nationality: doc.css("table tr:nth-of-type(4) td.category").text,
    movement: doc.css("tr td.category a").text,
    # Comma-separated list closed by a single full stop; empty when nothing matched.
    education: schools.empty? ? "" : "#{schools.join(', ')}.",
    artist_url: artist_url
  }
end
.painting_page(painting_url) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/richter_catalogue/scraper.rb', line 56
#
# Fetches the page at +painting_url+ and extracts the painting's details.
#
# @param painting_url [String] address of the page to fetch
# @return [Hash] with the keys :medium, :year, :size and :name, plus :price
#   only when the price cell contains text. :year holds whatever
#   RichterCatalogue::Year.find_or_create_by_name returns for the scraped year.
def self.painting_page(painting_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0 and can run a subprocess when
  # the argument starts with "|". URI.open avoids both, and the block form
  # closes the handle as soon as the document has been parsed.
  doc = URI.open(painting_url) { |page| Nokogiri::HTML(page) }
  text_at = ->(selector) { doc.css(selector).text.strip }

  painting = {
    medium: text_at.call("#div-painting-info-box p.p-painting-info-medium"),
    year: RichterCatalogue::Year.find_or_create_by_name(
      text_at.call("p.p-painting-info-year-size-etc span.span-painting-info-year")
    ),
    size: text_at.call("p.p-painting-info-year-size-etc span.span-painting-info-size")
  }

  # The :price key is left out entirely when no price text is present.
  price = text_at.call("div.info table tr td:first-of-type")
  painting[:price] = price unless price.empty?

  # Prefer the "title2" span; fall back to "title1" when it is blank.
  title = text_at.call("div#div-painting-info-box span.span-painting-title2")
  title = text_at.call("div#div-painting-info-box span.span-painting-title1") if title.empty?
  painting[:name] = title

  painting
end
.subject_page(subject_url) ⇒ Object
46 47 48 49 50 51 52 53 54 |
# File 'lib/richter_catalogue/scraper.rb', line 46
#
# Fetches the page at +subject_url+ and collects the link target of every
# thumbnail anchor on it.
#
# @param subject_url [String] address of the page to fetch
# @return [Array<Hash>] one +{painting_url: href}+ hash per matching anchor,
#   in document order; empty when nothing matches
def self.subject_page(subject_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0 and can run a subprocess when
  # the argument starts with "|". URI.open avoids both, and the block form
  # closes the handle as soon as the document has been parsed.
  doc = URI.open(subject_url) { |page| Nokogiri::HTML(page) }

  doc.css("div.div-thumb.div-thumb-with-title a.a-thumb-link").map do |link|
    { painting_url: link.attribute("href").value }
  end
end
.subjects_page(subjects_url) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/richter_catalogue/scraper.rb', line 20
#
# Subject titles that subjects_page keeps; links with any other title are skipped.
SCRAPED_SUBJECT_NAMES = [
  "Aeroplanes",
  "Children",
  "Mother and Child",
  "Flowers",
  "Families",
  "Skulls"
].freeze

# Fetches the page at +subjects_url+ and returns the category links whose
# stripped +title+ attribute is one of SCRAPED_SUBJECT_NAMES.
#
# @param subjects_url [String] address of the page to fetch
# @return [Array<Hash>] one +{name:, subject_url:}+ hash per kept link, in
#   document order; empty when nothing matches
def self.subjects_page(subjects_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0 and can run a subprocess when
  # the argument starts with "|". URI.open avoids both, and the block form
  # closes the handle as soon as the document has been parsed.
  doc = URI.open(subjects_url) { |page| Nokogiri::HTML(page) }
  links = doc.css("div.div-section-category div.div-section-category-mobile a.a-lit-cat")

  links.each_with_object([]) do |link, subjects|
    name = link.attribute("title").text.strip
    next unless SCRAPED_SUBJECT_NAMES.include?(name)

    subjects << { name: name, subject_url: link.attribute("href").value }
  end
end