Class: RichterCatalogue::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/richter_catalogue/scraper.rb

Class Method Summary collapse

Class Method Details

.artist_page(artist_url) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/richter_catalogue/scraper.rb', line 3

# Scrapes an artist biography page into a hash of attributes.
#
# NOTE(review): assumes open-uri and Nokogiri are already required by the
# project (the original bare `open(url)` call needed open-uri as well) — confirm.
# The CSS selectors look like they target a Wikipedia infobox — confirm.
#
# @param artist_url [String] location of the artist page
# @return [Hash] with the keys :name, :age, :nationality, :movement,
#   :education and :artist_url; :education is a comma-separated list ending
#   in a full stop, or "" when no education links are found
def self.artist_page(artist_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open is the supported
  # entry point. The block form closes the handle once parsing is done.
  doc = URI.open(artist_url) { |io| Nokogiri::HTML(io) }

  education_cell = "div#mw-content-text table tr:nth-of-type(5) td:first-of-type"
  schools = [doc.css("#{education_cell} a.mw-redirect").text]
  schools.concat(doc.css("#{education_cell} a.mw-redirect ~ a").map(&:text))
  schools = schools.reject(&:empty?)
  # Join once instead of appending separators per element, so the list never
  # ends in a dangling ", " and "." only ever follows the last entry.
  education = schools.empty? ? "" : "#{schools.join(', ')}."

  {
    name: doc.css("span.fn").text,
    # The character class removes every "(", "a", "g", "e", " " and ")",
    # e.g. "(age 92)" becomes "92".
    age: doc.css("span.ForceAgeToShow").text.gsub(/[(age )]/, ""),
    nationality: doc.css("table tr:nth-of-type(4) td.category").text,
    movement: doc.css("tr td.category a").text,
    education: education,
    artist_url: artist_url
  }
end

.painting_page(painting_url) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/richter_catalogue/scraper.rb', line 56

# Scrapes a single painting page into a hash of attributes.
#
# NOTE(review): assumes open-uri and Nokogiri are already required by the
# project (the original bare `open(url)` call needed open-uri as well) — confirm.
#
# @param painting_url [String] location of the painting page
# @return [Hash] with the keys :medium, :year, :size and :name, plus :price
#   only when the page shows a non-blank price cell. :year holds whatever
#   RichterCatalogue::Year.find_or_create_by_name returns for the scraped
#   year text.
def self.painting_page(painting_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open is the supported
  # entry point. The block form closes the handle once parsing is done.
  doc = URI.open(painting_url) { |io| Nokogiri::HTML(io) }
  text_at = ->(selector) { doc.css(selector).text.strip }

  year_name = text_at.call("p.p-painting-info-year-size-etc span.span-painting-info-year")
  painting = {
    medium: text_at.call("#div-painting-info-box p.p-painting-info-medium"),
    year: RichterCatalogue::Year.find_or_create_by_name(year_name),
    size: text_at.call("p.p-painting-info-year-size-etc span.span-painting-info-size")
  }

  # Only record a price when the page has one, rather than storing "" and
  # deleting it again afterwards.
  price = text_at.call("div.info table tr td:first-of-type")
  painting[:price] = price unless price.empty?

  # Prefer the "title2" span and fall back to "title1" when it is blank.
  title = text_at.call("div#div-painting-info-box span.span-painting-title2")
  title = text_at.call("div#div-painting-info-box span.span-painting-title1") if title.empty?
  painting[:name] = title

  painting
end

.subject_page(subject_url) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'lib/richter_catalogue/scraper.rb', line 46

# Collects the painting links listed on a subject page.
#
# NOTE(review): assumes open-uri and Nokogiri are already required by the
# project (the original bare `open(url)` call needed open-uri as well) — confirm.
#
# @param subject_url [String] location of the subject page
# @return [Array<Hash>] one `{painting_url: href}` hash per thumbnail link,
#   in document order; links without an href attribute are skipped
def self.subject_page(subject_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open is the supported
  # entry point. The block form closes the handle once parsing is done.
  doc = URI.open(subject_url) { |io| Nokogiri::HTML(io) }

  doc.css("div.div-thumb.div-thumb-with-title a.a-thumb-link")
     .map { |link| link["href"] }
     .compact
     .map { |href| { painting_url: href } }
end

.subjects_page(subjects_url) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/richter_catalogue/scraper.rb', line 20

# Subject names that subjects_page keeps; every other category link on the
# page is ignored.
SUBJECT_NAMES = [
  "Aeroplanes",
  "Children",
  "Mother and Child",
  "Flowers",
  "Families",
  "Skulls"
].freeze

# Collects the subject categories of interest from the subjects index page.
#
# NOTE(review): assumes open-uri and Nokogiri are already required by the
# project (the original bare `open(url)` call needed open-uri as well) — confirm.
#
# @param subjects_url [String] location of the subjects index page
# @return [Array<Hash>] one `{name:, subject_url:}` hash per category link
#   whose stripped title is listed in SUBJECT_NAMES, in document order
def self.subjects_page(subjects_url)
  # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open is the supported
  # entry point. The block form closes the handle once parsing is done.
  doc = URI.open(subjects_url) { |io| Nokogiri::HTML(io) }

  links = doc.css("div.div-section-category div.div-section-category-mobile a.a-lit-cat")
  links.each_with_object([]) do |link, subjects|
    # to_s lets a link without a title attribute be skipped instead of raising.
    name = link["title"].to_s.strip
    next unless SUBJECT_NAMES.include?(name)

    subjects << { name: name, subject_url: link["href"] }
  end
end