Class: SubPageScraper

Inherits:
Object
  • Object
show all
Includes:
Capybara::DSL
Defined in:
lib/rubyscraper/sub_page_scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(site, listings, delay) ⇒ SubPageScraper

Returns a new instance of SubPageScraper.



8
9
10
11
12
13
14
15
16
17
# File 'lib/rubyscraper/sub_page_scraper.rb', line 8

# Stores the scrape inputs and selects Poltergeist as Capybara's driver.
#
# @param site [Hash] site configuration; other methods read its "name" and
#   "sub_page" entries
# @param listings [Enumerable] listings whose sub-pages will be visited
# @param delay [Numeric] value handed to `sleep` before each sub-page visit
def initialize(site, listings, delay)
  @site = site
  @listings = listings
  @delay = delay

  # Registered under the :poltergeist name with js_errors: false, then made
  # the default driver so the Capybara::DSL calls in this class use it.
  Capybara.register_driver(:poltergeist) { |app|
    Capybara::Poltergeist::Driver.new(app, js_errors: false)
  }
  Capybara.default_driver = :poltergeist
end

Instance Attribute Details

#delay ⇒ Object (readonly)

Returns the value of attribute delay.



5
6
7
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5

# Read-only accessor for the delay given to the constructor.
# #call passes this value to `sleep` before fetching each listing's sub-page.
def delay
  @delay
end

#listings ⇒ Object (readonly)

Returns the value of attribute listings.



5
6
7
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5

# Read-only accessor for the listings collection given to the constructor.
# #call iterates over it and visits each entry's sub-page.
def listings
  @listings
end

#site ⇒ Object (readonly)

Returns the value of attribute site.



5
6
7
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5

# Read-only accessor for the site configuration given to the constructor.
# Other methods read its "name" and "sub_page" entries.
def site
  @site
end

Instance Method Details

#call ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/rubyscraper/sub_page_scraper.rb', line 19

# Visits the sub-page of every listing and returns the enriched listings.
#
# Prints a header line with the listing count and site name, then, for each
# listing, sleeps for `delay`, pulls the sub-page fields and runs the
# location cleanup. A trailing newline is printed once all listings are done.
#
# @return [Array] one entry per listing, in the original order, each being
#   the result of pull_sub_page_data followed by listing_cleanup
def call
  puts "Pulling #{@listings.count} listings from #{@site["name"]}:"

  # A plain map: one output per input. This replaces an inject that only
  # appended to an accumulator and shadowed both the block parameter and
  # the `listings` reader.
  scraped = @listings.map do |listing|
    sleep delay
    listing_cleanup(pull_sub_page_data(site, listing))
  end

  puts "\n"
  scraped
end

#listing_cleanup(listing) ⇒ Object



48
49
50
51
52
# File 'lib/rubyscraper/sub_page_scraper.rb', line 48

# Normalises fields of a scraped listing in place.
#
# Currently this only strips the "Headquarters: " label that weworkremotely
# puts in front of a job's location. The singular spelling "Headquarter: "
# is accepted too, because that is what the previous implementation matched.
#
# @param listing [Hash] scraped listing; its "location" entry, when present
#   and non-empty, is modified in place
# @return [Hash] the same listing object
def listing_cleanup(listing)
  location = listing["location"]
  # The old literal "Headquarter: " never matched "Headquarters: ..." (the
  # "s" sits before the colon), so the label was silently left in place.
  location.slice!(/Headquarters?: /) unless location.to_s.empty?
  listing
end

#pull_sub_page_data(site, listing) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rubyscraper/sub_page_scraper.rb', line 29

# Visits a listing's URL and copies the configured sub-page fields into it.
#
# Each entry of site["sub_page"]["fields"] is skipped when its "path"
# selector is not present on the page. Otherwise:
# * "method" == "all": every matching element is sent the message named by
#   "loop_collect" and the results are joined with "join";
# * any other "method": that method is called with the selector and the
#   result's `text` is stored.
# The value is written to listing[<"field">]. A "." is printed as a progress
# tick once the listing has been processed.
#
# @param site [Hash] site configuration holding "sub_page" => "fields"
# @param listing [Hash] listing with a "url" entry; modified in place
# @return [Hash] the same listing object
def pull_sub_page_data(site, listing)
  visit listing["url"]

  site["sub_page"]["fields"].each do |spec|
    selector = spec["path"]
    next unless has_css?(selector)

    listing[spec["field"]] =
      if spec["method"] == "all"
        collected = all(selector).map { |node| node.send(spec["loop_collect"]) }
        collected.join(spec["join"])
      else
        send(spec["method"].to_sym, selector).text
      end
  end

  print "."
  listing
end