Class: SubPageScraper
- Inherits: Object
- Class hierarchy: Object → SubPageScraper
- Includes:
- Capybara::DSL
- Defined in:
- lib/rubyscraper/sub_page_scraper.rb
Instance Attribute Summary
-
#delay ⇒ Object
readonly
Returns the value of attribute delay.
-
#listings ⇒ Object
readonly
Returns the value of attribute listings.
-
#site ⇒ Object
readonly
Returns the value of attribute site.
Instance Method Summary
- #call ⇒ Object
-
#initialize(site, listings, delay) ⇒ SubPageScraper
constructor
A new instance of SubPageScraper.
- #listing_cleanup(listing) ⇒ Object
- #pull_sub_page_data(site, listing) ⇒ Object
Constructor Details
#initialize(site, listings, delay) ⇒ SubPageScraper
Returns a new instance of SubPageScraper.
8 9 10 11 12 13 14 15 16 17 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 8
# Stores the scraping inputs and configures Capybara to use Poltergeist.
#
# Every instantiation (re-)registers the :poltergeist driver with JavaScript
# errors suppressed (js_errors: false) and sets it as Capybara's default
# driver, which is process-wide Capybara configuration.
#
# @param site [Object] site configuration; read with string keys elsewhere
#   in this class (e.g. site["name"], site["sub_page"])
# @param listings [Enumerable] listings whose sub pages will be visited
# @param delay [Object] value handed to `sleep` before each listing in #call
def initialize(site, listings, delay)
  @site, @listings, @delay = site, listings, delay

  Capybara.register_driver(:poltergeist) { |app|
    Capybara::Poltergeist::Driver.new(app, js_errors: false)
  }
  Capybara.default_driver = :poltergeist
end
Instance Attribute Details
#delay ⇒ Object (readonly)
Returns the value of attribute delay.
5 6 7 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5
# Reader for the delay given to the constructor.
#
# @return [Object] the current value of @delay
def delay
  @delay
end
#listings ⇒ Object (readonly)
Returns the value of attribute listings.
5 6 7 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5
# Reader for the listings given to the constructor.
#
# @return [Object] the current value of @listings
def listings
  @listings
end
#site ⇒ Object (readonly)
Returns the value of attribute site.
5 6 7 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 5
# Reader for the site configuration given to the constructor.
#
# @return [Object] the current value of @site
def site
  @site
end
Instance Method Details
#call ⇒ Object
19 20 21 22 23 24 25 26 27 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 19
# Scrapes the sub page of every listing and returns the enriched listings.
#
# Prints a header line with the listing count and the site's "name", then
# for each listing: sleeps for `delay` (this happens before the first
# listing too), runs #pull_sub_page_data, and passes the result through
# #listing_cleanup. A trailing newline is printed once all listings are done.
#
# @return [Array] one processed listing per input listing, in input order
def call
  puts "Pulling #{@listings.count} listings from #{@site["name"]}:"

  scraped = @listings.map do |entry|
    sleep delay
    listing_cleanup(pull_sub_page_data(site, entry))
  end

  puts "\n"
  scraped
end
#listing_cleanup(listing) ⇒ Object
48 49 50 51 52 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 48
# Normalizes a scraped listing by stripping the headquarters label from its
# "location" value (a prefix used on weworkremotely job pages).
#
# The first occurrence of "Headquarters: " is removed. The singular
# "Headquarter: " spelling is also accepted so that any input the previous
# implementation cleaned is still cleaned.
#
# The location string is replaced rather than mutated in place, so frozen
# strings are handled and other references to the original string are not
# affected. Listings whose location is missing or empty are returned as-is.
#
# @param listing [Hash] listing data with string keys
# @return [Hash] the same listing object, with "location" cleaned if present
def listing_cleanup(listing)
  location = listing["location"]
  return listing if location.to_s.empty?

  listing["location"] = location.sub(/Headquarters?: /, "")
  listing
end
#pull_sub_page_data(site, listing) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/rubyscraper/sub_page_scraper.rb', line 29
# Visits a listing's URL and copies the configured sub-page fields into it.
#
# For each entry in site["sub_page"]["fields"] whose "path" selector is
# present on the page:
# * when "method" is "all", every matching element is sent the message
#   named by "loop_collect" and the results are joined with "join";
# * otherwise the message named by "method" is sent to self with the
#   selector, and the returned element's text is used.
# The value is stored in the listing under the key named by "field".
# Fields whose selector is not found leave the listing untouched.
# A "." is printed after the page has been processed.
#
# @param site [Hash] site configuration containing "sub_page" => "fields"
# @param listing [Hash] listing with a "url" key; mutated in place
# @return [Hash] the same listing object, with the scraped fields added
def pull_sub_page_data(site, listing)
  visit listing["url"]

  site["sub_page"]["fields"].each do |field|
    selector = field["path"]
    next unless has_css?(selector)

    listing[field["field"]] =
      if field["method"] == "all"
        all(selector).map { |node| node.send(field["loop_collect"]) }.join(field["join"])
      else
        send(field["method"].to_sym, selector).text
      end
  end

  print "."
  listing
end