Class: ListScraper::UpdateScraper
- Inherits:
-
Object
- Object
- ListScraper::UpdateScraper
- Defined in:
- lib/storeListScraper/update_scraper.rb
Instance Attribute Summary
-
#base ⇒ Object
readonly
Returns the value of attribute base.
-
#letters ⇒ Object
Returns the value of attribute letters.
-
#list ⇒ Object
Returns the value of attribute list.
-
#pages ⇒ Object
Returns the value of attribute pages.
Instance Method Summary
-
#initialize ⇒ UpdateScraper
constructor
A new instance of UpdateScraper.
- #letters_scrape ⇒ Object
- #pages_scrape(letter_link) ⇒ Object
- #update ⇒ Object
- #update_business_list ⇒ Object
Constructor Details
#initialize ⇒ UpdateScraper
Returns a new instance of UpdateScraper.
6 7 8 9 10 11 12 13 14 |
# File 'lib/storeListScraper/update_scraper.rb', line 6
#
# Sets up the scraper state, recreates the business-list CSV with its header
# row, and immediately runs a full #update.
#
# NOTE(review): the CSV path is spelled "storeListScrapper" (double "p") while
# this file is reported under lib/storeListScraper/ -- confirm the directory
# really exists under that name; the path is kept unchanged here.
def initialize
  csv_path = './lib/storeListScrapper/business_list.csv'

  @base = 'https://storefound.org/'
  @letters = [] # array of links for each letter group
  @pages = []   # array to store pages for each letter

  # Start every run from a fresh file.
  File.delete(csv_path) if File.exist?(csv_path)
  @list = CSV.open(csv_path, 'w')
  @list << ['Company Name', 'link'] # headers
  update
end
Instance Attribute Details
#base ⇒ Object (readonly)
Returns the value of attribute base.
4 5 6 |
# File 'lib/storeListScraper/update_scraper.rb', line 4
#
# Returns the value of attribute base (read-only).
def base
  @base
end
#letters ⇒ Object
Returns the value of attribute letters.
3 4 5 |
# File 'lib/storeListScraper/update_scraper.rb', line 3
#
# Returns the value of attribute letters.
def letters
  @letters
end
#list ⇒ Object
Returns the value of attribute list.
3 4 5 |
# File 'lib/storeListScraper/update_scraper.rb', line 3
#
# Returns the value of attribute list.
def list
  @list
end
#pages ⇒ Object
Returns the value of attribute pages.
3 4 5 |
# File 'lib/storeListScraper/update_scraper.rb', line 3
#
# Returns the value of attribute pages.
def pages
  @pages
end
Instance Method Details
#letters_scrape ⇒ Object
25 26 27 28 29 30 31 |
# File 'lib/storeListScraper/update_scraper.rb', line 25
#
# Scrapes all main links for the letter groups into @letters.
#
# The list is rebuilt from scratch on every call, mirroring how #pages_scrape
# resets @pages, so running #update more than once does not duplicate entries.
def letters_scrape
  @letters.clear
  doc = Nokogiri::HTML5(URI.open("#{@base}store/starts-a/page-1"))
  doc.css('.letter-block a').each do |anchor|
    href = anchor['href']
    # An anchor without an href carries no link to follow; skip it.
    @letters << href if href
  end
end
#pages_scrape(letter_link) ⇒ Object
33 34 35 36 37 38 39 40 |
# File 'lib/storeListScraper/update_scraper.rb', line 33
#
# Scrapes all page links for one letter group into @pages, replacing whatever
# the previous letter group left there.
#
# letter_link - link of the letter group, appended to @base to build the URL.
def pages_scrape(letter_link)
  @pages.clear
  doc = Nokogiri::HTML5(URI.open("#{@base}#{letter_link}"))
  doc.css('.pagination a').each do |anchor|
    href = anchor['href']
    # An anchor without an href carries no link to follow; skip it.
    @pages << href if href
  end
end
#update ⇒ Object
16 17 18 19 20 21 22 23 |
# File 'lib/storeListScraper/update_scraper.rb', line 16
#
# Runs a full refresh: collects the letter-group links, then for each letter
# collects its page links and appends the businesses found on those pages to
# the list. Prints a confirmation when done.
def update
  letters_scrape
  @letters.each do |letter|
    pages_scrape(letter)
    update_business_list
  end
  # Push buffered rows out before reporting success, so the confirmation is
  # not printed while data is still sitting in the IO buffer.
  @list.flush if @list.respond_to?(:flush)
  puts "Successfully updated!!"
end
#update_business_list ⇒ Object
42 43 44 45 46 47 48 49 50 |
# File 'lib/storeListScraper/update_scraper.rb', line 42
#
# Scrapes all business names and their corresponding links from every page in
# @pages and appends one [name, link] row per business to @list.
def update_business_list
  @pages.each do |page_link|
    doc = Nokogiri::HTML5(URI.open("#{@base}#{page_link}"))
    doc.css('.main-block .col-half a').each do |biz|
      href = biz['href']
      # An anchor without an href has no link to record; skip it.
      next unless href

      @list << [biz.text, href]
    end
  end
end