Class: ListScraper::UpdateScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/storeListScraper/update_scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ UpdateScraper

Returns a new instance of UpdateScraper.



6
7
8
9
10
11
12
13
14
# File 'lib/storeListScraper/update_scraper.rb', line 6

# Prepares the scraper's state and immediately runs a full scrape via #update.
#
# Any existing business_list.csv (path relative to the working directory) is
# removed, a fresh file is opened for writing, and the header row is written
# before scraping starts.
#
# NOTE(review): the CSV path says "storeListScrapper" (double "p") while this
# file is reported as living under lib/storeListScraper/ — confirm the
# directory name is intended.
# NOTE(review): the CSV handle kept in @list is not closed in this method.
def initialize
    csv_path = './lib/storeListScrapper/business_list.csv'

    @base = 'https://storefound.org/'
    @letters, @pages = [], [] # letter-group links / page links of one letter group

    # Start from a clean file so rows from an earlier run never survive.
    File.delete(csv_path) if File.exist?(csv_path)
    @list = CSV.open(csv_path, "w")
    @list << ["Company Name", "link"] # header row

    update
end

Instance Attribute Details

#base ⇒ Object (readonly)

Returns the value of attribute base.



4
5
6
# File 'lib/storeListScraper/update_scraper.rb', line 4

# Root URL of the site being scraped. It is assigned in #initialize and is
# prefixed to each scraped link before that link is fetched.
def base
  @base
end

#letters ⇒ Object

Returns the value of attribute letters.



3
4
5
# File 'lib/storeListScraper/update_scraper.rb', line 3

# Array of href values for the letter-group links, filled by #letters_scrape
# and iterated by #update.
def letters
  @letters
end

#list ⇒ Object

Returns the value of attribute list.



3
4
5
# File 'lib/storeListScraper/update_scraper.rb', line 3

# CSV writer opened on business_list.csv in #initialize. It receives the
# header row there and one [name, link] row per business from
# #update_business_list.
def list
  @list
end

#pages ⇒ Object

Returns the value of attribute pages.



3
4
5
# File 'lib/storeListScraper/update_scraper.rb', line 3

# Array of pagination href values for the letter group most recently passed
# to #pages_scrape (that method clears it before refilling).
def pages
  @pages
end

Instance Method Details

#letters_scrape ⇒ Object



25
26
27
28
29
30
31
# File 'lib/storeListScraper/update_scraper.rb', line 25

# Collects the href of every '.letter-block a' link found on the site's
# store/starts-a/page-1 listing into @letters.
#
# @letters is emptied first (as #pages_scrape does for @pages), so calling
# this more than once does not accumulate duplicate links. Anchors that carry
# no href attribute are skipped instead of raising NoMethodError.
#
# The return value is whatever iterating the matched anchors returns; callers
# in this file ignore it.
def letters_scrape
    @letters.clear
    # Build the URL from @base rather than repeating the host, and use the
    # block form so open-uri closes the response handle once parsing is done.
    doc = URI.open("#{@base}store/starts-a/page-1") { |io| Nokogiri::HTML5(io) }
    doc.css('.letter-block a').each do |lk|
        href = lk['href']
        @letters << href if href
    end
end

#pages_scrape(letter_link) ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/storeListScraper/update_scraper.rb', line 33

# Replaces the contents of @pages with the href of every '.pagination a' link
# found on the page at "#{@base}#{letter_link}".
#
# Anchors that carry no href attribute are skipped instead of raising
# NoMethodError.
#
# NOTE(review): if the fetched page has no '.pagination a' links, @pages is
# left empty and #update_business_list will scrape nothing for this letter
# group — confirm the site always renders pagination links.
#
# @param letter_link [String] link of one letter group, appended to @base
def pages_scrape(letter_link)
    @pages.clear
    # Block form so open-uri closes the response handle once parsing is done.
    doc = URI.open("#{@base}#{letter_link}") { |io| Nokogiri::HTML5(io) }
    doc.css('.pagination a').each do |lk|
        href = lk['href']
        @pages << href if href
    end
end

#update ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/storeListScraper/update_scraper.rb', line 16

# Runs a complete scrape: collects the letter-group links, then for each one
# collects its page links and appends every business found on those pages to
# the CSV held in @list.
#
# Buffered CSV rows are flushed before the success message is printed, so
# "Successfully updated!!" is only shown once the rows have left Ruby's write
# buffer.
#
# @return [nil]
def update
    letters_scrape
    @letters.each do |letter|
        pages_scrape(letter)
        update_business_list
    end
    # The CSV opened in #initialize is not closed anywhere in the visible code;
    # flush it here. The respond_to? guard keeps this working if @list was
    # replaced by something that only supports <<.
    @list.flush if @list.respond_to?(:flush)
    puts "Successfully updated!!"
end

#update_business_list ⇒ Object



42
43
44
45
46
47
48
49
50
# File 'lib/storeListScraper/update_scraper.rb', line 42

# Fetches every page listed in @pages and appends one [name, link] row to
# @list for each '.main-block .col-half a' anchor found on it. The name is the
# anchor's text and the link is its href.
#
# Anchors that carry no href attribute are skipped instead of raising
# NoMethodError.
#
# @return [Array] @pages (the result of iterating it)
def update_business_list
    @pages.each do |lk|
        # Block form so open-uri closes the response handle once parsing is done.
        doc = URI.open("#{@base}#{lk}") { |io| Nokogiri::HTML5(io) }
        doc.css('.main-block .col-half a').each do |biz|
            href = biz['href']
            @list << [biz.text, href] if href
        end
    end
end