Class: WikihowTechTopics::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/wikihow_tech_topics/scraper.rb

Constant Summary collapse

@@all =
[]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(title = nil, content = nil, home_page = nil, title_array = nil, content_array = nil) ⇒ Scraper

Returns a new instance of Scraper.



11
12
13
14
15
16
# File 'lib/wikihow_tech_topics/scraper.rb', line 11

# Builds a scraper instance and appends it to the class-level @@all registry.
#
# NOTE(review): +title+ and +content+ are accepted but never stored or read
# in this constructor — confirm with callers before relying on them.
#
# @param title [Object, nil] currently unused
# @param content [Object, nil] currently unused
# @param home_page [Object, nil] stored as-is; readable via #home_page
# @param title_array [Object, nil] stored as-is; readable via #title_array
# @param content_array [Object, nil] stored as-is; readable via #content_array
def initialize(title = nil, content = nil, home_page = nil, title_array = nil, content_array = nil)
  @title_array, @content_array, @home_page = title_array, content_array, home_page
  @@all << self
end

Instance Attribute Details

#content_array ⇒ Object

Returns the value of attribute content_array.



7
8
9
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the @content_array instance variable.
#
# @return [Object, nil] whatever is currently stored in @content_array; the
#   constructor assigns its +content_array+ argument there (nil by default)
def content_array
  @content_array
end

#home_page ⇒ Object

Returns the value of attribute home_page.



7
8
9
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the @home_page instance variable.
#
# @return [Object, nil] whatever is currently stored in @home_page; the
#   constructor assigns its +home_page+ argument there (nil by default)
def home_page
  @home_page
end

#title_array ⇒ Object

Returns the value of attribute title_array.



7
8
9
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the @title_array instance variable.
#
# @return [Object, nil] whatever is currently stored in @title_array; the
#   constructor assigns its +title_array+ argument there (nil by default)
def title_array
  @title_array
end

Class Method Details

.all ⇒ Object



49
50
51
# File 'lib/wikihow_tech_topics/scraper.rb', line 49

# Returns the class-level registry that the constructor appends each new
# instance to.
#
# The @@all object itself is returned (not a copy), so mutating the result
# mutates the registry.
#
# @return [Array] the contents of @@all
def self.all
    @@all
end

.scraped_content_array ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/wikihow_tech_topics/scraper.rb', line 29

# Scrapes the bold step text of the pages linked from the wikiHow
# "Selecting and Buying a Computer" category page.
#
# Performs one HTTP request for the category page plus one request per
# linked page, so it is slow and depends on network availability.
#
# @return [Array<Array<String>>] one inner array per linked page; each inner
#   array holds, for every +div.steps+ element on that page, the concatenated
#   text of its <b> elements
# @raise [OpenURI::HTTPError, SocketError] if a page cannot be fetched
def self.scraped_content_array
  url = "https://www.wikihow.com/Category:Selecting-and-Buying-a-Computer"
  # URI.open rather than bare Kernel#open: opening URLs through Kernel#open
  # was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  home_page = Nokogiri::HTML(URI.open(url))

  # Anchors without an href are skipped instead of raising on nil.
  hrefs = home_page.css(".thumbnail").children.css("a").map { |link| link["href"] }.compact

  # NOTE(review): assumes the hrefs are protocol-relative ("//www...") —
  # the scheme is prepended unconditionally, as in the original code.
  page_urls = hrefs.map { |href| "https:#{href}" }

  # Ignore the last 4 links (presumably sidebar articles, going by the
  # original variable name). A slice is used so nothing is mutated; it yields
  # an empty array when fewer than 4 links were found.
  trailing_links_to_skip = 4
  page_urls = page_urls[0...-trailing_links_to_skip]

  page_urls.map do |page_url|
    page = Nokogiri::HTML(URI.open(page_url))
    page.css("div.steps").map { |steps_section| steps_section.css("b").text }
  end
end

.scraped_title_array ⇒ Object



18
19
20
21
22
23
24
25
26
27
# File 'lib/wikihow_tech_topics/scraper.rb', line 18

# Scrapes the titles listed on the wikiHow "Selecting and Buying a Computer"
# category page.
#
# Performs one HTTP request, so it depends on network availability.
#
# @return [Array<String>] the <span> text of every +.text+ element on the
#   page, minus the last five entries
# @raise [OpenURI::HTTPError, SocketError] if the page cannot be fetched
def self.scraped_title_array
  # URI.open rather than bare Kernel#open: opening URLs through Kernel#open
  # was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  home_page = Nokogiri::HTML(URI.open("https://www.wikihow.com/Category:Selecting-and-Buying-a-Computer"))

  titles = home_page.css(".text").map { |title_info| title_info.css("span").text }

  # The last 5 entries are discarded, as in the original code.
  # NOTE(review): presumably these are non-category (e.g. sidebar) entries —
  # confirm against the live page. The slice yields [] when fewer than 5
  # titles exist.
  titles[0...-5]
end