Class: WikihowTechTopics::Scraper
- Inherits:
-
Object
- Object
- WikihowTechTopics::Scraper
- Defined in:
- lib/wikihow_tech_topics/scraper.rb
Constant Summary collapse
- @@all =
[]
Instance Attribute Summary collapse
-
#content_array ⇒ Object
Returns the value of attribute content_array.
-
#home_page ⇒ Object
Returns the value of attribute home_page.
-
#title_array ⇒ Object
Returns the value of attribute title_array.
Class Method Summary collapse
Instance Method Summary collapse
-
#initialize(title = nil, content = nil, home_page = nil, title_array = nil, content_array = nil) ⇒ Scraper
constructor
A new instance of Scraper.
Constructor Details
#initialize(title = nil, content = nil, home_page = nil, title_array = nil, content_array = nil) ⇒ Scraper
Returns a new instance of Scraper.
11 12 13 14 15 16 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 11

# Builds a Scraper, stores the supplied page/array values on the instance,
# and appends the new instance to the class-level @@all list.
#
# +title+ and +content+ are accepted but never read in this method.
#
# @param title [Object, nil] not used by this method
# @param content [Object, nil] not used by this method
# @param home_page [Object, nil] stored in @home_page
# @param title_array [Object, nil] stored in @title_array
# @param content_array [Object, nil] stored in @content_array
def initialize(title = nil, content = nil, home_page = nil, title_array = nil, content_array = nil)
  @home_page = home_page
  @title_array = title_array
  @content_array = content_array
  # Register the instance so it is reachable through the @@all list.
  @@all.push(self)
end
Instance Attribute Details
#content_array ⇒ Object
Returns the value of attribute content_array.
7 8 9 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the +content_array+ attribute.
#
# @return [Object] the current value of @content_array (nil when unset)
def content_array
  @content_array
end
#home_page ⇒ Object
Returns the value of attribute home_page.
7 8 9 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the +home_page+ attribute.
#
# @return [Object] the current value of @home_page (nil when unset)
def home_page
  @home_page
end
#title_array ⇒ Object
Returns the value of attribute title_array.
7 8 9 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 7

# Reader for the +title_array+ attribute.
#
# @return [Object] the current value of @title_array (nil when unset)
def title_array
  @title_array
end
Class Method Details
.all ⇒ Object
49 50 51 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 49

# Exposes the class-level @@all list, to which #initialize appends each
# new instance.
#
# @return [Object] the value of @@all
def self.all
  @@all
end
.scraped_content_array ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 29

# Fetches the wikiHow category page, follows the article links found under
# its ".thumbnail" elements, and collects the text of the <b> elements inside
# each article's "div.steps" sections.
#
# Performs one HTTP request for the category page plus one per article.
#
# @return [Array<Array<String>>] one inner array per article; each string is
#   the concatenated <b> text of one "div.steps" element
def self.scraped_content_array
  url = "https://www.wikihow.com/Category:Selecting-and-Buying-a-Computer"
  # URI.open (from open-uri) is used because Kernel#open no longer opens
  # URLs on Ruby 3.0+. Assumes open-uri is already required by this file,
  # as the previous open(url) call needed it too.
  home_page = Nokogiri::HTML(URI.open(url))

  # The hrefs are prefixed with "https:" to form complete URLs.
  content_urls = home_page.css(".thumbnail").children.css("a").map do |content_link|
    "https:" + content_link.attribute("href").text
  end

  # Drop the last four links.
  # NOTE(review): the previous statement here was garbled
  # ("... } = http_added.pop(4)"); this assumes the intent was to discard
  # the trailing four URLs -- confirm against the original source.
  content_urls.pop(4)

  content_urls.map do |complete_content_url|
    content_page = Nokogiri::HTML(URI.open(complete_content_url))
    content_page.css('div.steps').map { |steps| steps.css("b").text }
  end
end
.scraped_title_array ⇒ Object
18 19 20 21 22 23 24 25 26 27 |
# File 'lib/wikihow_tech_topics/scraper.rb', line 18

# Fetches the wikiHow category page and collects the <span> text of every
# ".text" element, then discards the last five entries.
#
# Performs one HTTP request.
#
# @return [Array<String>] the collected title strings without the final five
def self.scraped_title_array
  # URI.open (from open-uri) is used because Kernel#open no longer opens
  # URLs on Ruby 3.0+. Assumes open-uri is already required by this file,
  # as the previous open(url) call needed it too.
  home_page = Nokogiri::HTML(URI.open("https://www.wikihow.com/Category:Selecting-and-Buying-a-Computer"))

  title_array = home_page.css('.text').map { |title_info| title_info.css('span').text }
  # The trailing five entries are removed before returning; pop leaves an
  # empty array when fewer than five were found.
  title_array.pop(5)
  title_array
end