Class: Scraper::Comics
- Inherits:
-
Object
- Object
- Scraper::Comics
- Defined in:
- lib/scraper/comics.rb
Class Method Summary collapse
- .all(start_page = 1, last_page = 5) ⇒ Object
- .description(comic) ⇒ Object
- .download_link(comic) ⇒ Object
- .last_page ⇒ Object
- .main_image(comic) ⇒ Object
- .other_images(comic) ⇒ Object
- .tags(comic) ⇒ Object
Class Method Details
.all(start_page = 1, last_page = 5) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/scraper/comics.rb', line 4
#
# Scrapes the listing pages +start_page+ through +last_page+ (inclusive) of
# newcomic.org and, for every `.story_short` entry found, fetches the linked
# comic page and collects its details.
#
# Pages are fetched with URI.open rather than Kernel#open: Kernel#open treats a
# string starting with "|" as a shell command (the comic URL comes from scraped
# HTML), and it no longer opens URLs on Ruby 3.0+.
#
# @param start_page [Integer] first listing page to fetch
# @param last_page [Integer] last listing page to fetch (inclusive)
# @return [Array<OpenStruct>] one entry per comic, responding to title, url,
#   main_image, other_images, download_link, tags and description
def self.all(start_page = 1, last_page = 5)
  (start_page..last_page).flat_map do |page|
    listing = Nokogiri::HTML(URI.open("http://www.newcomic.org/page/#{page}/"))

    listing.css('.story_short').map do |block|
      # The first anchor inside the entry links to the comic's own page.
      url = block.search('a').map { |a| a['href'] }.first
      title = block.at_css('.story_h').content
      comic = Nokogiri::HTML(URI.open(url))

      OpenStruct.new(
        title: title,
        url: url,
        main_image: main_image(comic),
        other_images: other_images(comic),
        download_link: download_link(comic),
        tags: tags(comic),
        description: description(comic)
      )
    end
  end
end
.description(comic) ⇒ Object
32 33 34 |
# File 'lib/scraper/comics.rb', line 32
#
# Returns the text of the comic page's `.story_c` element with every run of
# whitespace collapsed to a single space and the ends trimmed.
#
# @param comic [#at_css] parsed comic page
# @return [String, nil] the normalized text, or nil when the page has no
#   `.story_c` element (same guard as download_link)
def self.description(comic)
  story = comic.at_css('.story_c')
  return unless story

  story.content.gsub(/\s+/, ' ').strip
end
.download_link(comic) ⇒ Object
44 45 46 47 48 |
# File 'lib/scraper/comics.rb', line 44
#
# Returns the href of the first anchor inside the comic page's `.story_c`
# element.
#
# @param comic [#at_css] parsed comic page
# @return [String, nil] the href, or nil when there is no `.story_c` element,
#   no anchor inside it, or the first anchor has no href
def self.download_link(comic)
  # Look the element up once instead of once for the check and once for use.
  story = comic.at_css('.story_c')
  return unless story

  first_anchor = story.search('a').first
  first_anchor && first_anchor['href']
end
.last_page ⇒ Object
56 57 58 |
# File 'lib/scraper/comics.rb', line 56
#
# Fetches the newcomic.org front page and reads the number shown in the
# pagination link matched by the selector below.
#
# The page is fetched with URI.open because Kernel#open no longer opens URLs
# on Ruby 3.0+.
#
# NOTE(review): the selector is pinned to the 12th child of the navigation
# span; presumably that is the "last page" link in the current site layout —
# verify if the markup changes.
#
# @return [Integer] the link text converted with to_i (0 if it is not numeric)
# @raise [NoMethodError] if the selector matches nothing
def self.last_page
  front_page = Nokogiri::HTML(URI.open('http://www.newcomic.org/'))
  link = front_page.search('#dle-content > div.basenavi > span.navigation > a:nth-child(12)').first
  link.children[0].to_s.to_i
end
.main_image(comic) ⇒ Object
36 37 38 |
# File 'lib/scraper/comics.rb', line 36
#
# Returns the src of the first <img> inside the comic page's `.story_c`
# element.
#
# @param comic [#at_css] parsed comic page
# @return [String, nil] the src, or nil when there is no `.story_c` element
#   (same guard as download_link), no image inside it, or the first image has
#   no src
def self.main_image(comic)
  story = comic.at_css('.story_c')
  return unless story

  first_image = story.search('img').first
  first_image && first_image['src']
end
.other_images(comic) ⇒ Object
40 41 42 |
# File 'lib/scraper/comics.rb', line 40
#
# Collects the hrefs of the anchors inside the comic page's `.story_c` element
# that are absolute http(s) URLs ending in .jpg, .jpeg or .png.
#
# @param comic [#at_css] parsed comic page
# @return [Array<String>] matching hrefs in document order; empty when there
#   is no `.story_c` element or no anchor qualifies
def self.other_images(comic)
  story = comic.at_css('.story_c')
  return [] unless story

  # grep keeps only the hrefs matching the pattern; anchors without an href
  # (nil) never match, so no separate compact step is needed.
  story.search('a').map { |a| a['href'] }.grep(%r{\Ahttps?:\/\/.+\.(?:jpe?g|png)\z})
end
.tags(comic) ⇒ Object
50 51 52 53 54 |
# File 'lib/scraper/comics.rb', line 50
#
# Reads the comic's tag list from the <i> element located by the XPath below,
# removes every "Tags: " label from its text, splits the remainder on commas
# and strips each piece.
#
# @param comic [#at_xpath] parsed comic page
# @return [Array<String>, nil] the tag names, or nil when the element is absent
def self.tags(comic)
  # Evaluate the XPath once and reuse the node.
  tag_node = comic.at_xpath('//*[@id="dle-content"]/div/article/div[2]/p/i')
  return unless tag_node

  tag_node.content.gsub('Tags: ', '').split(',').map(&:strip)
end