Class: BNextRobot
- Inherits:
-
Object
- Object
- BNextRobot
- Includes:
- Crawler, FeedFilter
- Defined in:
- lib/ext_class/bnext_robot.rb
Overview
BNextRobot extracts the titles and links of the daily and weekly hot feeds.
Constant Summary collapse
- FEED_XPATH =
"//a[contains(@class, 'item_title block_link')]/@href"
- TITLE_XPATH =
"//div[contains(@class, 'main_title')]"
- TAG_XPATH =
"//a[contains(@class, 'tag_link')]"
- INFO_XPATH =
"//span[contains(@class, 'info')]"
- CONTENT_XPATH =
"//div[contains(@class, 'content htmlview')]"
- IMGS_XPATH =
"//div[contains(@class, 'content htmlview')]/p/img/@src"
Instance Attribute Summary collapse
-
#day_rank_feeds ⇒ Object
Returns the value of attribute day_rank_feeds.
-
#week_rank_feeds ⇒ Object
Returns the value of attribute week_rank_feeds.
Attributes included from Crawler
Instance Method Summary collapse
- #_extract_feed(feed_id) ⇒ Object
- #analyze ⇒ Object
- #get_feeds(cat, page_no) ⇒ Object
- #init_rank_feeds ⇒ Object
-
#initialize ⇒ BNextRobot
constructor
A new instance of BNextRobot.
- #show_day_rank ⇒ Object
- #show_week_rank ⇒ Object
Methods included from FeedFilter
Methods included from Crawler
Constructor Details
#initialize ⇒ BNextRobot
Returns a new instance of BNextRobot.
22 23 24 25 26 |
# File 'lib/ext_class/bnext_robot.rb', line 22

# Builds a robot from the bnext.com.tw home page: loads the page, collects
# the category links (#analyze) and prepares the day/week rank feeds
# (#init_rank_feeds).
#
# NOTE(review): load_page is not defined in this class; presumably it comes
# from the included Crawler module and fills @web_data / @domain — confirm.
#
# @return [BNextRobot] a new instance of BNextRobot
def initialize
  home_page = 'http://www.bnext.com.tw/'
  load_page(home_page)
  analyze
  init_rank_feeds
end
Instance Attribute Details
#day_rank_feeds ⇒ Object
Returns the value of attribute day_rank_feeds.
20 21 22 |
# File 'lib/ext_class/bnext_robot.rb', line 20

# Reader for the day_rank_feeds attribute (assigned by #init_rank_feeds).
#
# @return [Object] the current value of @day_rank_feeds
def day_rank_feeds
  @day_rank_feeds
end
#week_rank_feeds ⇒ Object
Returns the value of attribute week_rank_feeds.
20 21 22 |
# File 'lib/ext_class/bnext_robot.rb', line 20

# Reader for the week_rank_feeds attribute (assigned by #init_rank_feeds).
#
# @return [Object] the current value of @week_rank_feeds
def week_rank_feeds
  @week_rank_feeds
end
Instance Method Details
#_extract_feed(feed_id) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/ext_class/bnext_robot.rb', line 85

# Downloads a single article page and converts it into a feed hash.
#
# The page URL is @domain without its last character followed by feed_id
# (presumably @domain ends with "/" and feed_id starts with "/" — confirm
# against the Crawler module, which is not visible here).
#
# The page is fetched with URI.open rather than Kernel#open: since Ruby 3.0
# open-uri no longer makes Kernel#open accept URLs. This relies on open-uri
# being loaded, which the previous `open(url)` call already required.
#
# @param feed_id [String] path of the article, as found in a listing page href
# @return [Object] whatever Feed#to_hash returns for the extracted
#   title, author, date, tags, link, content and image URLs
def _extract_feed(feed_id)
  query_url = "#{@domain[0..-2]}#{feed_id}"
  document = Oga.parse_html(URI.open(query_url))

  # Tag a copy of the text as UTF-8 without touching the node's own string.
  utf8 = ->(text) { text.dup.force_encoding(Encoding::UTF_8) }
  # Remove a label byte-wise so the result is the same whether the parser
  # handed back binary or UTF-8 text; String#b copies, so neither the text
  # nor the label literal is mutated.
  strip_label = ->(text, label) { text.b.gsub(label.b, '').force_encoding(Encoding::UTF_8) }

  # The first two INFO_XPATH matches are read as author and publication date.
  info = document.xpath(INFO_XPATH)

  title = utf8.call(document.xpath(TITLE_XPATH).text)
  author = strip_label.call(info[0].text, '撰文者:')
  date = strip_label.call(info[1].text, '發表日期:')
  content = utf8.call(document.xpath(CONTENT_XPATH).text)
  tags = document.xpath(TAG_XPATH).map { |node| utf8.call(node.text) }
  imgs = document.xpath(IMGS_XPATH).map(&:text)

  Feed.new(title, author, date, tags, query_url, content, imgs).to_hash
end
#analyze ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/ext_class/bnext_robot.rb', line 28

# Collects the category links of the loaded page into @cats.
#
# Scans @web_data for <li>...</li> fragments (single-line only, since "."
# does not match newlines), takes the first <a>...</a> inside each one and
# keeps the anchors whose href starts with "categories" once the `href="`
# prefix and the character after it have been dropped. Every kept anchor is
# stored as @cats[link text] = @domain + href. Unknown names look up as false.
#
# List items without an anchor or without an href are skipped instead of
# raising NoMethodError on a nil href.
#
# @return [nil]
def analyze
  @cats = Hash.new(false)
  @web_data.scan(/<li>.*?<\/li>/).each do |item|
    anchor = item[/<a.*?<\/a>/].to_s
    # [7..-2] removes `href="`, the next character (presumably the leading
    # "/" of the path) and the closing quote; it is nil when nothing matched.
    href = anchor[/href=\".*?\"/].to_s[7..-2]
    next unless href && href.start_with?('categories')

    name = anchor[/>.+?</].to_s[1..-2]
    @cats[name] = @domain + href
  end
  nil
end
#get_feeds(cat, page_no) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/ext_class/bnext_robot.rb', line 70

# Fetches one listing page of a category and extracts every feed linked on it.
#
# The listing URL is "#{@domain}categories/#{cat}/?p=#{page_no}". Each href
# matched by FEED_XPATH is passed to #_extract_feed, so one extra HTTP
# request is made per feed.
#
# The page is fetched with URI.open rather than Kernel#open: since Ruby 3.0
# open-uri no longer makes Kernel#open accept URLs, and Kernel#open treats a
# string starting with "|" as a command to run. This relies on open-uri being
# loaded, which the previous `open(url)` call already required.
#
# @param cat [String] category name as it appears in the URL path
# @param page_no [Integer, String] page number of the listing
# @return [Array] one #_extract_feed result per feed link, in page order
def get_feeds(cat, page_no)
  # TODO: parse all feeds @ page: page_no
  query_url = "#{@domain}categories/#{cat}/?p=#{page_no}"
  document = Oga.parse_html(URI.open(query_url))
  document.xpath(FEED_XPATH).map { |href| _extract_feed(href.text) }
end
#init_rank_feeds ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/ext_class/bnext_robot.rb', line 50

# Parses the already loaded page (@web_data) and fills @day_rank_feeds and
# @week_rank_feeds with one Feed per ranked article.
#
# @return [nil]
def init_rank_feeds
  document = Oga.parse_html(@web_data)
  @day_rank_feeds = _rank_feeds(document, 'day')
  @week_rank_feeds = _rank_feeds(document, 'week')
  nil
end

# Builds the Feed list for one ranking box of the page.
#
# Looks at the a[@class = 'content'] anchors inside the div whose id is
# "<period>_rank", pairs each anchor text with its href and keeps only hrefs
# starting with "/". The leading "/" is dropped before appending to @domain
# (presumably @domain already ends with "/" — confirm). A pair with no href
# is skipped rather than raising.
#
# @param document [Object] parsed page; must respond to #xpath
# @param period [String] ranking box prefix, "day" or "week"
# @return [Array<Feed>] feeds carrying only a title and a link
def _rank_feeds(document, period)
  anchors_xpath = "//div[@id = '#{period}_rank']//a[@class = 'content']"
  titles = document.xpath(anchors_xpath).map(&:text)
  hrefs = document.xpath("#{anchors_xpath}/@href").map(&:text)

  titles.zip(hrefs).each_with_object([]) do |(title, href), feeds|
    next unless href.to_s.start_with?('/')

    feeds << Feed.new(title, '', '', [], @domain + href[1..-1], '')
  end
end
#show_day_rank ⇒ Object
40 41 42 43 |
# File 'lib/ext_class/bnext_robot.rb', line 40

# Prints one "title: link" line to standard output for every feed in
# @day_rank_feeds.
#
# Iterates with each instead of map: the block is run only for its output,
# so no result array needs to be built.
#
# @return [nil]
def show_day_rank
  @day_rank_feeds.each { |feed| puts "#{feed.title}: #{feed.link}" }
  nil
end
#show_week_rank ⇒ Object
45 46 47 48 |
# File 'lib/ext_class/bnext_robot.rb', line 45

# Prints one "title: link" line to standard output for every feed in
# @week_rank_feeds.
#
# Iterates with each instead of map: the block is run only for its output,
# so no result array needs to be built.
#
# @return [nil]
def show_week_rank
  @week_rank_feeds.each { |feed| puts "#{feed.title}: #{feed.link}" }
  nil
end