Class: Artaius::Plugins::Scraper

Inherits:
Object
  • Object
show all
Includes:
Cinch::Plugin
Defined in:
lib/artaius/plugins/scraper.rb

Overview

Scrapes web pages that have a <title> element.

Instance Method Summary collapse

Instance Method Details



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/artaius/plugins/scraper.rb', line 12

# Fetches every HTTP(S) link found in a message and replies with the title
# of the page behind it.
#
# For each link the page is downloaded with a lazily created, memoized
# Mechanize agent. A link that makes Mechanize raise
# Mechanize::ResponseCodeError is answered with the localized "broken link"
# text. Responses that expose no title (no `title` method, or a nil title)
# are skipped silently. Titles of pages hosted on forum.kag2d.com have the
# trailing " | Page N | King Arthur's Gold Forum" suffix removed and are
# reported through a dedicated translation.
#
# @param m [#message, #reply] the incoming message; `message` is scanned for
#   links and `reply` is called once per reported link
def scrap_links(m)
  @agent ||= Mechanize.new.tap do |agent|
    agent.user_agent_alias = 'Linux Firefox'
  end

  URI.extract(m.message, %w[http https]) do |link|
    begin
      page = @agent.get(link)
      uri = URI.parse(link)
    rescue Mechanize::ResponseCodeError
      # Skip this link unconditionally; the skip must not depend on what
      # `reply` happens to return.
      m.reply I18n.scraper.broken_link
      next
    end

    # Not every response object is guaranteed to expose a title
    # (presumably non-HTML downloads do not), and a page may have none.
    next unless page.respond_to?(:title)

    raw_title = page.title
    next unless raw_title

    begin
      # Drop control characters (including line breaks), collapse runs of
      # spaces into one and trim the ends.
      title = raw_title.gsub(/[\x00-\x1f]/, '').squeeze(' ').strip
    rescue ArgumentError, EncodingError
      # A title with broken or incompatible encoding cannot be cleaned up;
      # stay best-effort and ignore this link instead of aborting the rest.
      next
    end

    case uri.host
    when 'forum.kag2d.com'
      # Control characters were removed above, so the title is a single line
      # and \z anchors exactly where the end-of-line anchor would.
      pattern = / \| Page \d{1,4} \| King Arthur's Gold Forum\z/
      m.reply I18n.scraper.h.kag_forum(title.sub(pattern, ''))
    else
      m.reply I18n.scraper.title(title)
    end
  end
end