Module: Scrape

Defined in:
lib/scrape.rb,
lib/scrape/version.rb

Defined Under Namespace

Classes: Application, CLI, DSL, DefaultLoader, FileNotFound, HTTPError, Match, RobotsTxt, RobotsTxtRules, Site

Constant Summary collapse

VERSION =
'0.3.0'

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.user_agent ⇒ Object



28
29
30
# File 'lib/scrape.rb', line 28

# User-Agent string used for outgoing requests.
#
# @return [Object] the value stored in +@user_agent+ when it is truthy,
#   otherwise the default "Scrape/<VERSION>" string built from
#   +Scrape::VERSION+.
def user_agent
  return @user_agent if @user_agent

  "Scrape/#{Scrape::VERSION}"
end

Class Method Details

.load_scrapefile(path) ⇒ Object



40
41
42
# File 'lib/scrape.rb', line 40

# Builds an Application for the given path.
#
# @param path [Object] value handed straight to +Application.new+
# @return [Application] the newly constructed application
def load_scrapefile(path)
  Application.new(path)
end

.logger ⇒ Object



32
33
34
# File 'lib/scrape.rb', line 32

# Lazily created logger.
#
# The first call with no logger assigned creates a +Logger+ writing to
# +STDOUT+ and caches it in +@logger+; later calls return the cached object.
#
# @return [Object] the cached logger
def logger
  return @logger if @logger

  @logger = Logger.new(STDOUT)
end

.logger=(log) ⇒ Object



36
37
38
# File 'lib/scrape.rb', line 36

# Replaces the cached logger.
#
# @param log [Object] object stored in +@logger+ and returned by later
#   reads of that variable
# @return [Object] the object that was assigned
def logger=(log)
  @logger = log
end

.open(url, headers = nil, &block) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/scrape.rb', line 44

# Performs a GET request against +url+ and returns the response body.
#
# A Faraday connection is built for the full URL with the
# +:follow_redirects+ response middleware (cookies: all, limit: 3) and the
# default adapter. The connection's user agent header is set from
# +user_agent+, and every pair in +headers+ is copied onto the request.
#
# @param url [Object] value parsed with +Addressable::URI.parse+
# @param headers [Hash, nil] extra request headers; +nil+ means none
# @param block [Proc] accepted for interface compatibility; not used here
# @return [Object] the response body when the response reports success
# @raise [HTTPError] carrying the response status when the response is not
#   successful
def open(url, headers = nil, &block)
  target = Addressable::URI.parse(url)
  extra_headers = headers || {}

  connection = Faraday.new(:url => target.to_s) do |builder|
    builder.response :follow_redirects, :cookies => :all, :limit => 3
    builder.adapter Faraday.default_adapter
  end
  connection.headers[:user_agent] = user_agent

  response = connection.get(target.request_uri) do |request|
    extra_headers.each { |name, value| request[name] = value }
  end

  raise HTTPError, response.status unless response.success?

  response.body
end