Class: Robotx

Inherits:
Object
  • Object
show all
Defined in:
lib/robotx.rb

Constant Summary collapse

TIMEOUT = 30 (seconds)

Instance Method Summary collapse

Constructor Details

#initialize(uri, user_agent = '*') ⇒ Robotx

Returns a new instance of Robotx.

Raises:

  • (URI::InvalidURIError)


11
12
13
14
15
16
17
# File 'lib/robotx.rb', line 11

# Builds a Robotx instance for the given site and user agent.
#
# The URI string is percent-escaped before it is parsed. URI.encode was
# removed in Ruby 3.0, so the escaping goes through the default parser's
# +escape+ method, which applies the same escaping rules.
#
# @param uri [String] address of the site; must contain a scheme and a host
# @param user_agent [String] user agent whose rules are looked up (stored downcased)
# @raise [URI::InvalidURIError] if the parsed URI lacks a scheme or a host
def initialize(uri, user_agent = '*')
  @uri = URI.parse(URI::DEFAULT_PARSER.escape(uri))
  raise URI::InvalidURIError, 'scheme or host missing' unless @uri.scheme && @uri.host

  @user_agent  = user_agent.downcase
  # parse_robots_txt is defined elsewhere in this class; the reader methods
  # (allowed, disallowed, sitemap, ...) query the value it returns.
  @robots_data = parse_robots_txt
end

Instance Method Details

#allowed ⇒ Object



19
20
21
# File 'lib/robotx.rb', line 19

# Paths the configured user agent is allowed to crawl.
#
# When there are no disallow rules the root path is reported; otherwise the
# agent's 'allow' list is returned, falling back to the root path when the
# agent has no such list.
#
# @return [Array] list of allowed paths
def allowed
  return ['/'] if disallowed.empty?

  agent_rules = @robots_data.fetch(@user_agent, {})
  agent_rules.fetch('allow', ['/'])
end

#allowed?(data) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
30
31
32
33
34
35
36
37
# File 'lib/robotx.rb', line 27

# Checks crawl permission for a single URI or for a collection of URIs.
#
# @param data [Array, Set, Object] a collection of URIs, or one URI
# @return [Hash, Object] for an Array or Set, a hash mapping every URI to the
#   result of check_permission; for anything else, the check_permission
#   result for +data+ itself
def allowed?(data)
  case data
  when Array, Set
    data.each_with_object({}) do |uri, verdicts|
      verdicts[uri] = check_permission(uri)
    end
  else
    check_permission(data)
  end
end

#crawl_delay ⇒ Object



43
44
45
# File 'lib/robotx.rb', line 43

# Crawl delay configured for the current user agent.
#
# Falls back to 0 when the agent or its 'crawl-delay' entry is missing, and
# never returns a value below 0.
#
# @return [Numeric] the larger of the stored delay and 0
def crawl_delay
  agent_rules = @robots_data.fetch(@user_agent, {})
  delay = agent_rules.fetch('crawl-delay', 0)
  [delay, 0].max
end

#disallowed ⇒ Object



23
24
25
# File 'lib/robotx.rb', line 23

# Paths the configured user agent must not crawl.
#
# @return [Array] the agent's 'disallow' list, or an empty array when the
#   agent or the list is absent
def disallowed
  agent_rules = @robots_data.fetch(@user_agent) { {} }
  agent_rules.fetch('disallow') { [] }
end

#sitemap ⇒ Object



39
40
41
# File 'lib/robotx.rb', line 39

# Sitemap entries stored in the parsed robots data.
#
# @return [Object] the value under the 'sitemap' key, or an empty array when
#   that key is absent
def sitemap
  @robots_data.fetch('sitemap') { [] }
end

#user_agents ⇒ Object



47
48
49
# File 'lib/robotx.rb', line 47

# User agents that have an entry in the parsed robots data.
#
# The 'sitemap' key lives in the same hash but is not a user agent, so it is
# filtered out.
#
# @return [Array] every key of the robots data except 'sitemap'
def user_agents
  @robots_data.keys.reject { |key| key == 'sitemap' }
end