Class: Robots

Inherits:
Object
  • Object
show all
Defined in:
lib/robots.rb

Defined Under Namespace

Classes: ParsedRobots

Constant Summary collapse

DEFAULT_TIMEOUT =
3

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(user_agent) ⇒ Robots

Returns a new instance of Robots.



144
145
146
147
# File 'lib/robots.rb', line 144

# Sets up a checker for a single crawler identity.
#
# @param user_agent [Object] stored as-is in @user_agent; the other instance
#   methods hand it to ParsedRobots (presumably a String — confirm with callers)
def initialize(user_agent)
  # Starts empty; allowed? and other_values fill it with one entry per host.
  @parsed = {}
  @user_agent = user_agent
end

Class Method Details

.get_robots_txt(uri, user_agent) ⇒ Object



126
127
128
129
130
131
132
133
134
# File 'lib/robots.rb', line 126

# Requests "/robots.txt" from the site that +uri+ points at.
#
# The fetch is best-effort. Any StandardError raised while building the
# robots.txt URL or opening it is swallowed and reported as nil. If the
# request runs longer than Robots.timeout seconds, a message is written to
# STDERR and nil is returned.
#
# @param uri [#to_s] any URI on the target site; used as the base for URI.join
# @param user_agent [String] sent as the "User-Agent" request header
# @return [Object, nil] whatever URI#open returns on success
#   (NOTE(review): presumably an open-uri IO-like object — confirm);
#   nil on any failure or timeout
def self.get_robots_txt(uri, user_agent)
  Timeout.timeout(Robots.timeout) do
    begin
      URI.join(uri.to_s, "/robots.txt").open("User-Agent" => user_agent)
    rescue StandardError
      # Explicit form of the former inline `rescue nil`: same exception set,
      # but the intent (treat an unreachable robots.txt as absent) is visible.
      nil
    end
  end
rescue Timeout::Error
  STDERR.puts "robots.txt request timed out"
  nil
end

.timeout ⇒ Object



140
141
142
# File 'lib/robots.rb', line 140

# Timeout applied to robots.txt requests (passed to Timeout::timeout by
# get_robots_txt, so the value is in seconds).
#
# @return [Object] the value assigned through timeout=, or DEFAULT_TIMEOUT
#   when nothing truthy has been assigned
def self.timeout
  return DEFAULT_TIMEOUT unless @timeout

  @timeout
end

.timeout=(t) ⇒ Object



136
137
138
# File 'lib/robots.rb', line 136

# Overrides the value returned by the timeout reader.
#
# @param t [Object] new timeout; a nil or false value makes the reader fall
#   back to DEFAULT_TIMEOUT
# @return [Object] the value that was stored
def self.timeout=(t)
  instance_variable_set(:@timeout, t)
end

Instance Method Details

#allowed?(uri) ⇒ Boolean

Returns:

  • (Boolean)


149
150
151
152
153
154
# File 'lib/robots.rb', line 149

# Asks the robots rules of the URI's host whether this user agent may
# fetch the URI.
#
# One ParsedRobots object is built per host on first use and kept in
# @parsed, so later calls for the same host reuse it.
#
# @param uri [URI, #to_s] target location; anything that is not already a
#   URI is converted with to_s and parsed
# @return [Boolean] the answer given by ParsedRobots#allowed?
def allowed?(uri)
  target = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  rules = (@parsed[target.host] ||= ParsedRobots.new(target, @user_agent))
  rules.allowed?(target, @user_agent)
end

#other_values(uri) ⇒ Object



156
157
158
159
160
161
# File 'lib/robots.rb', line 156

# Returns the "other values" recorded for the URI's host.
#
# Shares the per-host @parsed cache with allowed?: a ParsedRobots object is
# built on the first request for a host and reused afterwards.
#
# @param uri [URI, #to_s] any location on the host of interest; anything that
#   is not already a URI is converted with to_s and parsed
# @return [Object] whatever ParsedRobots#other_values returns
#   (NOTE(review): its shape is not visible here — confirm in ParsedRobots)
def other_values(uri)
  target = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  (@parsed[target.host] ||= ParsedRobots.new(target, @user_agent)).other_values
end