Class: Robots
- Inherits: Object
- Inheritance chain: Object → Robots
- Defined in:
- lib/robots.rb
Overview
Robots retrieves and processes the robots.txt file from the target server
Instance Method Summary
- #allowed?(url) ⇒ Boolean
- #contents ⇒ Object
- #initialize(options) ⇒ Robots (constructor): Processes the robots.txt file.
- #user_agent_settings ⇒ Object
Constructor Details
#initialize(options) ⇒ Robots
Processes the robots.txt file
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/robots.rb', line 5
#
# Downloads the robots file for the site named by options[:url], parses it,
# and selects the rule set that applies to the configured user agent.
#
# The options hash is stored as-is (not copied), so the defaults filled in
# below are also visible to the caller through the hash it passed in.
#
# @param options [Hash] settings for the lookup
#   * :url        (required) any URL on the target site; only its scheme,
#                 host and port are used to build the robots file address
#   * :file       name of the file to fetch, defaults to "robots.txt"
#   * :user_agent agent whose rules should apply, defaults to "cobweb"
# @raise [RuntimeError] when options is not a Hash, when :url is missing,
#   when the response is not a text/* mime type, or when neither the
#   configured user agent nor the wildcard (*) agent has an entry
def initialize(options)
  raise "options should be a hash" unless options.kind_of? Hash
  @options = options
  raise ":url is required" unless @options.has_key? :url
  @options[:file] = "robots.txt" unless @options.has_key? :file
  @options[:user_agent] = "cobweb" unless @options.has_key? :user_agent

  uri = URI.parse(@options[:url])
  robots_url = [uri.scheme, "://", uri.host, ":", uri.port, "/", @options[:file]].join
  content = Cobweb.new(:cache => nil, :text_mime_types => ["text/html", "application/xhtml+xml", "text/plain"]).get(robots_url)

  # to_s guards against a missing mime type, which would otherwise surface
  # as a NoMethodError on nil instead of the intended error below.
  unless content[:mime_type].to_s.start_with?("text/")
    # NOTE(review): the check reads :mime_type but the message reads
    # :content_type - confirm the content hash actually carries that key.
    raise "Invalid mime type: #{content[:content_type]}"
  end

  # parse_data is defined elsewhere in this class; from its use here it is
  # expected to return a hash keyed by lower-cased user-agent symbols.
  @raw_data = parse_data(content[:body])

  # :user_agent is always present at this point because of the default above.
  agent_key = @options[:user_agent].to_s.downcase.to_sym
  if @raw_data.has_key?(agent_key)
    @params = @raw_data[agent_key]
  else
    # No agent-specific section: fall back to the wildcard rules.
    raise "Wildcard user-agent is not present" unless @raw_data.has_key? :*
    @params = @raw_data[:*]
  end
end
Instance Method Details
#allowed?(url) ⇒ Boolean
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/robots.rb', line 28
#
# Decides whether the path of the given URL may be fetched under the rule
# set selected for the configured user agent.
#
# Allow patterns are consulted first and any match permits the URL; only
# then are disallow patterns consulted, and any match forbids it. A path
# matching nothing is permitted. Each pattern is converted by
# Cobweb.escape_pattern_for_regex before being matched against the path.
#
# @param url [String] the URL to test
# @return [Boolean] true when the URL may be crawled, false otherwise
def allowed?(url)
  path = URI.parse(url).path

  permitted = @params[:allow].any? do |pattern|
    path.match(Cobweb.escape_pattern_for_regex(pattern, @options))
  end
  return true if permitted

  @params[:disallow].none? do |pattern|
    path.match(Cobweb.escape_pattern_for_regex(pattern, @options))
  end
end
#contents ⇒ Object
43 44 45 |
# File 'lib/robots.rb', line 43
#
# Exposes the parsed robots data held in @raw_data, covering every user
# agent rather than only the one selected for this instance.
#
# @return [Object] whatever @raw_data currently holds
def contents
  @raw_data
end
#user_agent_settings ⇒ Object
39 40 41 |
# File 'lib/robots.rb', line 39
#
# Exposes the rule set held in @params, i.e. the rules that #allowed?
# evaluates URLs against.
#
# @return [Object] whatever @params currently holds
def user_agent_settings
  @params
end