Class: RobotRules
- Inherits: Object
- Inheritance chain: Object → RobotRules
- Defined in:
- lib/robot_rules.rb
Overview
Based on Perl’s WWW::RobotRules module, by Gisle Aas.
Instance Method Summary
- #allowed?(u) ⇒ Boolean
- #initialize(user_agent) ⇒ RobotRules (constructor): Returns a new instance of RobotRules.
- #parse(site, robots_data) ⇒ Object
Constructor Details
#initialize(user_agent) ⇒ RobotRules
Returns a new instance of RobotRules.
12 13 14 15 |
# File 'lib/robot_rules.rb', line 12

# Creates an empty rule set for the given crawler.
#
# The agent name kept for matching is the first whitespace-delimited token of
# +user_agent+, with everything from the first "/" onward removed, lowercased
# (e.g. "MyBot/1.0 (+http://example.com)" is stored as "mybot").
#
# @param user_agent [String] the crawler's User-Agent string
# @raise [ArgumentError] if +user_agent+ contains no non-whitespace token
def initialize(user_agent)
  product = user_agent.to_s[/\S+/]
  # A blank agent used to fail with NoMethodError on nil; report it clearly.
  raise ArgumentError, "user_agent must contain a non-blank product token" if product.nil?

  @user_agent = product.sub(%r{/.*}, "").downcase
  # Block form so that every key gets its own Array on first access.
  @rules = Hash.new { |rules, key| rules[key] = [] }
end
Instance Method Details
#allowed?(u) ⇒ Boolean
66 67 68 69 70 71 72 73 74 |
# File 'lib/robot_rules.rb', line 66

# Reports whether the rules stored for a URL's "host:port" permit fetching it.
#
# URLs whose scheme is not http or https are always allowed. Otherwise the URL
# is disallowed when its path begins with any stored rule for that location.
#
# @param u [String, Addressable::URI] the URL to check
# @return [Boolean] true if no stored rule is a prefix of the URL's path
def allowed?(u)
  uri = u.kind_of?(Addressable::URI) ? u : Addressable::URI.parse(u)
  return true unless %w[http https].include?(uri.scheme)

  # An empty path addresses the site root, so it must be matched by "/" rules.
  path = uri.path.to_s
  path = "/" if path.empty?

  # fetch avoids triggering a Hash default block on @rules, so merely asking
  # about an unknown location does not add an entry for it.
  rules = @rules.fetch("#{uri.host}:#{uri.port}", [])
  rules.none? { |rule| path.start_with?(rule) }
end
#parse(site, robots_data) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/robot_rules.rb', line 17

# Parses the robots.txt body for a site and replaces the rules stored for it.
#
# Rules are stored under the key "host:port" taken from +site+. Disallow lines
# are collected per User-Agent record: records naming "*" feed the anonymous
# list, records whose agent contains this crawler's name feed its own list,
# and all other records are ignored. The crawler's own list is stored when it
# received any Disallow line (including an empty one, which means "allow
# everything"); otherwise the anonymous list is stored.
#
# @param site [String, Addressable::URI] URL of the site the data came from
# @param robots_data [String] raw contents of the site's robots.txt
# @return [Array<String>] the path prefixes now disallowed for the site
def parse(site, robots_data)
  uri = site.kind_of?(Addressable::URI) ? site : Addressable::URI.parse(site)
  location = "#{uri.host}:#{uri.port}"
  @rules.delete(location)

  anon_rules = []
  my_rules = []
  current = anon_rules

  robots_data.split(/[\015\012]+/).each do |line|
    rule = line.sub(/\s*#.*$/, "") # drop trailing comments

    case rule
    when /^\s*User-Agent\s*:\s*(.+?)\s*$/i
      agent = Regexp.last_match(1)
      # Our own record has already been read; later records are irrelevant.
      break unless my_rules.empty?

      current =
        if agent == "*"
          anon_rules
        elsif agent.downcase.index(@user_agent)
          my_rules
        end
    when /^\s*Disallow\s*:\s*(.*?)\s*$/i
      value = Regexp.last_match(1)
      next if current.nil?

      if value.empty?
        # Placeholder: marks the record as seen without adding a rule; it is
        # removed by compact below.
        current << nil
        next
      end

      disallow = Addressable::URI.parse(value)
      # Absolute URLs only count when they point at this very site.
      next unless disallow.scheme.nil? || disallow.scheme == uri.scheme
      next unless disallow.port.nil? || disallow.port == uri.port
      # to_s keeps the comparison from raising when the site URI has no host.
      next unless disallow.host.nil? || disallow.host.downcase == uri.host.to_s.downcase

      path = disallow.path
      path = "/" if path.empty?
      path = "/#{path}" unless path.start_with?("/")
      current << path
    end
  end

  @rules[location] = (my_rules.empty? ? anon_rules : my_rules).compact
end