Class: Robotex::ParsedRobots
- Inherits:
-
Object
- Object
- Robotex::ParsedRobots
- Defined in:
- lib/robotex.rb
Instance Method Summary
- #allowed?(uri, user_agent) ⇒ Boolean
- #delay(user_agent) ⇒ Object
-
#initialize(uri, user_agent) ⇒ ParsedRobots
constructor
A new instance of ParsedRobots.
Constructor Details
#initialize(uri, user_agent) ⇒ ParsedRobots
Returns a new instance of ParsedRobots.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/robotex.rb', line 18
#
# Builds the per-agent allow / disallow / crawl-delay tables from a
# robots.txt document.
#
# The document is obtained from Robotex.get_robots_txt(uri, user_agent)
# (defined outside this listing). When nothing is returned, or the response
# is not "text/plain" with status ["200", "OK"], a permissive
# "User-agent: *\nAllow: /\n" document is parsed instead.
#
# Rule values are converted with #to_regex (defined outside this listing;
# presumably it returns a Regexp -- confirm against its definition).
#
# @param uri [Object] passed through to Robotex.get_robots_txt
# @param user_agent [String] passed through to Robotex.get_robots_txt
def initialize(uri, user_agent)
  io = Robotex.get_robots_txt(uri, user_agent)

  # Anything other than a successful plain-text response is treated as
  # "no robots.txt": fall back to a document that allows everything.
  if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
    io = StringIO.new("User-agent: *\nAllow: /\n")
  end

  @disallows = {}
  @allows = {}
  @delays = {}

  # Rules seen before any User-agent line apply to every agent.
  agent = /.*/

  io.each do |line|
    # Skip blank lines and comment-only lines.
    next if line =~ /^\s*(#.*|$)/

    # Split on the first colon only, so values that themselves contain
    # colons (e.g. absolute URLs) are kept intact.
    key, value = line.split(":", 2)
    value = value.to_s.strip

    # Strip the key as well: indented directives and "Key : value" forms
    # would otherwise be silently ignored. to_s guards a missing key.
    case key.to_s.strip.downcase
    when "user-agent"
      agent = to_regex(value)
    when "allow"
      (@allows[agent] ||= []) << to_regex(value)
    when "disallow"
      (@disallows[agent] ||= []) << to_regex(value)
    when "crawl-delay"
      @delays[agent] = value.to_i
    end
  end

  @parsed = true
end
Instance Method Details
#allowed?(uri, user_agent) ⇒ Boolean
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/robotex.rb', line 52
#
# Reports whether +user_agent+ may fetch +uri+ according to the parsed rules.
#
# A path is refused only when some Disallow rule for a matching agent matches
# it AND no Allow rule for a matching agent matches it. Disallow rules are
# evaluated first so that a matching Allow rule can override them; evaluating
# Allow first against an initial "allowed" state would make Allow rules dead
# code.
#
# @param uri [URI, #to_s] the URL to check; non-URI values are parsed with
#   URI.parse(uri.to_s)
# @param user_agent [String] the agent name matched against each rule
#   group's agent pattern
# @return [Boolean] true when access is permitted, or when the rules have
#   not been parsed (@parsed is falsy)
def allowed?(uri, user_agent)
  return true unless @parsed

  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
  path = uri.request_uri

  # True when any rule group whose agent pattern matches +user_agent+
  # contains a path pattern matching +path+.
  rule_hit = lambda do |rules|
    rules.any? do |agent_pattern, path_patterns|
      user_agent =~ agent_pattern && path_patterns.any? { |rule| path =~ rule }
    end
  end

  return true unless rule_hit.call(@disallows)

  # The path is disallowed; a matching Allow rule restores access.
  rule_hit.call(@allows)
end
#delay(user_agent) ⇒ Object
83 84 85 86 87 88 |
# File 'lib/robotex.rb', line 83
#
# Looks up the crawl delay recorded for +user_agent+.
#
# Entries of @delays are examined in insertion order and the value of the
# first one whose agent pattern matches +user_agent+ is returned.
#
# @param user_agent [String] the agent name to match against each pattern
# @return [Object, nil] the stored delay for the first matching pattern,
#   or nil when no pattern matches
def delay(user_agent)
  _pattern, seconds = @delays.find { |pattern, _| pattern =~ user_agent }
  seconds
end