Class: Polipus::Robotex::ParsedRobots
- Inherits: Object
- Object
- Polipus::Robotex::ParsedRobots
- Defined in:
- lib/polipus/robotex.rb
Instance Method Summary
- #allowed?(uri, user_agent) ⇒ Boolean
- #delay(user_agent) ⇒ Object
-
#initialize(uri, user_agent) ⇒ ParsedRobots
constructor
A new instance of ParsedRobots.
Constructor Details
#initialize(uri, user_agent) ⇒ ParsedRobots
Returns a new instance of ParsedRobots.
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/polipus/robotex.rb', line 16
#
# Fetches the robots.txt for +uri+ and parses it into per-agent rule tables.
#
# Populates three hashes keyed by the user-agent pattern returned from
# +to_regex+: +@allows+ and +@disallows+ (arrays of path patterns) and
# +@delays+ (integer crawl delay). Sets +@parsed+ once parsing completes.
#
# @param uri the site whose robots.txt should be fetched (passed straight to
#   +Robotex.get_robots_txt+)
# @param user_agent the user agent passed to +Robotex.get_robots_txt+
def initialize(uri, user_agent)
  io = Robotex.get_robots_txt(uri, user_agent)

  # A missing response, a non-plain-text body, or any status other than
  # ["200", "OK"] is treated as "no robots.txt": fall back to allow-all.
  if !io || io.content_type != 'text/plain' || io.status != %w(200 OK)
    io = StringIO.new("User-agent: *\nAllow: /\n")
  end

  @disallows = {}
  @allows = {}
  @delays = {}

  # Rules seen before any User-agent line apply to every agent.
  agent = /.*/
  io.each do |line|
    # Skip blank lines and full-line comments.
    next if line =~ /^\s*(#.*|$)/

    # Split on the first colon only, so values such as "http://..." keep
    # their own colons intact.
    key, value = line.split(':', 2)
    # A line without a colon is not a directive.
    next if value.nil?

    value = value.strip
    # Strip the key as well: indented directives and "User-agent : *" would
    # otherwise fail to match any of the branches below.
    case key.strip.downcase
    when 'user-agent'
      agent = to_regex(value)
    when 'allow'
      unless value.empty?
        @allows[agent] ||= []
        @allows[agent] << to_regex(value)
      end
    when 'disallow'
      # An empty Disallow value is skipped, so it adds no restriction.
      unless value.empty?
        @disallows[agent] ||= []
        @disallows[agent] << to_regex(value)
      end
    when 'crawl-delay'
      @delays[agent] = value.to_i
    end
  end

  @parsed = true
end
Instance Method Details
#allowed?(uri, user_agent) ⇒ Boolean
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/polipus/robotex.rb', line 52
#
# Decides whether +user_agent+ may fetch +uri+ according to the parsed rules.
#
# Every Allow and Disallow rule whose agent pattern matches +user_agent+ is
# tested against the request path. If no Disallow rule matches, the path is
# allowed. Otherwise the most specific matching rule wins, and a tie goes to
# Allow (the precedence described by RFC 9309).
#
# Previously +allowed+ started out true and the Allow loop only ran
# "unless allowed", so Allow rules were never evaluated and could not
# override a Disallow.
#
# @param uri [URI, #to_s] the URL to check; non-URI values are parsed via +to_s+
# @param user_agent [String] the crawler's user-agent string
# @return [Boolean] true when fetching is permitted (always true if the
#   rules have not been parsed)
def allowed?(uri, user_agent)
  return true unless @parsed

  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
  path = uri.request_uri

  disallow_len = longest_rule_match(@disallows, path, user_agent)
  return true if disallow_len.nil?

  allow_len = longest_rule_match(@allows, path, user_agent)
  !allow_len.nil? && allow_len >= disallow_len
end

# Returns the source length of the longest rule in +rules_by_agent+ that
# applies to +user_agent+ and matches +path+, or nil when none matches.
# NOTE(review): pattern source length is used as a proxy for rule
# specificity; this assumes the rules are Regexps built uniformly from
# robots.txt path patterns -- confirm against to_regex.
def longest_rule_match(rules_by_agent, path, user_agent)
  lengths = []
  rules_by_agent.each do |agent, rules|
    next unless user_agent =~ agent
    rules.each do |rule|
      lengths << rule.source.length if path =~ rule
    end
  end
  lengths.max
end
private :longest_rule_match
#delay(user_agent) ⇒ Object
79 80 81 82 83 84 |
# File 'lib/polipus/robotex.rb', line 79
#
# Looks up the crawl delay recorded for +user_agent+.
#
# Entries in +@delays+ are checked in insertion order and the first one
# whose agent pattern matches +user_agent+ wins.
#
# @param user_agent [String] the crawler's user-agent string
# @return the delay stored for the first matching agent pattern, or nil
#   when no pattern matches
def delay(user_agent)
  # find yields each [agent, seconds] pair; a miss returns nil, which
  # destructures to nil for both names.
  _agent, seconds = @delays.find { |agent, _seconds| agent =~ user_agent }
  seconds
end