Class: Robots::ParsedRobots
- Inherits:
- Object
- Inheritance chain: Object → Robots::ParsedRobots
- Defined in:
- lib/robots.rb
Instance Method Summary
- #allowed?(uri, user_agent) ⇒ Boolean
- #initialize(uri, user_agent) ⇒ ParsedRobots (constructor)
  A new instance of ParsedRobots.
- #other_values ⇒ Object
Constructor Details
#initialize(uri, user_agent) ⇒ ParsedRobots
Returns a new instance of ParsedRobots.
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/robots.rb', line 37
#
# Fetches the robots.txt that applies to +uri+ and parses it into per-agent
# allow/disallow rules, crawl delays, and a bucket of unrecognised fields.
#
# When the fetch yields nothing, or the response is not a "text/plain"
# 200/OK, a permissive default ("User-agent: *" / "Allow: /") is parsed
# instead.
#
# Field names are matched case-insensitively and with surrounding whitespace
# ignored, so "user-agent:", "DISALLOW:" and "Disallow :" are all honoured.
#
# @param uri [URI] passed through to Robots.get_robots_txt
# @param user_agent [String] passed through to Robots.get_robots_txt
def initialize(uri, user_agent)
  # Far in the past, so the first crawl-delay check sees a long elapsed time.
  @last_accessed = Time.at(1)

  io = Robots.get_robots_txt(uri, user_agent)
  if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
    io = StringIO.new("User-agent: *\nAllow: /\n")
  end

  @other = {}
  @disallows = {}
  @allows = {}
  @delays = {} # crawl delay (integer, via to_i) keyed by agent pattern

  # Rules that appear before any User-agent line apply to every agent.
  agent = /.*/

  io.each do |line|
    # Skip blank lines and comment-only lines.
    next if line =~ /^\s*(#.*|$)/

    # Split on the first colon only, so values may themselves contain ":"
    # (e.g. "Sitemap: http://example.com/sitemap.xml").
    key, value = line.split(":", 2)
    key = key.to_s.strip
    value = value.to_s.strip

    case key.downcase
    when "user-agent"
      agent = to_regex(value)
    when "allow"
      (@allows[agent] ||= []) << to_regex(value)
    when "disallow"
      (@disallows[agent] ||= []) << to_regex(value)
    when "crawl-delay"
      @delays[agent] = value.to_i
    else
      # Unrecognised fields are kept verbatim (original key casing).
      (@other[key] ||= []) << value
    end
  end

  @parsed = true
end
Instance Method Details
#allowed?(uri, user_agent) ⇒ Boolean
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/robots.rb', line 77
#
# Reports whether +user_agent+ may fetch +uri+ according to the parsed rules.
#
# A path is refused when any Disallow rule of any agent group matching
# +user_agent+ matches it, unless an Allow rule of a matching group also
# matches. Before parsing has completed, everything is allowed.
#
# When the request is allowed and a crawl delay applies to +user_agent+,
# this call blocks for whatever part of that delay has not yet elapsed since
# the previous delayed access, then records the access time. If several
# agent groups match, the largest delay among them is used.
#
# @param uri [#request_uri] the URI being checked (e.g. a URI::HTTP)
# @param user_agent [String] the crawler's user-agent string
# @return [Boolean] true when the fetch is permitted
def allowed?(uri, user_agent)
  return true unless @parsed

  path = uri.request_uri

  # True when some agent group matching +user_agent+ has a rule that
  # matches the requested path.
  rule_hit = lambda do |rules_by_agent|
    rules_by_agent.any? do |agent, rules|
      user_agent =~ agent && rules.any? { |rule| path =~ rule }
    end
  end

  allowed = !rule_hit.call(@disallows) || rule_hit.call(@allows)

  # @delays is keyed by agent pattern, so the delay has to be found by
  # matching the pattern rather than by a direct hash lookup on the string.
  delay = @delays.select { |agent, _| user_agent =~ agent }.values.max

  if allowed && delay
    remaining = delay - (Time.now - @last_accessed)
    # Kernel#sleep rejects negative intervals; only wait when time is owed.
    sleep(remaining) if remaining > 0
    @last_accessed = Time.now
  end

  allowed
end
#other_values ⇒ Object
112 113 114 |
# File 'lib/robots.rb', line 112
#
# Exposes the robots.txt fields that were not recognised as directives.
#
# @return [Hash, nil] the @other hash itself (not a copy); nil if it was
#   never assigned
def other_values
  @other
end