Class: Robotx
- Inherits: Object
- Class hierarchy: Object → Robotx
- Defined in:
- lib/robotx.rb
Constant Summary collapse
- TIMEOUT = 30 # seconds
Instance Method Summary collapse
- #allowed ⇒ Object
- #allowed?(data) ⇒ Boolean
- #crawl_delay ⇒ Object
- #disallowed ⇒ Object
-
#initialize(uri, user_agent = '*') ⇒ Robotx
constructor
A new instance of Robotx.
- #sitemap ⇒ Object
- #user_agents ⇒ Object
Constructor Details
#initialize(uri, user_agent = '*') ⇒ Robotx
Returns a new instance of Robotx.
11 12 13 14 15 16 17 |
# File 'lib/robotx.rb', line 11 def initialize(uri, user_agent='*') @uri = URI.parse(URI.encode(uri)) raise URI::InvalidURIError.new('scheme or host missing') unless @uri.scheme and @uri.host @user_agent = user_agent.downcase @robots_data = parse_robots_txt end |
Instance Method Details
#allowed ⇒ Object
19 20 21 |
# File 'lib/robotx.rb', line 19 def allowed return disallowed.empty? ? ['/'] : @robots_data.fetch(@user_agent, {}).fetch('allow', ['/']) end |
#allowed?(data) ⇒ Boolean
27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/robotx.rb', line 27 def allowed?(data) if data.is_a?(Array) or data.is_a?(Set) return {}.tap do |hash| data.each do |uri| hash[uri] = (uri) end end end return (data) end |
#crawl_delay ⇒ Object
43 44 45 |
# File 'lib/robotx.rb', line 43 def crawl_delay return [@robots_data.fetch(@user_agent, {}).fetch('crawl-delay', 0), 0].max end |
#disallowed ⇒ Object
23 24 25 |
# File 'lib/robotx.rb', line 23 def disallowed return @robots_data.fetch(@user_agent, {}).fetch('disallow', []) end |
#sitemap ⇒ Object
39 40 41 |
# File 'lib/robotx.rb', line 39 def sitemap return @robots_data.fetch('sitemap', []) end |
#user_agents ⇒ Object
47 48 49 |
# File 'lib/robotx.rb', line 47 def user_agents return @robots_data.keys.delete_if { |agent| agent == 'sitemap' } end |