Class: Stats
- Inherits:
-
Object
- Object
- Stats
- Defined in:
- lib/stats.rb
Overview
The Stats class is the main statistics hub for monitoring crawls. The statistics can either be viewed through the Sinatra interface, or returned from the CobwebCrawler.crawl method or block.
Instance Attribute Summary collapse
-
#redis ⇒ Object
readonly
Returns the value of attribute redis.
Instance Method Summary collapse
-
#end_crawl(options, cancelled = false) ⇒ Object
Removes the crawl from the running crawls and updates status.
- #get_crawled ⇒ Object
-
#get_statistics ⇒ Object
Returns the statistics hash.
-
#get_status ⇒ Object
Returns the current status of the crawl.
- #inbound_links_for(url) ⇒ Object
-
#initialize(options) ⇒ Stats
constructor
Sets up redis usage for statistics.
-
#set_totals ⇒ Object
Sets totals for the end of the crawl (Not Used).
-
#start_crawl(options) ⇒ Object
Sets up the crawl in statistics.
-
#update_statistics(content, crawl_counter = @redis.scard("crawled").to_i, queue_counter = @redis.scard("queued").to_i) ⇒ Object
Returns statistics hash.
-
#update_status(status) ⇒ Object
Sets the current status of the crawl.
Constructor Details
#initialize(options) ⇒ Stats
Sets up redis usage for statistics
8 9 10 11 12 13 14 15 16 17 |
# File 'lib/stats.rb', line 8
#
# Sets up redis usage for statistics.
#
# Keys read from +options+:
# * +:redis+         - an existing connection; used as-is when truthy.
# * +:redis_options+ - handed to +Redis.new+ when no connection is supplied.
#                      Defaulted to +{}+ and written back onto +options+.
# * +:crawl_id+      - interpolated into the namespace that scopes this
#                      crawl's keys.
#
# Leaves three instance variables behind: +@full_redis+ (the un-namespaced
# connection), +@lock+ (a Mutex) and +@redis+ (the namespaced wrapper).
def initialize(options)
  # The default is stored on the caller's hash, exactly as before; `||=`
  # additionally covers a key that is present but nil, which would otherwise
  # be passed straight to Redis.new.
  options[:redis_options] ||= {}

  @full_redis = options[:redis] || Redis.new(options[:redis_options])
  @lock = Mutex.new
  @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{options[:crawl_id]}", :redis => @full_redis)
end
Instance Attribute Details
#redis ⇒ Object (readonly)
Returns the value of attribute redis.
5 6 7 |
# File 'lib/stats.rb', line 5
#
# Reader for the +@redis+ instance variable (assigned in the constructor).
def redis
  @redis
end
Instance Method Details
#end_crawl(options, cancelled = false) ⇒ Object
Removes the crawl from the running crawls and updates status
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/stats.rb', line 32
#
# Marks the crawl as ended in the "statistics" hash.
#
# Writes "current_status" (CobwebCrawlHelper::CANCELLED when +cancelled+ is
# truthy, CobwebCrawlHelper::FINISHED otherwise) and then "crawl_finished_at"
# with the current DateTime.
#
# +options+ is part of the signature but is only referenced by the
# commented-out +srem+ below, so the crawl id is currently NOT removed from
# "cobweb_crawls" by this method.
def end_crawl(options, cancelled=false)
  #@full_redis.srem "cobweb_crawls", options[:crawl_id]
  final_status = cancelled ? CobwebCrawlHelper::CANCELLED : CobwebCrawlHelper::FINISHED
  @redis.hset "statistics", "current_status", final_status
  @redis.hset "statistics", "crawl_finished_at", DateTime.now
  #@redis.del "crawl_details"
end
#get_crawled ⇒ Object
43 44 45 |
# File 'lib/stats.rb', line 43
#
# Returns the members of the "crawled" set from the namespaced redis.
def get_crawled
  @redis.smembers("crawled")
end
#get_statistics ⇒ Object
Returns the statistics hash
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/stats.rb', line 159
#
# Returns the statistics hash.
#
# Reads the whole "statistics" redis hash, symbolizes its keys through
# HashUtil.deep_symbolize_keys, and JSON-decodes the :status_counts and
# :mime_counts entries when they are present. The decoded hashes are stored
# as returned by JSON.parse; absent entries are left absent.
def get_statistics
  stats = HashUtil.deep_symbolize_keys(@redis.hgetall("statistics"))

  # Both fields are persisted as JSON text; same order as before
  # (status first, then mime).
  [:status_counts, :mime_counts].each do |field|
    encoded = stats[field]
    stats[field] = JSON.parse(encoded) unless encoded.nil?
  end

  stats
end
#get_status ⇒ Object
Returns the current status of the crawl
181 182 183 |
# File 'lib/stats.rb', line 181
#
# Returns the current status of the crawl, i.e. the "current_status" field
# of the "statistics" hash.
def get_status
  @redis.hget("statistics", "current_status")
end
#inbound_links_for(url) ⇒ Object
47 48 49 50 |
# File 'lib/stats.rb', line 47
#
# Returns the members of the inbound-links set for +url+.
#
# The set key is "inbound_links_" followed by the MD5 hex digest of the
# normalized URL string (UriHelper.parse(url).normalize.to_s).
def inbound_links_for(url)
  normalized = UriHelper.parse(url).normalize
  digest = Digest::MD5.hexdigest(normalized.to_s)
  @redis.smembers("inbound_links_#{digest}")
end
#set_totals ⇒ Object
Sets totals for the end of the crawl (Not Used)
186 187 188 189 |
# File 'lib/stats.rb', line 186
#
# Sets totals for the end of the crawl (Not Used).
#
# Fetches the statistics hash and stores the members of the "crawled" set
# under :crawled on that in-memory hash. Nothing is written back to redis
# here; the return value is the assigned member list.
def set_totals
  totals = get_statistics
  totals[:crawled] = @redis.smembers("crawled")
end
#start_crawl(options) ⇒ Object
Sets up the crawl in statistics
20 21 22 23 24 25 26 27 28 29 |
# File 'lib/stats.rb', line 20
#
# Sets up the crawl in statistics.
#
# When options[:crawl_id] is not yet a member of the un-namespaced
# "cobweb_crawls" set, it is added and every option is copied (value
# stringified) into the namespaced "crawl_details" hash. In all cases
# "crawl_started_at" and "current_status" (CobwebCrawlHelper::STARTING) are
# then written to the "statistics" hash.
def start_crawl(options)
  unless @full_redis.sismember "cobweb_crawls", options[:crawl_id]
    @full_redis.sadd "cobweb_crawls", options[:crawl_id]
    options.each do |key, value|
      @redis.hset "crawl_details", key, value.to_s
    end
  end
  @redis.hset "statistics", "crawl_started_at", DateTime.now
  @redis.hset "statistics", "current_status", CobwebCrawlHelper::STARTING
end
#update_statistics(content, crawl_counter = @redis.scard("crawled").to_i, queue_counter = @redis.scard("queued").to_i) ⇒ Object
Returns statistics hash. update_statistics takes the content hash, extracts statistics from it and updates redis with the data.
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/stats.rb', line 53
#
# Returns statistics hash. update_statistics takes the content hash, extracts
# statistics from it and updates redis with the data.
#
# Keys read from +content+: :response_time, :length, :mime_type,
# :status_code and (optionally) :redirect_through.
#
# +crawl_counter+ and +queue_counter+ default to the sizes of the "crawled"
# and "queued" sets. +crawl_counter+ is used as the weight of the existing
# averages, i.e. new_average = (old * crawl_counter + sample) / (crawl_counter + 1).
#
# The whole read-modify-write runs inside @lock. The returned hash holds the
# updated values; :mime_counts and :status_counts are JSON strings in it,
# because that is the form written to redis.
def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
  @lock.synchronize {
    @statistics = get_statistics

    response_time = content[:response_time].to_f
    length = content[:length].to_i
    mime_type = content[:mime_type]
    status = content[:status_code].to_i

    # --- response time -----------------------------------------------------
    # The snapshot fetched above already holds the stored values, so they are
    # read from it instead of issuing further hget round trips.
    if @statistics.has_key? :average_response_time
      @statistics[:average_response_time] = ((@statistics[:average_response_time].to_f * crawl_counter) + response_time) / (crawl_counter + 1)
    else
      @statistics[:average_response_time] = response_time
    end
    @statistics[:maximum_response_time] = response_time if @statistics[:maximum_response_time].nil? || response_time > @statistics[:maximum_response_time].to_f
    @statistics[:minimum_response_time] = response_time if @statistics[:minimum_response_time].nil? || response_time < @statistics[:minimum_response_time].to_f

    # --- content length (integer arithmetic, so the average is truncated) ---
    if @statistics.has_key? :average_length
      @statistics[:average_length] = ((@statistics[:average_length].to_i * crawl_counter) + length) / (crawl_counter + 1)
    else
      @statistics[:average_length] = length
    end
    @statistics[:maximum_length] = length if @statistics[:maximum_length].nil? || length > @statistics[:maximum_length].to_i
    @statistics[:minimum_length] = length if @statistics[:minimum_length].nil? || length < @statistics[:minimum_length].to_i

    # --- pages vs. assets --------------------------------------------------
    if mime_type.include?("text/html") || mime_type.include?("application/xhtml+xml")
      @statistics[:page_count] = @statistics[:page_count].to_i + 1
      @statistics[:page_size] = @statistics[:page_size].to_i + length
      increment_time_stat("pages_count")
    else
      @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
      @statistics[:asset_size] = @statistics[:asset_size].to_i + length
      increment_time_stat("assets_count")
    end

    # --- redirects ---------------------------------------------------------
    # Always written, so the field starts at 0 instead of staying absent
    # until the first redirect (the old `.to_i` followed by `.nil?` guard
    # could never fire).
    redirects = content[:redirect_through]
    @statistics[:total_redirects] = @statistics[:total_redirects].to_i + (redirects.nil? ? 0 : redirects.count)

    @statistics[:crawl_counter] = crawl_counter
    @statistics[:queue_counter] = queue_counter
    @statistics[:total_length] = @statistics[:total_length].to_i + length

    # --- per mime type counts (persisted as JSON) ---------------------------
    mime_counts = @statistics[:mime_counts] || {}
    mime_counts[mime_type] = mime_counts[mime_type].to_i + 1
    @statistics[:mime_counts] = mime_counts.to_json

    # record mime categories stats
    mime_category = %w[text application audio image message model multipart video].find { |prefix| mime_type.cobweb_starts_with? prefix }
    increment_time_stat("mime_#{mime_category}_count") if mime_category

    # --- per status code counts (persisted as JSON) -------------------------
    # Keys are Strings: get_statistics hands back the JSON.parse result, whose
    # keys are Strings, so a Symbol lookup never matched and the count was
    # reset to 1 on every call. The key is also computed before either branch
    # needs it, so the very first page is no longer recorded under nil.
    status_counts = @statistics[:status_counts] || {}
    status_key = status.to_s
    status_counts[status_key] = status_counts[status_key].to_i + 1
    @statistics[:status_counts] = status_counts.to_json

    # record statistics by status type (only 2xx, 4xx and 5xx are recorded)
    # NOTE(review): the "status|_400_count" / "status|_500_count" names look
    # like typos next to "status_200_count"; kept byte-for-byte because other
    # readers of these keys are not visible from here - confirm before renaming.
    case status
    when 200...300 then increment_time_stat("status_200_count")
    when 400...500 then increment_time_stat("status|_400_count")
    when 500...600 then increment_time_stat("status|_500_count")
    end

    ## time based statistics
    increment_time_stat("minute_totals", "minute", 60)

    # Persist field/value pairs directly. This used to be assembled as Ruby
    # source text and run through instance_eval, which evaluated values taken
    # from crawled responses (e.g. the mime type) as code and mangled any
    # value containing a quote or backslash.
    @redis.hmset "statistics", *@statistics.flat_map { |key, value| [key.to_s, value.to_s] }
  }
  @statistics
end
#update_status(status) ⇒ Object
Sets the current status of the crawl
176 177 178 |
# File 'lib/stats.rb', line 176
#
# Sets the current status of the crawl.
#
# The write is skipped (and nil returned) when the stored status is already
# CobwebCrawlHelper::CANCELLED; otherwise "current_status" in the
# "statistics" hash is overwritten with +status+.
def update_status(status)
  return if get_status == CobwebCrawlHelper::CANCELLED

  @redis.hset("statistics", "current_status", status)
end