Class: Jobs::CleanUpCrawlerStats

Inherits:
Scheduled
Defined in:
app/jobs/scheduled/clean_up_crawler_stats.rb

Instance Method Summary

Methods inherited from Scheduled

#perform

Methods inherited from Base

acquire_cluster_concurrency_lock!, clear_cluster_concurrency_lock!, cluster_concurrency, cluster_concurrency_redis_key, delayed_perform, #error_context, get_cluster_concurrency, #last_db_duration, #log, #perform, #perform_immediately

Instance Method Details

#execute(args) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'app/jobs/scheduled/clean_up_crawler_stats.rb', line 7

# Prunes stored web crawler request rows in two passes: first by age, then by
# rank among the previous day's rows.
#
# @param args [Object] not read by this method
# @return [Object] whatever the final DB.exec call returns
def execute(args)
  # Pass 1: drop every row dated before the model's retention cutoff.
  expiry_cutoff = WebCrawlerRequest.max_record_age.ago
  WebCrawlerRequest.where("date < ?", expiry_cutoff).delete_all

  # Pass 2: keep count of only the top user agents. Yesterday's rows are
  # ranked by count (highest first) and any row ranked beyond the per-day
  # cap is deleted.
  yesterday = 1.day.ago.strftime("%Y-%m-%d")
  daily_cap = WebCrawlerRequest.max_records_per_day

  prune_sql = <<~SQL
    WITH ranked_requests AS (
      SELECT row_number() OVER (ORDER BY count DESC) as row_number, id
        FROM web_crawler_requests
       WHERE date = '#{yesterday}'
    )
    DELETE FROM web_crawler_requests
    WHERE id IN (
      SELECT ranked_requests.id
        FROM ranked_requests
       WHERE row_number > #{daily_cap}
    )
  SQL

  DB.exec(prune_sql)
end