Module: Croque::Aggregator
- Defined in:
- lib/croque/aggregator.rb
Class Method Summary collapse
- .aggregate(date) ⇒ Object
- .aggregate_per_hour(date) ⇒ Object
- .all ⇒ Object
- .generate_ranking(date) ⇒ Object
Class Method Details
.aggregate(date) ⇒ Object
4 5 6 7 8 9 10 11 |
# File 'lib/croque/aggregator.rb', line 4

# Runs the full aggregation pipeline for a single date.
#
# The three steps run strictly in this order: previous output for the date
# is removed, the per-hour aggregation is run, and finally the ranking is
# generated.
#
# @param date the day to aggregate (passed through unchanged to each step)
# @return [Object] whatever generate_ranking returns
def aggregate(date)
  # Step 1: remove files for this date before regenerating them.
  remove_files(date)

  # Step 2: aggregate the logs hour by hour.
  aggregate_per_hour(date)

  # Step 3: generate the ranking for the date.
  generate_ranking(date)
end
.aggregate_per_hour(date) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/croque/aggregator.rb', line 13

# Scans every log file and creates one CSV per hour from the lines that
# match the given date.
#
# Each non-skippable file is streamed from disk in chunks of +linage+ lines.
# Reading the file directly (instead of shelling out to wc/head/tail) means:
# * the final, partial chunk no longer re-reads lines that were already
#   collected, so matched lines are not duplicated;
# * a last line without a trailing newline is not lost;
# * file names containing spaces or shell metacharacters are safe;
# * the file is not re-read from the top for every chunk.
#
# @param date the day whose log lines are collected
# @return [Object] the result of the final log call
def aggregate_per_hour(date)
  log("aggregate logs per hour on #{date} start")

  # Number of lines handled per progress-log chunk.
  linage = 1000
  # The matcher only depends on the date, so build it once.
  matcher = date_matcher(date)

  log_files.each do |file|
    log("check skippable of #{file}")
    next if skippable?(date, file)

    log("aggregate logs of #{file}")
    # Counted up front only so the progress message can show the total.
    line_count = File.foreach(file).count

    lines = []
    File.foreach(file).each_slice(linage).with_index do |chunk, index|
      log("aggregate logs for #{index * linage}-#{(index + 1) * linage} in #{line_count} on #{date}")
      # keep only the lines matching the requested date
      lines.concat(chunk.select { |line| line.match(matcher) })
    end

    hours.each do |hour|
      # create csv file
      log("create csv for #{date} #{hour} hour")
      create_csv(date, hour, lines)
    end
  end

  log("aggregate logs per hour on #{date} end")
end
.all ⇒ Object
77 78 79 80 81 82 83 84 85 |
# File 'lib/croque/aggregator.rb', line 77

# Lists the dates for which an entry exists directly under store_path.
#
# Both the filter and the parse use the entry's basename. Filtering on the
# full path would let every entry through whenever store_path itself
# contains a date-shaped component, and Date.parse would then raise on the
# entries that are not dates.
#
# @return [Array<Date>] one Date per date-named entry, in glob order
def all
  names = Dir.glob(store_path + '*').map { |path| File.basename(path) }
  dated = names.select { |name| name.match(/\d{4}-\d{2}-\d{2}/) }
  dated.map { |name| Date.parse(name) }
end
.generate_ranking(date) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/croque/aggregator.rb', line 47

# Builds the ranking CSV for a date from the per-hour CSV files.
#
# For every hour whose CSV exists, each row is read as
# [uuid, processing_time]; rows for which low?(processing_time) is true are
# dropped. The remaining rows are sorted by processing time, descending,
# and stored through store_csv at ranking_path(date).
#
# @param date the day whose hourly CSVs are ranked
# @return [Object] the result of the final log call
def generate_ranking(date)
  log("generate ranking on #{date} start")

  rows = []
  hours.each do |hour|
    log("generate array for ranking in #{date} #{hour} hour")
    path = csv_path(date, hour)
    # next if no file
    next unless File.exist?(path)

    # File.read closes the handle; File.open(...).read left it open.
    csv_data = File.read(path).gsub(/\r/, "")
    CSV.parse(csv_data).each do |line|
      uuid = line[0]
      processing_time = line[1].to_f
      next if low?(processing_time)

      rows << [date, hour, uuid, processing_time]
    end
  end

  log("sort array for ranking on #{date}")
  # Processing Time Desc
  rows = rows.sort { |a, b| b[3] <=> a[3] }

  log("generate ranking csv on #{date}")
  # Options are splatted as keywords: CSV.generate takes them as keyword
  # arguments, and a positional Hash raises ArgumentError on Ruby 3.
  data = CSV.generate("", **csv_option) do |csv|
    rows.each { |row| csv << row }
  end
  store_csv(ranking_path(date), data)

  log("generate ranking on #{date} end")
end