Class: SkinnyJeans
- Inherits:
-
Object
- Object
- SkinnyJeans
- Defined in:
- lib/skinny_jeans.rb
Overview
require 'home_run'
Defined Under Namespace
Instance Attribute Summary collapse
-
#hash_of_dates ⇒ Object
Returns the value of attribute hash_of_dates.
-
#last_pageview_at ⇒ Object
Returns the value of attribute last_pageview_at.
Class Method Summary collapse
Instance Method Summary collapse
- #execute ⇒ Object
- #file_reader ⇒ Object
- #get_ar_class(klass) ⇒ Object
-
#initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp) ⇒ SkinnyJeans
constructor
A new instance of SkinnyJeans.
- #pageview ⇒ Object
- #prepare_db ⇒ Object
- #update ⇒ Object
Constructor Details
#initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp) ⇒ SkinnyJeans
Returns a new instance of SkinnyJeans.
17 18 19 20 21 22 23 |
# File 'lib/skinny_jeans.rb', line 17

# Stores the parser configuration, prepares the SQLite database and resets
# the in-memory pageview tally.
#
# @param logfile_path [#to_s] path of the log file to parse; a name ending in
#   ".gz" is read through Zlib
# @param sqlite_db_path [String] path of the SQLite database file
# @param path_regexp [Regexp] pattern whose first capture group is the request path
# @param date_regexp [Regexp] pattern whose first capture group is the timestamp text
def initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp)
  @logfile_path = logfile_path
  @sqlite_db_path = sqlite_db_path
  @path_regexp = path_regexp
  @date_regexp = date_regexp

  # Only a trailing ".gz" marks a compressed log. Matching "gz" anywhere in
  # the path would misclassify plain logs such as "/var/gzlogs/access.log".
  @is_gzipped = logfile_path.to_s.end_with?('.gz')

  prepare_db
  @hash_of_dates = {}
  @last_datetime = nil
end
Instance Attribute Details
#hash_of_dates ⇒ Object
Returns the value of attribute hash_of_dates.
15 16 17 |
# File 'lib/skinny_jeans.rb', line 15

# Reader for the in-memory tally built while parsing.
#
# @return [Object] the current value of @hash_of_dates
def hash_of_dates
  @hash_of_dates
end
#last_pageview_at ⇒ Object
Returns the value of attribute last_pageview_at.
15 16 17 |
# File 'lib/skinny_jeans.rb', line 15

# Reader for the timestamp attribute that #execute persists with each run.
#
# @return [Object] the current value of @last_pageview_at
def last_pageview_at
  @last_pageview_at
end
Class Method Details
.execute(logfile_path, sqlite_db_path, path_regexp, date_regexp) ⇒ Object
11 12 13 |
# File 'lib/skinny_jeans.rb', line 11

# Convenience entry point: builds an instance with the given arguments and
# immediately runs #execute on it.
#
# @param logfile_path [#to_s] path of the log file to parse
# @param sqlite_db_path [String] path of the SQLite database file
# @param path_regexp [Regexp] pattern capturing the request path
# @param date_regexp [Regexp] pattern capturing the timestamp text
# @return [Object] whatever the instance's #execute returns
def self.execute(logfile_path, sqlite_db_path, path_regexp, date_regexp)
  new(logfile_path, sqlite_db_path, path_regexp, date_regexp).execute
end
Instance Method Details
#execute ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/skinny_jeans.rb', line 49

# Parses the log file, adds the new pageviews to the database and records
# an Update row describing this run. Progress is reported with `puts`.
#
# Resume strategy: if a previous Update exists, the log is first scanned for
# the exact line that run ended on. When it is found, every line up to and
# including it is skipped; when it is not found, lines whose timestamp is
# older than the previous `last_pageview_at` are skipped instead.
#
# @return [SkinnyJeans] self
def execute
  lines_parsed = 0
  last_line_parsed = nil
  last_pageview_at = nil
  lineno_of_last_line_parsed = nil

  # last_update = Update.order("id DESC").limit(1).first
  last_update = Update.find(:first, :order => "id DESC", :limit => 1)

  # see if the last_line_parsed exists in the current log file
  # if it doesn't exist, we'll simply read anything with a timestamp greater than last_pageview_at
  if last_update
    last_pageview_at = last_update.last_pageview_at
    last_line_parsed = last_update.last_line_parsed
    file_reader do |line, lineno|
      if line == last_line_parsed
        lineno_of_last_line_parsed = lineno
        break
      end
    end
    puts "last line parsed was\n#{last_line_parsed}\nat lineno #{lineno_of_last_line_parsed}"
  end

  realtime = Benchmark.realtime do
    # Counted locally rather than taken from the yielded index.
    lineno = -1
    file_reader do |line, _index|
      lineno += 1
      # `<=`, not `<`: the line at lineno_of_last_line_parsed was already
      # counted by the previous run and must not be counted again.
      next if lineno_of_last_line_parsed && lineno <= lineno_of_last_line_parsed

      path_match = line[@path_regexp, 1]
      next if path_match.nil?
      date_match = line[@date_regexp, 1]
      next if date_match.nil?
      time_object = parse_string_as_date(date_match)

      # Timestamp fallback, used only when the previous last line was not found.
      next if lineno_of_last_line_parsed.nil? && !last_pageview_at.nil? && time_object < last_pageview_at

      insert_or_increment([time_object, path_match])
      last_line_parsed = line
      lines_parsed += 1
    end
  end

  puts "completed parsing in #{realtime}"

  persisted = 0
  realtime = Benchmark.realtime do
    hash_of_dates.each do |date, hash_of_paths|
      hash_of_paths.each do |path, count|
        pv = Pageview.find_or_create_by_date_and_path(date, path)
        pv.pageview_count ||= 0
        pv.pageview_count += count
        pv.save!
        persisted += 1
      end
    end
  end

  puts "completed persistence in #{realtime}"

  # `self.` is required: the local `last_pageview_at` shadows the attribute.
  # If this run left the attribute nil, keep the previous watermark so the
  # timestamp fallback still works on the next run.
  Update.create!({:last_pageview_at => (self.last_pageview_at || last_pageview_at), :lines_parsed => lines_parsed, :last_line_parsed => last_line_parsed})

  puts "total records in DB: #{Pageview.count}\nlines parsed this round: #{lines_parsed}\nlines persisted this round:#{persisted}\ntotal SkinnyJeans executions since inception: #{Update.count}"

  self
end
#file_reader ⇒ Object
116 117 118 119 120 121 122 123 |
# File 'lib/skinny_jeans.rb', line 116

# Streams the log file line by line, transparently decompressing it when
# @is_gzipped is set.
#
# The handle is opened in block form so it is closed when iteration ends,
# including when the caller leaves early with `break`.
#
# @yield [line, lineno] each line (with its trailing newline) and its
#   zero-based position, passed as a single two-element array
# @return [Object] the value of the underlying `open` block
def file_reader
  reader = @is_gzipped ? Zlib::GzipReader : File
  reader.open(@logfile_path) do |io|
    # Iterate the stream itself; calling `read` here would hand the whole
    # file to the caller as one "line".
    io.each_with_index { |line, lineno| yield([line, lineno]) }
  end
end
#get_ar_class(klass) ⇒ Object
128 129 130 |
# File 'lib/skinny_jeans.rb', line 128

# Returns the class it is given, unchanged.
#
# NOTE(review): the original body was
# `begin; return(klass); rescue(ActiveRecord::ConnectionNotEstablished); prepare_db; end`.
# Returning an already-evaluated argument cannot raise, so that rescue never
# ran; the dead branch is removed here. If a connection guarantee is wanted,
# it has to be checked explicitly (presumably via prepare_db; confirm intent).
#
# @param klass [Class] the model class to hand back
# @return [Class] klass
def get_ar_class(klass)
  klass
end
#pageview ⇒ Object
125 |
# File 'lib/skinny_jeans.rb', line 125

# Accessor for the Pageview model, routed through #get_ar_class.
#
# @return [Object] the result of get_ar_class(Pageview)
def pageview
  get_ar_class(Pageview)
end
#prepare_db ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/skinny_jeans.rb', line 25

# Makes the SQLite store usable: opens the database at @sqlite_db_path,
# connects ActiveRecord to it and creates the `pageviews` and `updates`
# tables (plus the pageview indexes) when they are missing.
def prepare_db
  # create database if necessary; the handle is only needed for that, so
  # close it right away instead of leaking it
  SQLite3::Database.new(@sqlite_db_path).close
  ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @sqlite_db_path)
  connection = ActiveRecord::Base.connection

  # create tables if necessary
  unless Pageview.table_exists?
    connection.create_table(:pageviews) do |t|
      t.column :date, :date
      t.column :path, :string
      t.column :pageview_count, :integer
    end
    # flow tight like skinny jeans with these compound indexes
    connection.add_index(:pageviews, [:date, :path], :name => "date_path_index")
    connection.add_index(:pageviews, [:date, :pageview_count], :name => "date_pageview_count_index")
  end

  unless Update.table_exists?
    connection.create_table(:updates) do |t|
      t.column :last_pageview_at, :timestamp
      t.column :lines_parsed, :integer
      t.column :last_line_parsed, :string
    end
  end
end
#update ⇒ Object
126 |
# File 'lib/skinny_jeans.rb', line 126

# Accessor for the Update model, routed through #get_ar_class.
#
# @return [Object] the result of get_ar_class(Update)
def update
  get_ar_class(Update)
end