Class: SkinnyJeans

Inherits:
Object
  • Object
show all
Defined in:
lib/skinny_jeans.rb

Overview

require 'home_run'

Defined Under Namespace

Classes: Pageview, Update

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp) ⇒ SkinnyJeans

Returns a new instance of SkinnyJeans.



17
18
19
20
21
22
23
# File 'lib/skinny_jeans.rb', line 17

# Stores the parser configuration, prepares the database and resets the
# in-memory tallies.
#
# @param logfile_path [#to_s] path of the log file to read; it is treated as
#   gzip-compressed only when its file extension is ".gz" (case-insensitive)
# @param sqlite_db_path [String] stored as @sqlite_db_path, which prepare_db reads
# @param path_regexp [Regexp] stored as @path_regexp
# @param date_regexp [Regexp] stored as @date_regexp
def initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp)
  @logfile_path = logfile_path
  @sqlite_db_path = sqlite_db_path
  @path_regexp = path_regexp
  @date_regexp = date_regexp
  # Decide on the extension only: matching "gz" anywhere in the path would
  # misclassify plain files such as "/var/log/gzone/access.log".
  @is_gzipped = File.extname(logfile_path.to_s).casecmp(".gz").zero?
  prepare_db
  @hash_of_dates = {}
  @last_datetime = nil
end

Instance Attribute Details

#hash_of_dates ⇒ Object

Returns the value of attribute hash_of_dates.



15
16
17
# File 'lib/skinny_jeans.rb', line 15

# Reader for the @hash_of_dates instance variable.
#
# @return [Object, nil] the current value of @hash_of_dates (nil when unset)
def hash_of_dates; @hash_of_dates; end

#last_pageview_at ⇒ Object

Returns the value of attribute last_pageview_at.



15
16
17
# File 'lib/skinny_jeans.rb', line 15

# Reader for the @last_pageview_at instance variable.
#
# @return [Object, nil] the current value of @last_pageview_at (nil when unset)
def last_pageview_at; @last_pageview_at; end

Class Method Details

.execute(logfile_path, sqlite_db_path, path_regexp, date_regexp) ⇒ Object



11
12
13
# File 'lib/skinny_jeans.rb', line 11

# Convenience entry point: builds an instance from the given arguments and
# immediately runs its instance-level #execute.
#
# @return [Object] whatever the instance-level #execute returns
def self.execute(logfile_path, sqlite_db_path, path_regexp, date_regexp)
  parser = new(logfile_path, sqlite_db_path, path_regexp, date_regexp)
  parser.execute
end

Instance Method Details

#execute ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/skinny_jeans.rb', line 49

# Parses not-yet-seen log lines, persists the aggregated counts and records an
# Update row describing this run.
#
# Resume strategy: when a previous Update exists, the log is first scanned for
# its last_line_parsed. If that line is found, parsing starts on the line after
# it; if not, every line whose timestamp is older than the previous
# last_pageview_at is skipped instead.
#
# @return [SkinnyJeans] self
def execute
  lines_parsed = 0
  last_line_parsed = nil
  previous_pageview_at = nil
  resume_lineno = nil
  # last_update = Update.order("id DESC").limit(1).first
  last_update = Update.find(:first, :order => "id DESC", :limit => 1)

  # see if the last_line_parsed exists in the current log file
  # if it doesn't exist, we'll simply read anything with a timestamp not older than previous_pageview_at
  if last_update
    previous_pageview_at = last_update.last_pageview_at
    last_line_parsed = last_update.last_line_parsed
    file_reader do |line, lineno|
      if line == last_line_parsed
        resume_lineno = lineno
        break
      end
    end
    puts "last line parsed was\n#{last_line_parsed}\nat lineno #{resume_lineno}"
  end

  realtime = Benchmark.realtime do
    file_reader do |line, lineno|
      # <= rather than <: the resume line itself was already counted by the
      # previous run, so processing it again would double-count it.
      next if resume_lineno && lineno <= resume_lineno

      path_match = line[@path_regexp, 1]
      next if path_match.nil?
      date_match = line[@date_regexp, 1]
      next if date_match.nil?
      time_object = parse_string_as_date(date_match)

      # Timestamp fallback, used only when the resume line was not found.
      next if resume_lineno.nil? && previous_pageview_at && time_object < previous_pageview_at

      insert_or_increment([time_object, path_match])
      last_line_parsed = line
      lines_parsed += 1
    end
  end

  puts "completed parsing in #{realtime}"

  persisted = 0
  realtime = Benchmark.realtime do
    hash_of_dates.each do |date, hash_of_paths|
      hash_of_paths.each do |path, count|
        pv = Pageview.find_or_create_by_date_and_path(date, path)
        pv.pageview_count = (pv.pageview_count || 0) + count
        pv.save!
        persisted += 1
      end
    end
  end
  
  puts "completed persistence in #{realtime}"

  # Keep the previous checkpoint when the reader has no newer timestamp, so the
  # timestamp fallback still works on the next run.
  # NOTE(review): assumes the last_pageview_at reader is nil when this run saw no pageview — confirm.
  Update.create!({:last_pageview_at => last_pageview_at || previous_pageview_at, :lines_parsed => lines_parsed, :last_line_parsed => last_line_parsed})

  puts "total records in DB: #{Pageview.count}\nlines parsed this round: #{lines_parsed}\nlines persisted this round:#{persisted}\ntotal SkinnyJeans executions since inception: #{Update.count}"

  self
end

#file_reader ⇒ Object



116
117
118
119
120
121
122
123
# File 'lib/skinny_jeans.rb', line 116

# Yields each line of the log file together with its zero-based line number.
#
# Every yield passes one two-element array, [line, lineno]. When @is_gzipped
# is set the file is read through Zlib::GzipReader, otherwise as a plain file.
# Both branches use the block form of open, so the handle is closed even when
# the caller leaves the iteration early with break.
#
# @yieldparam pair [Array(String, Integer)] the line and its line number
def file_reader
  if @is_gzipped
    # The block receives the reader, not a line; iterate it instead of
    # calling #read, which would hand the whole file over as a single "line".
    Zlib::GzipReader.open(@logfile_path) do |gz|
      gz.each_with_index { |line, lineno| yield([line, lineno]) }
    end
  else
    File.open(@logfile_path, "r") do |file|
      file.each_with_index { |line, lineno| yield([line, lineno]) }
    end
  end
end

#get_ar_class(klass) ⇒ Object



128
129
130
# File 'lib/skinny_jeans.rb', line 128

# Returns klass after making sure it has a database connection.
#
# The connection is probed first; if that raises
# ActiveRecord::ConnectionNotEstablished, prepare_db is run before klass is
# returned. (Previously the rescue wrapped a bare `return klass`, which cannot
# raise, so the fallback never ran.)
#
# @param klass [Class] a model class responding to .connection
# @return [Class] klass itself
def get_ar_class(klass)
  klass.connection
  klass
rescue ActiveRecord::ConnectionNotEstablished
  prepare_db
  klass
end

#pageview ⇒ Object



125
# File 'lib/skinny_jeans.rb', line 125

# Passes the Pageview class through get_ar_class and returns the result.
def pageview
  get_ar_class(Pageview)
end

#prepare_db ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/skinny_jeans.rb', line 25

# Ensures the SQLite database at @sqlite_db_path is usable: opens it once,
# connects ActiveRecord to it, and creates the pageviews and updates tables
# (plus the pageviews indexes) when they do not exist yet.
def prepare_db
  # create database if necessary; close the handle right away so it is not
  # leaked — ActiveRecord opens its own connection below
  SQLite3::Database.new(@sqlite_db_path).close
  ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @sqlite_db_path)

  # create tables if necessary
  unless Pageview.table_exists?
    connection = ActiveRecord::Base.connection
    connection.create_table(:pageviews) do |t|
      t.column :date, :date
      t.column :path, :string
      t.column :pageview_count, :integer
    end
    # flow tight like skinny jeans with these compound indexes
    connection.add_index(:pageviews, [:date, :path], :name => "date_path_index")
    connection.add_index(:pageviews, [:date, :pageview_count], :name => "date_pageview_count_index")
  end

  unless Update.table_exists?
    ActiveRecord::Base.connection.create_table(:updates) do |t|
      t.column :last_pageview_at, :timestamp
      t.column :lines_parsed, :integer
      t.column :last_line_parsed, :string
    end
  end
end

#update ⇒ Object



126
# File 'lib/skinny_jeans.rb', line 126

# Passes the Update class through get_ar_class and returns the result.
def update
  get_ar_class(Update)
end