Class: Geoptima::Dataset

Inherits:
Object
  • Object
show all
Includes:
ErrorCounter
Defined in:
lib/geoptima/data.rb

Instance Attribute Summary collapse

Attributes included from ErrorCounter

#errors

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ErrorCounter

#combine_errors, #incr_error, #report_errors

Constructor Details

#initialize(name, options = {}) ⇒ Dataset

Returns a new instance of Dataset.



856
857
858
859
860
861
862
863
864
# File 'lib/geoptima/data.rb', line 856

# Creates an empty dataset called +name+.
#
# The +options+ hash is stored as-is (see #options). Three keys are read here:
#   :time_range          - kept as @time_range; when missing, a DateRange built
#                          from Config[:min_datetime] and Config[:max_datetime]
#   :location_range      - kept as @location_range; when missing,
#                          LocationRange.everywhere
#   :geolocation_options - kept as @geolocation_options; when missing, {}
def initialize(name, options = {})
  @name    = name
  @data    = []
  @options = options

  @time_range = options[:time_range]
  @time_range ||= DateRange.new(Config[:min_datetime], Config[:max_datetime])

  @location_range = options[:location_range]
  @location_range ||= LocationRange.everywhere

  @geolocation_options = options[:geolocation_options]
  @geolocation_options ||= {}

  # Cache used by #[] for per-field value lists.
  @fields = {}
end

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



854
855
856
# File 'lib/geoptima/data.rb', line 854

# Reader for the dataset name given to the constructor.
def name; @name; end

#options ⇒ Object (readonly)

Returns the value of attribute options.



854
855
856
# File 'lib/geoptima/data.rb', line 854

# Reader for the options hash given to the constructor.
def options; @options; end

Class Method Details

.add_directory_to_datasets(datasets, directory, options = {}) ⇒ Object



1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
# File 'lib/geoptima/data.rb', line 1126

# Walks +directory+ recursively and hands every entry whose name matches
# /\.json/i to add_file_to_datasets.
#
# datasets  - collection passed through unchanged to add_file_to_datasets
# directory - path of the directory to scan
# options   - passed through unchanged to add_file_to_datasets
#
# Entries whose name starts with a dot (including "." and "..") are skipped.
# Sub-directories are descended into. Any other entry is reported on stdout
# and ignored.
#
# Returns nil.
def self.add_directory_to_datasets(datasets, directory, options = {})
  # Dir.foreach closes the directory handle as soon as iteration finishes
  # (or raises). The previous Dir.open(directory).each kept one handle open
  # per visited directory until garbage collection.
  Dir.foreach(directory) do |file|
    next if file =~ /^\./
    path = "#{directory}/#{file}"
    if File.directory?(path)
      add_directory_to_datasets(datasets, path, options)
    elsif file =~ /\.json/i
      add_file_to_datasets(datasets, path, options)
    else
      puts "Ignoring files without JSON extension: #{path}"
    end
  end
end

.add_file_to_datasets(datasets, file, options = {}) ⇒ Object



1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
# File 'lib/geoptima/data.rb', line 1111

# Adds one path to +datasets+.
#
# A directory is delegated to add_directory_to_datasets. Anything else is
# loaded with Geoptima::Data.new(file); when the result is not valid? a line
# is printed and nothing is added. A valid file is appended to the dataset
# stored under its key, creating that Geoptima::Dataset on first use. The key
# is 'all' when options[:combine_all] is set, otherwise the file's id.
def self.add_file_to_datasets(datasets, file, options = {})
  return add_directory_to_datasets(datasets, file, options) if File.directory?(file)

  geoptima = Geoptima::Data.new(file)
  if geoptima.valid?
    key = options[:combine_all] ? 'all' : geoptima.id
    datasets[key] ||= Geoptima::Dataset.new(key, options)
    datasets[key] << geoptima
  else
    puts "INVALID: #{geoptima.start}\t#{file}\n\n"
  end
end

.make_datasets(files, options = {}) ⇒ Object



1140
1141
1142
1143
1144
1145
1146
# File 'lib/geoptima/data.rb', line 1140

# Builds a fresh hash of datasets by feeding every entry of +files+ through
# add_file_to_datasets, and returns that hash.
def self.make_datasets(files, options = {})
  files.each_with_object({}) do |file, datasets|
    add_file_to_datasets(datasets, file, options)
  end
end

Instance Method Details

#<<(data) ⇒ Object



866
867
868
869
# File 'lib/geoptima/data.rb', line 866

# Appends one more file (+data+) to the dataset and returns the internal
# @data array.
#
# Several other methods memoise values computed from @data (@sorted, @fields,
# @imeis, @imsis, @stats, @waypoints). Previously only @sorted was dropped
# here, so those other caches kept answers that ignored the new file. All of
# them are reset so the next read recomputes from the full @data.
def <<(data)
  @sorted = nil
  @fields = {}      # must stay a Hash: #[] indexes into it directly
  @imeis = nil
  @imsis = nil
  @stats = nil
  @waypoints = nil
  @data << data
end

#[](key) ⇒ Object



936
937
938
# File 'lib/geoptima/data.rb', line 936

# Returns the distinct non-nil values obtained by indexing every entry of
# @data with +key+. The list is memoised in @fields.
#
# NOTE(review): the cache slot is key.downcase while the per-file lookup uses
# +key+ exactly as given, so 'Model' and 'model' share one cached answer.
# Presumably the per-file lookup is case-insensitive - confirm.
def [](key)
  slot = key.downcase
  cached = @fields[slot]
  return cached if cached

  @fields[slot] = @data.map { |file| file[key] }.compact.uniq
end

#description ⇒ Object



1107
1108
1109
# File 'lib/geoptima/data.rb', line 1107

# One-line, comma-separated summary of the dataset: its name, the IMEI, IMSI,
# platform, model and OS lists, the number of files, and the number of sorted
# events.
def description
  [
    "Dataset:#{name}",
    "IMEI:#{imeis.join(',')}",
    "IMSI:#{imsis.join(',')}",
    "Platform:#{platforms.join(',')}",
    "Model:#{models.join(',')}",
    "OS:#{oses.join(',')}",
    "Files:#{file_count}",
    "Events:#{sorted && sorted.length}"
  ].join(', ')
end

#dump_timers(out = STDOUT) ⇒ Object



1018
1019
1020
1021
1022
1023
1024
# File 'lib/geoptima/data.rb', line 1018

# Writes a report of every timer created through #timer to +out+ (STDOUT by
# default): a header line with the timer count, then one tab-indented
# +describe+ line per timer, ordered by timer name.
#
# @timers is only created by the first #timer call, so it may still be nil
# here. That case is treated as "no timers" instead of raising NoMethodError.
def dump_timers(out = STDOUT)
  timers = @timers || {}
  out.puts "Printing timer information for #{timers.length} timers:"
  timers.keys.sort.each do |key|
    out.puts "\t#{timers[key].describe}"
  end
end

#each_trace {|trace| ... } ⇒ Object

Yields:

  • (trace)


1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
# File 'lib/geoptima/data.rb', line 1077

# Groups the sorted 'gps' events into Trace objects and yields each trace.
#
# Events are appended to the current trace until the trace reports too_far
# for the next event. At that point the current trace is yielded and a new one
# is started with that event. The final trace, if any, is yielded after the
# loop. Nothing is yielded when there are no 'gps' events. The work is timed
# under the 'each_trace' timer.
def each_trace
  current = nil
  timer('each_trace').start
  sorted('gps').each do |fix|
    current = Trace.new(self) unless current
    if current.too_far(fix)
      yield current
      current = Trace.new(self)
    end
    current << fix
  end
  yield current if current
  timer('each_trace').stop
end

#events_names ⇒ Object



1009
1010
1011
# File 'lib/geoptima/data.rb', line 1009

# Sorted list of the distinct event names reported by the files in @data.
def events_names
  per_file = @data.map { |file| file.events_names }
  per_file.flatten.uniq.sort
end

#file_count ⇒ Object



871
872
873
# File 'lib/geoptima/data.rb', line 871

# Number of files that have been appended to this dataset.
def file_count
  @data.size
end

#first ⇒ Object



952
953
954
955
# File 'lib/geoptima/data.rb', line 952

# First event of the merged, sorted event list (nil when the list is empty).
# Triggers merge_events if nothing has been merged yet.
def first
  @sorted || merge_events
  all_events = @sorted[nil]
  all_events.first
end

#header(names = nil) ⇒ Object



978
979
980
981
982
983
# File 'lib/geoptima/data.rb', line 978

# Concatenated header fields of the event types in +names+ (all event names
# when +names+ is nil). For each type the header of its first sorted event is
# used. A type with no sorted events contributes a single nil.
def header(names = nil)
  merge_events unless @sorted
  event_types = names || events_names
  event_types.map do |event_type|
    sample = sorted(event_type)[0]
    [sample && sample.header]
  end.flatten
end

#imei ⇒ Object



883
884
885
# File 'lib/geoptima/data.rb', line 883

# The first entry of #imeis, or nil when that list is empty.
def imei
  imeis.first
end

#imeis ⇒ Object



887
888
889
# File 'lib/geoptima/data.rb', line 887

# Memoised list of IMEI values for this dataset, as produced by
# make_all_from_metadata('imei').
#
# The rendered source had lost the method name and read `@imeis ||= ('imei')`,
# which memoises the literal string. That breaks callers which treat the
# result as a list (imeis.join in #description and #to_s).
def imeis
  @imeis ||= make_all_from_metadata('imei')
end

#imsi ⇒ Object



875
876
877
# File 'lib/geoptima/data.rb', line 875

# The first entry of #imsis, or nil when that list is empty.
def imsi
  imsis.first
end

#imsis ⇒ Object



879
880
881
# File 'lib/geoptima/data.rb', line 879

# Memoised list of IMSI values for this dataset, as produced by
# make_all_from_metadata('imsi').
#
# The rendered source had lost the method name and read `@imsis ||= ('imsi')`,
# which memoises the literal string. That breaks callers which treat the
# result as a list (imsis.join in #description).
def imsis
  @imsis ||= make_all_from_metadata('imsi')
end

#last ⇒ Object



957
958
959
960
# File 'lib/geoptima/data.rb', line 957

# Last event of the merged, sorted event list (nil when the list is empty).
# Triggers merge_events if nothing has been merged yet.
def last
  @sorted || merge_events
  all_events = @sorted[nil]
  all_events.last
end

#length ⇒ Object



962
963
964
# File 'lib/geoptima/data.rb', line 962

# Number of events in the merged, sorted event list.
def length
  sorted.size
end

#locate_events ⇒ Object



1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
# File 'lib/geoptima/data.rb', line 1092

# Runs a Geoptima::Locator over the sorted events, using
# @geolocation_options, and times the run under the "locate.all" timer.
#
# Progress is printed before and after the run. The original guarded both
# messages with `true||$debug`, which is always true, so they stay
# unconditional here.
#
# Changes from the original:
#   * the closing ")" missing from the "(timed: ..." message is added
#   * the timer is stopped in an ensure block, so a failing locate no longer
#     leaves "locate.all" running
#
# Returns nil.
def locate_events
  puts "Locating #{sorted.length} events"
  locator = Geoptima::Locator.new(sorted, @geolocation_options)
  stopwatch = timer("locate.all")
  stopwatch.start
  begin
    locator.locate
  ensure
    stopwatch.stop
  end
  puts "Located #{locator.located.length} / #{sorted.length} events (timed: #{stopwatch})"
end

#make_all_from_metadata(field_name) ⇒ Object



891
892
893
894
895
896
897
898
899
900
901
902
# File 'lib/geoptima/data.rb', line 891

# Lists the distinct non-nil values of +field_name+ across the files in
# @data, ordered by weight (heaviest first). A value's weight is the sum of
# count.to_i over the files that carry it.
#
# The rendered source read `def (field_name)`, which does not parse. The
# method name is restored from this method's documentation heading.
def make_all_from_metadata(field_name)
  totals = @data.each_with_object({}) do |file, counts|
    value = file[field_name]
    counts[value] = (counts[value] || 0) + file.count.to_i
  end
  ranked = totals.to_a.sort { |a, b| b[1] <=> a[1] }
  ranked.map { |value, _total| value }.compact.uniq
end

#merge_events ⇒ Object



1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
# File 'lib/geoptima/data.rb', line 1026

# Builds, once, the merged and ordered list of events from every file in
# @data, stores it in @sorted[nil], and returns the @sorted hash.
#
# An event is kept when @time_range includes its time. 'gps' events must
# additionally fall inside @location_range, unless that range is nil.
# When options[:locate] is set, locate_events is run after sorting.
# Progress messages are printed only when $debug is set.
def merge_events
  @sorted ||= {}
  # @sorted[nil] is the full merged list; when present there is nothing to do.
  unless @sorted[nil]
    timer('merge_events').start
    # Maps "<time_key> <event name>" to the event. A later event with the same
    # key replaces an earlier one, so at most one event per time_key/name pair
    # survives.
    event_hash = {}
    puts "Creating sorted maps for #{self}" if($debug)
    events_names.each do |name|
      is_gps = name == 'gps'
      puts "Preparing maps for #{name}" if($debug)
      @data.each do |data|
        puts "Processing #{(e=data.events[name]) && e.length} events for #{name}" if($debug)
        # A file may have no events of this type; data.events[name] is then
        # falsy and the file is skipped.
        (events = data.events[name]) && events.each do |event|
          puts "\t\tTesting #{event.time} inside #{@time_range}" if($debug)
          if @time_range.include?(event.time)
            puts "\t\t\tEvent at #{event.time} is inside #{@time_range}" if($debug)
            # The location filter applies to gps events only.
            if !is_gps || @location_range.nil? || @location_range.include?(event.location)
              key = "#{event.time_key} #{name}"
              event_hash[key] = event
            end
          end
        end
        # NOTE(review): this runs once per (event name, file) pair, so each
        # file's errors are combined as many times as there are event names.
        # Confirm combine_errors tolerates repeated calls for the same file.
        combine_errors data
      end
      puts "After adding #{name} events, maps are #{event_hash.length} long" if($debug)
    end
    puts "Merging and sorting #{event_hash.keys.length} maps" if($debug)
    timer('merge_events.sort').start
    # Ordering comes from sorting the key strings, i.e. by time_key first and
    # event name second.
    @sorted[nil] = event_hash.keys.sort.map{|k| event_hash[k]}
    timer('merge_events.sort').stop
    puts "Sorted #{@sorted[nil].length} events" if($debug)
    timer('merge_events.locate').start
    locate_events if(options[:locate])
    timer('merge_events.locate').stop
    timer('merge_events').stop
  end
  @sorted
end

#models ⇒ Object



944
945
946
# File 'lib/geoptima/data.rb', line 944

# Distinct 'Model' values across the dataset's files (see #[]).
def models
  field = 'Model'
  self[field]
end

#oses ⇒ Object



948
949
950
# File 'lib/geoptima/data.rb', line 948

# Distinct 'OS' values across the dataset's files (see #[]).
def oses
  field = 'OS'
  self[field]
end

#platforms ⇒ Object



940
941
942
# File 'lib/geoptima/data.rb', line 940

# Distinct 'Platform' values across the dataset's files (see #[]).
def platforms
  field = 'Platform'
  self[field]
end

#recent(event, key, seconds = 60) ⇒ Object



904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
# File 'lib/geoptima/data.rb', line 904

# Returns event[key]. When the event has no value for +key+, first tries to
# copy one from an earlier event in the sorted list.
#
# event   - the event to fill in; must be a member of #sorted for the search
#           to run
# key     - "<event type>.<property>"; split on '.' into the event type to look
#           for and the property to copy from it
# seconds - maximum age of the earlier event, default 60
#
# The search is skipped (with a message) when the event's file has no imei or
# when the event is not found in the sorted list. The work is timed under
# "export.event.recent" and "export.event.recent.<key>".
def recent(event,key,seconds=60)
  unless event[key]
    timer("export.event.recent").start
    timer("export.event.recent.#{key}").start
    if imei = event.file.imei
      puts "Searching for recent values for '#{key}' starting at event #{event}" if($debug)
      ev,prop=key.split(/\./)
      ar=sorted
      puts "\tSearching through #{ar && ar.length} events for event type #{ev} and property #{prop}" if($debug)
      if i=ar.index(event)
        # Walk backwards from the event. The loop's value (afe) is the event
        # passed to break, or nil when index 0 is reached without breaking.
        afe = while(i>0)
          fe = ar[i-=1]
          puts "\t\tTesting event[#{i}]: #{fe}" if($debug)
          # Stop at the first earlier event that is nil, is older than
          # +seconds+, or has the wanted type and the same imei.
          # NOTE(review): the time difference is multiplied by SPERDAY before
          # comparing with seconds - presumably event times are in days; confirm.
          break(fe) if(fe.nil? || (event.time - fe.time) * SPERDAY > seconds || (fe.name == ev && fe.file.imei == imei))
        end
        # The loop can also stop on an event that is too old, so the type is
        # re-checked before copying.
        if afe && afe.name == ev
          puts "\t\tFound event[#{i}] with #{prop} => #{afe[prop]} and time gap of #{(event.time - fe.time) * SPERDAY} seconds" if($debug)
          event[key] = afe[prop]
        end
      else
        puts "Event not found in search for recent '#{key}': #{event}"
      end
    else
      puts "Not searching for correlated data without imei: #{event}"
    end
    timer("export.event.recent.#{key}").stop
    timer("export.event.recent").stop
  end
#      @recent[key] ||= ''
  event[key]
end

#sorted(event_type = nil) ⇒ Object



966
967
968
969
970
971
972
973
974
975
976
# File 'lib/geoptima/data.rb', line 966

# Returns the merged, ordered events. With no argument (or nil) this is the
# full list. With an +event_type+ it is the sub-list of events whose name
# equals that type, built on first request, cached in @sorted, and timed under
# "sorted.<event_type>". Runs merge_events first if nothing is merged yet.
def sorted(event_type = nil)
  merge_events unless @sorted
  if !event_type.nil? && !@sorted[event_type]
    timer("sorted.#{event_type}").start
    @sorted[event_type] = @sorted[nil].select { |event| event.name == event_type }
    timer("sorted.#{event_type}").stop
  end
  @sorted[event_type]
end

#stats ⇒ Object



985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
# File 'lib/geoptima/data.rb', line 985

# Value histograms for every "<event name>.<field>" pair found in the sorted
# events: { key => { value => occurrences } }. Computed once and memoised in
# @stats. The counting pass is timed under the 'stats' timer.
#
# Keys with too many distinct values to be useful are dropped: more than 500,
# or more than 10 when that is also more than half the number of events.
#
# The original ran this pruning on every call, outside the memoisation guard,
# while event_count was only assigned inside it. On a second call event_count
# was nil, so `event_count / 2` raised NoMethodError as soon as a surviving
# key had more than 10 distinct values. Pruning now happens once, right after
# counting.
def stats
  merge_events unless @sorted
  unless @stats
    timer('stats').start
    tallies = {}
    event_count = 0
    sorted.each do |event|
      event_count += 1
      event.header.each do |field|
        key = "#{event.name}.#{field}"
        value = event[field]
        tallies[key] ||= {}
        tallies[key][value] ||= 0
        tallies[key][value] += 1
      end
    end
    timer('stats').stop
    tallies.reject! do |_key, counts|
      counts.length > 500 || counts.length > 10 && counts.length > event_count / 2
    end
    @stats = tallies
  end
  @stats
end

#timer(name) ⇒ Object



1013
1014
1015
1016
# File 'lib/geoptima/data.rb', line 1013

# Returns the Geoptima::Timer registered under +name+, creating and
# registering it on first use. The registry itself (@timers) is created
# lazily.
def timer(name)
  registry = (@timers ||= {})
  existing = registry[name]
  return existing if existing

  registry[name] = Geoptima::Timer.new(name)
end

#to_s ⇒ Object



1103
1104
1105
# File 'lib/geoptima/data.rb', line 1103

def to_s
  (imei.to_s.length < 1 || name == imei) ? name : imeis.join(',')
end

#waypoints(waypoints = nil) ⇒ Object



1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
# File 'lib/geoptima/data.rb', line 1064

# Returns a Trace (type 'ways', named "waypoints-<dataset name>") holding the
# events of the requested type that pass valid_gpx? after to_type.
#
# waypoints - an event type name; 'all' or nil selects every sorted event
#
# One trace is built per event type and cached in @waypoints.
def waypoints(waypoints = nil)
  @waypoints ||= {}
  event_type = (waypoints == 'all') ? nil : waypoints
  cached = @waypoints[event_type]
  return cached if cached

  track = Trace.new(self, type: 'ways', name: "waypoints-#{self.name}")
  @waypoints[event_type] = track
  sorted(event_type).each do |raw|
    typed = raw.to_type
    track << typed if typed.valid_gpx?
  end
  track
end