Class: BBFS::ContentData::ContentData

Inherits:
Object
  • Object
show all
Defined in:
lib/content_data/content_data.rb

Overview

Unfortunately this class is used as a mutable object for now, so be careful. TODO(kolman): Make this class immutable, but add an indexing structure to it. TODO(kolman): Add a wrapper to the class to enable dynamic content data (with easy-access indexes).

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(copy = nil) ⇒ ContentData

Returns a new instance of ContentData.

Parameters:

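  • copy (ContentData, nil) (defaults to: nil) — an existing ContentData whose contents and instances hashes are cloned; when nil, an empty ContentData is created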


123
124
125
126
127
128
129
130
131
132
# File 'lib/content_data/content_data.rb', line 123

# Builds either an empty ContentData or a shallow copy of an existing one.
#
# @param copy [ContentData, nil] object whose +contents+ and +instances+
#   hashes are cloned; when nil both tables start out empty.
def initialize(copy = nil)
  blank = copy.nil?
  # Only the hash containers are regenerated on copy; the stored values are
  # shared with the source (they are treated as immutable).
  # key: checksum, value: reference to the Content object
  @contents = blank ? {} : copy.contents.clone
  # key: instance global path, value: reference to the ContentInstance object
  @instances = blank ? {} : copy.instances.clone
end

Instance Attribute Details

#contents ⇒ Object (readonly)

Returns the value of attribute contents.



120
121
122
# File 'lib/content_data/content_data.rb', line 120

# Read-only accessor for the contents table.
#
# @return [Hash] the hash held in @contents (keyed by checksum)
def contents
  @contents
end

#instances ⇒ Object (readonly)

Returns the value of attribute instances.



120
121
122
# File 'lib/content_data/content_data.rb', line 120

# Read-only accessor for the instances table.
#
# @return [Hash] the hash held in @instances (keyed by instance global path)
def instances
  @instances
end

Class Method Details

.format_time(time) ⇒ Object



256
257
258
259
260
# File 'lib/content_data/content_data.rb', line 256

# Serializes a Time as its whole seconds since the epoch.
#
# @param time [Time] the time to serialize
# @return [String, nil] decimal epoch seconds, or nil when +time+ is not
#   exactly a Time instance
def self.format_time(time)
  time.instance_of?(Time) ? time.to_i.to_s : nil
end

.intersect(a, b) ⇒ Object

returns the common content in both a and b



332
333
334
335
# File 'lib/content_data/content_data.rb', line 332

# Returns the content common to both +a+ and +b+.
#
# Computed as b - (b - a) using ContentData.remove, so the result is built
# from entries of +b+.
#
# @param a [ContentData]
# @param b [ContentData]
# @return [ContentData, nil] whatever ContentData.remove yields for the
#   outer subtraction
def self.intersect(a, b)
  ContentData.remove(ContentData.remove(a, b), b)
end

.merge(a, b) ⇒ Object

merges content data a and content data b to a new content data and returns it.



263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/content_data/content_data.rb', line 263

# Merges content data +a+ and content data +b+ into a new ContentData.
#
# @param a [ContentData, nil]
# @param b [ContentData, nil]
# @return [ContentData, nil] +b+ itself when +a+ is nil, +a+ itself when +b+
#   is nil, nil when either argument is not exactly a ContentData, otherwise
#   a fresh ContentData that had +a+ and then +b+ merged into it
def self.merge(a, b)
  return b if a.nil?
  return a if b.nil?
  return nil unless a.instance_of?(ContentData) && b.instance_of?(ContentData)

  ContentData.new.tap do |merged|
    merged.merge(a)
    merged.merge(b) # applied second, so b's entries override a's
  end
end

.parse_time(time_str) ⇒ Object



250
251
252
253
254
# File 'lib/content_data/content_data.rb', line 250

# Parses a string holding decimal seconds since the epoch into a Time.
#
# @param time_str [String] decimal epoch seconds (surrounding whitespace,
#   such as a trailing newline, is accepted)
# @return [Time, nil] the parsed time, or nil when +time_str+ is not exactly
#   a String
# @raise [ArgumentError] when the string is not a valid decimal integer
def self.parse_time(time_str)
  return nil unless time_str.instance_of?(String)

  # Kernel#Integer is used instead of String#to_i because it rejects
  # non-numeric input. The explicit base 10 prevents radix-prefix handling,
  # which would otherwise read a zero-padded value such as "010" as octal 8.
  Time.at(Integer(time_str, 10))
end

.remove(a, b) ⇒ Object

Removes content data a from content data b and returns the result as a new content data.



278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/content_data/content_data.rb', line 278

# Subtracts content data +a+ from content data +b+.
#
# Every content of +b+ whose checksum is unknown to +a+ is kept, and so is
# every instance of +b+ whose checksum is unknown to +a+. Neither argument is
# modified.
#
# @param a [ContentData] the data to subtract
# @param b [ContentData] the data to subtract from
# @return [ContentData, nil] a new ContentData, or nil when either argument
#   is not exactly a ContentData
def self.remove(a, b)
  return nil unless a.instance_of?(ContentData)
  return nil unless b.instance_of?(ContentData)

  kept = ContentData.new

  b.contents.each_value do |content|
    next if a.content_exists(content.checksum)

    kept.add_content(content)
  end

  b.instances.each_value do |instance|
    next if a.content_exists(instance.checksum)

    kept.add_instance(instance)
  end

  kept
end

.remove_directory(cd, global_dir_path) ⇒ Object



315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/content_data/content_data.rb', line 315

# Builds a copy of +cd+ without the instances located under a directory.
#
# An instance is dropped when +global_dir_path+ occurs anywhere inside its
# global path (the match is done with String#scan, not anchored to the
# start). Only contents referenced by a kept instance end up in the result.
#
# @param cd [ContentData] the source data
# @param global_dir_path [String] pattern handed to String#scan
# @return [ContentData, nil] a new ContentData, or nil when +cd+ is not
#   exactly a ContentData
def self.remove_directory(cd, global_dir_path)
  return nil unless cd.instance_of?(ContentData)

  survivors = ContentData.new
  cd.instances.each_value do |instance|
    Log.debug3("global path to check: #{global_dir_path}")
    Log.debug3("instance global path: #{instance.global_path}")
    next unless instance.global_path.scan(global_dir_path).empty?

    Log.debug3("Adding instance.")
    # The content must be registered before the instance that refers to it.
    survivors.add_content(cd.contents[instance.checksum])
    survivors.add_instance(instance)
  end
  survivors
end

.remove_instances(a, b) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/content_data/content_data.rb', line 301

# Builds a ContentData from the instances of +b+ whose global path is not
# present among the instances of +a+, together with the contents those
# instances refer to.
#
# @param a [ContentData] supplies the global paths to exclude
# @param b [ContentData] supplies the candidate instances and contents
# @return [ContentData, nil] a new ContentData, or nil when either argument
#   is not exactly a ContentData
def self.remove_instances(a, b)
  return nil unless a.instance_of?(ContentData)
  return nil unless b.instance_of?(ContentData)

  remaining = ContentData.new
  b.instances.each_value do |instance|
    next if a.instances.key?(instance.global_path)

    # The content must be registered before the instance that refers to it.
    remaining.add_content(b.contents[instance.checksum])
    remaining.add_instance(instance)
  end
  remaining
end

.unify_time(db) ⇒ Object

unify time for all entries with same content to minimal time



338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/content_data/content_data.rb', line 338

# Unifies the time of all entries sharing a checksum to the minimal time
# found for that checksum.
#
# The minimum is taken over the modification times of the instances and the
# first appearance time of the content. Entries that already carry the
# minimum are reused as-is; the others are rebuilt with the minimum. +db+ is
# not modified.
#
# @param db [ContentData] the data to normalize
# @return [ContentData] a new ContentData with unified times
def self.unify_time(db)
  unified = ContentData.new
  earliest = {} # checksum => minimal time seen for this checksum
  grouped = {}  # checksum => instances carrying this checksum, in input order

  # Records +time+ for +checksum+ when nothing is stored yet or when the
  # stored time is later.
  note_time = lambda do |checksum, time|
    if !earliest.key?(checksum) || (earliest[checksum] <=> time) > 0
      earliest[checksum] = time
    end
  end

  # Pass 1: group the instances and collect their minimal modification time.
  db.instances.each_value do |instance|
    checksum = instance.checksum
    time = instance.modification_time
    (grouped[checksum] ||= []) << instance
    note_time.call(checksum, time)
  end

  # Pass 2: fold the first appearance time of the contents into the minimum.
  db.contents.each do |checksum, content|
    note_time.call(checksum, content.first_appearance_time)
  end

  # Pass 3: emit the contents, rebuilding those whose time is not the minimum.
  db.contents.each do |checksum, content|
    min_time = earliest[checksum]
    up_to_date = (content.first_appearance_time <=> min_time) == 0
    unified.add_content(up_to_date ? content : Content.new(checksum, content.size, min_time))
  end

  # Pass 4: emit the instances grouped by checksum, rebuilding the late ones.
  grouped.each do |checksum, members|
    min_time = earliest[checksum]
    members.each do |instance|
      if (instance.modification_time <=> min_time) == 0
        unified.add_instance(instance)
      else
        unified.add_instance(
          ContentInstance.new(instance.checksum, instance.size,
                              instance.server_name, instance.device,
                              instance.full_path, min_time)
        )
      end
    end
  end

  unified
end

Instance Method Details

#==(other) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/content_data/content_data.rb', line 174

# Compares this ContentData with +other+ entry by entry.
#
# Two objects are equal when they hold the same checksums mapped to equal
# contents and the same global paths mapped to equal instances.
#
# @param other [Object] the object to compare with
# @return [Boolean] false for nil and for any object that does not expose
#   +contents+ and +instances+
def ==(other)
  # Anything that cannot supply both tables (nil included) cannot be equal.
  return false unless other.respond_to?(:contents) && other.respond_to?(:instances)
  return false unless @contents.size == other.contents.size
  return false unless @instances.size == other.instances.size

  @contents.each do |checksum, content|
    # Equal sizes do not guarantee equal key sets; without this check the
    # logging below would call a method on nil for a missing checksum.
    return false unless other.contents.key?(checksum)

    counterpart = other.contents[checksum]
    if content != counterpart
      Log.info content.first_appearance_time.to_i
      Log.info counterpart.first_appearance_time.to_i
      return false
    end
  end

  @instances.each do |path, instance|
    return false unless other.instances.key?(path)
    return false if instance != other.instances[path]
  end

  true
end

#add_content(content) ⇒ Object



134
135
136
# File 'lib/content_data/content_data.rb', line 134

# Registers +content+ under its checksum, replacing any content previously
# stored for that checksum.
#
# @param content [Content] object responding to +checksum+
# @return [Content] the stored content
def add_content(content)
  @contents.store(content.checksum, content)
end

#add_instance(instance) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/content_data/content_data.rb', line 138

# Registers +instance+ under its global path.
#
# The instance is rejected (with warnings logged) when no content with its
# checksum is registered, or when its size differs from the size of that
# content. An instance already stored under the same global path is replaced.
#
# @param instance [ContentInstance]
# @return [ContentInstance, false] the stored instance, or false when it was
#   rejected
def add_instance(instance)
  unless @contents.key?(instance.checksum)
    Log.warning(format("Adding instance while it's checksum %s does not exists.\n", instance.checksum))
    Log.warning(format("%s\n", instance.to_s))
    return false
  end

  if @contents[instance.checksum].size != instance.size
    Log.warning('File size different from content size while same checksum')
    Log.warning(instance.to_s)
    return false
  end

  # Override the file if needed.
  @instances[instance.global_path] = instance
end

#content_exists(checksum) ⇒ Object



160
161
162
# File 'lib/content_data/content_data.rb', line 160

# Tells whether a content with the given checksum is registered.
#
# @param checksum [Object] key to look up in the contents table
# @return [Boolean]
def content_exists(checksum)
  @contents.include?(checksum)
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)


156
157
158
# File 'lib/content_data/content_data.rb', line 156

# Tells whether no content is registered. Only the contents table is
# consulted; the instances table is not looked at.
#
# @return [Boolean]
def empty?
  @contents.size.zero?
end

#from_file(filename) ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/content_data/content_data.rb', line 213

# Loads contents and instances from a file in the layout written by #to_s
# and adds them to this object.
#
# Layout: a line with the number of contents, one comma-separated line per
# content (checksum, size, time), a line with the number of instances, then
# one comma-separated line per instance (checksum, size, two further fields,
# path, time).
#
# @param filename [String] path of the file to read
# @return [Integer] the instance count read from the file (a by-product of
#   the final loop; NOTE(review): callers presumably ignore it)
def from_file(filename)
  # File.readlines rather than IO.readlines: the IO variant treats a name
  # starting with "|" as a command to execute.
  lines = File.readlines(filename)
  cursor = 0

  number_of_contents = lines[cursor].to_i
  cursor += 1
  number_of_contents.times do
    checksum, size, time_str = lines[cursor].split(',')
    add_content(Content.new(checksum, size.to_i, ContentData.parse_time(time_str)))
    cursor += 1
  end

  number_of_instances = lines[cursor].to_i
  cursor += 1
  number_of_instances.times do
    fields = lines[cursor].split(',')
    # A path containing commas is split into several fields; fold everything
    # between field 4 and the trailing time field back into the path.
    fields[4..-2] = fields[4..-2].join(',') if fields.size > 6

    add_instance(ContentInstance.new(fields[0],
                                     fields[1].to_i,
                                     fields[2],
                                     fields[3],
                                     fields[4],
                                     ContentData.parse_time(fields[5])))
    cursor += 1
  end
end

#merge(content_data) ⇒ Object

TODO(kolman): The semantics of this merge are those of merge! (it modifies the receiver); change this throughout the file.



165
166
167
168
169
170
171
172
# File 'lib/content_data/content_data.rb', line 165

# Merges +content_data+ into this object in place: all of its contents are
# added first, then all of its instances.
#
# @param content_data [ContentData] the data to merge in
# @return [Array] the instance values of +content_data+ (a by-product of the
#   final iteration)
def merge(content_data)
  content_data.contents.values.each(&method(:add_content))
  content_data.instances.values.each(&method(:add_instance))
end

#to_file(filename) ⇒ Object



207
208
209
210
211
# File 'lib/content_data/content_data.rb', line 207

# Writes the textual form of this object (see #to_s) to +filename+,
# creating the parent directories when they are missing.
#
# @param filename [String] path of the file to (over)write
# @return [Integer] number of bytes written
def to_file(filename)
  # mkdir_p does nothing when the directory already exists, so no prior
  # existence check is needed (File.exists? is gone since Ruby 3.2).
  FileUtils.mkdir_p(File.dirname(filename))
  File.write(filename, to_s)
end

#to_s ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/content_data/content_data.rb', line 194

# Renders this object as text: the number of contents, one line per content,
# the number of instances, then one line per instance. Every line, including
# the last, ends with a newline.
#
# @return [String]
def to_s
  [@contents, @instances].each_with_object(''.dup) do |table, text|
    text << table.length.to_s << "\n"
    table.each_value { |entry| text << entry.to_s << "\n" }
  end
end