Class: Gonzui::AbstractDBM

Inherits:
Object
  • Object
show all
Includes:
Util
Defined in:
lib/gonzui/dbm.rb

Direct Known Subclasses

BDBDBM

Constant Summary collapse

# One row per database. Columns, in the order #initialize destructures
# them before calling open_db: db_name, key_type, value_type, dupsort.
# NOTE(review): dupsort presumably allows several values per key (the
# dupsort tables are the ones read with #duplicates) -- confirm in open_db.
DBTable =
[ 
  [:fmtid_fmt,         ap::ID,     ap::String,   false],
  [:fmtid_fabbr,       ap::ID,     ap::String,   false],
  [:fabbr_fmtid,       ap::String, ap::ID,       false],
  [:lcsid_lcs,         ap::ID,     ap::String,   false],
  [:lcsid_labbr,       ap::ID,     ap::String,   false],
  [:labbr_lcsid,       ap::String, ap::ID,       false],
  [:seq,               ap::String, ap::Fixnum,   false],
  [:stat,              ap::String, ap::Fixnum,   false],
  [:pkg_pkgid,         ap::String, ap::ID,       false],
  [:pkgid_pkg,         ap::ID,     ap::String,   false],
  [:pkgid_pathids,     ap::ID,     ap::ID,       true],
  [:pkgid_fmtids,      ap::ID,     ap::ID,       true],
  [:pkgid_lcsids,      ap::ID,     ap::ID,       true],
  [:pkgid_options,     ap::ID,     ap::String,   true],
  [:pkgid_src,         ap::ID,     ap::String,   false],
  [:path_pathid,       ap::String, ap::ID,       false],
  [:pathid_digest,     ap::ID,     ap::GZString, false],
  [:pathid_info,       ap::ID,     ap::String,   false],
  [:pathid_content,    ap::ID,     ap::GZString, false],
  [:pathid_bols,       ap::ID,     ap::GZString, false],
  [:pathid_hash,       ap::ID,     ap::String,   false],
  [:pathid_path,       ap::ID,     ap::String,   false],
  [:pathid_pkgid,      ap::ID,     ap::ID,       false],
  [:pathid_wordids,    ap::ID,     ap::GZString, false],
  [:type_typeid,       ap::Symbol, ap::ID,       false],
  [:typeid_type,       ap::ID,     ap::Symbol,   false],
  [:word_wordid,       ap::String, ap::ID,       false],
  [:wordid_pkgids,     ap::ID,     ap::ID,       true],
  [:wordid_word,       ap::ID,     ap::String,   false],
  [:pkgwordid_pathids, ap::String, ap::ID,       true],
  [:pathwordid_info,   ap::String, ap::String,   false],
  [:version,           ap::String, ap::String,   false],
]
# One row per kind of ID. The third to fifth columns name DBTable entries;
# the second column names a counter key (e.g. :nwords, read by #get_nwords).
# NOTE(review): presumably consumed by init_id_counters -- confirm.
IDTable =
[
  # id_name,    # of id     text -> id    id -> text    id -> alt
  [:type_id,    :ntypes,    :type_typeid, :typeid_type, nil],
  [:word_id,    :nwords,    :word_wordid, :wordid_word, nil],
  [:path_id,    :ncontents, :path_pathid, :pathid_path, nil],
  [:package_id, :npackages, :pkg_pkgid,   :pkgid_pkg,   nil],
  [:format_id,  :nformats,  :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
  [:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
]

Instance Method Summary collapse

Methods included from Util

assert, assert_equal, assert_equal_all, assert_non_nil, assert_not_reached, benchmark, command_exist?, commify, eprintf, format_bytes, program_name, protect_from_signals, require_command, set_verbosity, shell_escape, unix?, vprintf, windows?, wprintf

Constructor Details

#initialize(config, read_only = false) ⇒ AbstractDBM

Returns a new instance of AbstractDBM.



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/gonzui/dbm.rb', line 162

# Sets up the dbm over the directory named by config.db_directory.
#
# @param config configuration object; only #db_directory is read here
#   (the object is also kept in @config)
# @param read_only [Boolean] when true, put_db_version is skipped
# @raise [RuntimeError] if config.db_directory is not an existing directory
def initialize(config, read_only = false)
  raise "#{config.db_directory}: No such directory" unless 
    File.directory?(config.db_directory)
  @config = config

  # The version check runs before any open_db call.
  validate_db_version
  @db_opened = {}
  # One open_db call per DBTable row.
  DBTable.each {|db_name, key_type, value_type, dupsort|
    open_db(db_name, key_type, value_type, dupsort)
  }
  put_db_version unless read_only
  init_id_counters

  @opened = true
  @current_package_id = nil
  # word_id => [path_id, ...]; filled by put_pathid_wordids, drained by
  # flush_cache.
  @wordid_pathids_cache = {}
end

Instance Method Details

#binary_content?(path_id) ⇒ Boolean

Returns:

  • (Boolean)


309
310
311
312
# File 'lib/gonzui/dbm.rb', line 309

# Tells whether the format abbreviation of the content is "binary".
#
# @param path_id ID of the content to test
# @return [Boolean]
def binary_content?(path_id)
  get_format_abbrev(get_format_id_from_path_id(path_id)) == "binary"
end

#close ⇒ Object

Raises:

  • (DBMError)

314
315
316
317
318
319
320
321
# File 'lib/gonzui/dbm.rb', line 314

# Flushes pending cache entries and closes every opened database.
#
# @raise [DBMError] if the dbm has already been closed
def close
  # Check the state first: flushing a closed dbm would write to databases
  # that have already been closed before the error is reported.
  raise DBMError.new("dbm is already closed") unless @opened
  flush_cache
  @db_opened.each_value {|db| db.close }
  @opened = false
end

#consistent? ⇒ Boolean

Returns:

  • (Boolean)


323
324
325
326
327
# File 'lib/gonzui/dbm.rb', line 323

# Runs the stat and seq integrity checks, in that order.
#
# @return [Boolean] true once both checks have returned
#   NOTE(review): the checks presumably raise on failure, since their
#   return values are ignored -- confirm.
def consistent?
  verify_stat_integrity
  verify_seq_integrity
  true
end

#decrease_counter(key, step = 1) ⇒ Object

Raises:

  • (DBMError)

219
220
221
222
223
# File 'lib/gonzui/dbm.rb', line 219

# Subtracts +step+ from the counter stored under +key+ in @stat.
#
# @param key counter name; converted with #to_s for storage
# @param step amount to subtract (default 1)
# @return the new counter value
# @raise [DBMError] if the result would be negative (nothing is stored then)
def decrease_counter(key, step = 1)
  remaining = get_counter(key) - step
  if remaining < 0
    raise DBMError.new("counter #{key} becomes minus")
  end
  @stat[key.to_s] = remaining
end

#each_db_name ⇒ Object



335
336
337
# File 'lib/gonzui/dbm.rb', line 335

# Yields the name of every opened database, converted with #to_s.
def each_db_name
  @db_opened.each_key do |db_name|
    yield(db_name.to_s)
  end
end

#each_format(&block) ⇒ Object



339
340
341
# File 'lib/gonzui/dbm.rb', line 339

# Delegates to each_property with the :fmtid_fmt table name and the
# :get_format_abbrev method name, passing the caller's block through.
def each_format(&block)
  each_property(:fmtid_fmt, :get_format_abbrev, &block)
end

#each_license(&block) ⇒ Object



343
344
345
# File 'lib/gonzui/dbm.rb', line 343

# Delegates to each_property with the :lcsid_lcs table name and the
# :get_license_abbrev method name, passing the caller's block through.
def each_license(&block)
  each_property(:lcsid_lcs, :get_license_abbrev, &block)
end

#each_package_name ⇒ Object



347
348
349
# File 'lib/gonzui/dbm.rb', line 347

# Yields every value of the @pkgid_pkg table (the package names).
def each_package_name
  @pkgid_pkg.each_value do |package_name|
    yield(package_name)
  end
end

#each_word(&block) ⇒ Object



351
352
353
# File 'lib/gonzui/dbm.rb', line 351

# Yields every key of the @word_wordid table (the words).
def each_word(&block)
  @word_wordid.each_key do |word|
    yield(word)
  end
end

#find_all(pattern) ⇒ Object



355
356
357
358
359
# File 'lib/gonzui/dbm.rb', line 355

# Collects all results for the word that equals +pattern+ exactly.
#
# @param pattern [String] the word to look up in @word_wordid
# @return whatever collect_all_results returns for the word's ID
def find_all(pattern)
  collect_all_results(@word_wordid[pattern])
end

#find_all_by_prefix(pattern) ⇒ Object

Raises:

  • (NotImplementedError)


361
362
363
# File 'lib/gonzui/dbm.rb', line 361

# Abstract: prefix search is left to subclasses.
#
# @raise [NotImplementedError] always
def find_all_by_prefix(pattern)
  raise NotImplementedError, "should be implemented in a sub class"
end

#find_all_by_regexp(pattern) ⇒ Object



365
366
367
368
369
370
371
372
373
374
# File 'lib/gonzui/dbm.rb', line 365

# Collects the results of every word matched by the regular expression.
# Walks the whole @word_wordid table.
#
# @param pattern regular expression source, compiled with Regexp.new
# @return [Array] concatenated collect_all_results of each matching word
def find_all_by_regexp(pattern)
  matcher = Regexp.new(pattern)
  hits = []
  @word_wordid.each do |word, word_id|
    next unless matcher.match(word)
    hits.concat(collect_all_results(word_id))
  end
  hits
end

#find_word_info(path_id, word_id) ⇒ Object



376
377
378
379
380
# File 'lib/gonzui/dbm.rb', line 376

# Yields each entry returned by get_all_word_info(path_id, word_id).
#
# @return the collection that was iterated
def find_word_info(path_id, word_id)
  get_all_word_info(path_id, word_id).each do |word_info|
    yield(word_info)
  end
end

#flush_cache ⇒ Object



382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/gonzui/dbm.rb', line 382

# Writes the word => paths cache accumulated by put_pathid_wordids to the
# databases, empties it, then flushes every ID counter.
#
# Every cached entry is written under @current_package_id, i.e. the
# package given to the most recent put_pathid_wordids call.
def flush_cache
  # Word IDs are processed in ascending order.
  # NOTE(review): presumably to keep DB writes in key order -- confirm.
  wordids = @wordid_pathids_cache.keys.sort!
  wordids.each {|word_id|
    package_word_id = AutoPack.pack_id2(@current_package_id, word_id)
    # One assignment per path ID; pkgwordid_pathids is declared dupsort in
    # DBTable. NOTE(review): presumably each assignment adds a duplicate
    # rather than overwriting -- confirm in the DB wrapper.
    @wordid_pathids_cache[word_id].each {|path_id|
      @pkgwordid_pathids[package_word_id] = path_id
    }
  }
  wordids.each {|word_id|
    @wordid_pkgids[word_id] = @current_package_id
  }
  @wordid_pathids_cache.clear
  @id_counters.each {|counter| counter.flush}
end

#get_all_word_info(path_id, word_id) ⇒ Object



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'lib/gonzui/dbm.rb', line 397

# Builds a WordInfo for every tuple stored for the (path_id, word_id) pair.
#
# @param path_id ID of the content
# @param word_id ID of the word
# @return [Array] WordInfo objects, or [] when nothing is stored for the pair
def get_all_word_info(path_id, word_id)
  path_word_id = AutoPack.pack_id2(path_id, word_id)
  dump = @pathwordid_info[path_word_id]
  return [] if dump.nil?
  # NOTE(review): bols looks like the list of beginning-of-line byte
  # offsets of the content -- confirm in get_bols.
  bols = get_bols(path_id)
  bol = bols.shift
  assert_equal(0, bol)

  lineno = 0
  DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id|
    # Consume offsets up to and including byteno, counting one line for
    # each. lineno and bol persist across tuples, so this relies on byteno
    # not decreasing from one tuple to the next.
    while bol and bol <= byteno
      lineno += 1
      bol = bols.shift
    end
    type = get_type(type_id)
    WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno)
  }
end

#get_content(path_id) ⇒ Object



424
425
426
# File 'lib/gonzui/dbm.rb', line 424

# Returns the value stored under +path_id+ in the @pathid_content table.
def get_content(path_id)
  @pathid_content[path_id]
end

#get_content_hash(path_id) ⇒ Object



416
417
418
# File 'lib/gonzui/dbm.rb', line 416

# Returns the value stored under +path_id+ in the @pathid_hash table.
def get_content_hash(path_id)
  @pathid_hash[path_id]
end

#get_content_info(path_id) ⇒ Object



428
429
430
431
432
# File 'lib/gonzui/dbm.rb', line 428

# Loads the ContentInfo stored for +path_id+.
#
# @param path_id ID of the content
# @return the object built by ContentInfo.load from the @pathid_info entry
#   (assert_non_nil is applied to the entry first)
def get_content_info(path_id)
  dump = @pathid_info[path_id]
  assert_non_nil(dump)
  return ContentInfo.load(dump)
end

#get_counter(key) ⇒ Object



420
421
422
# File 'lib/gonzui/dbm.rb', line 420

# Reads the counter stored under +key+ in @stat.
#
# @param key counter name; converted with #to_s for the lookup
# @return the stored value, or 0 when the lookup yields nil or false
def get_counter(key)
  @stat[key.to_s] || 0
end

#get_digest(path_id) ⇒ Object



434
435
436
437
438
439
440
441
# File 'lib/gonzui/dbm.rb', line 434

# Builds a DigestInfo for every tuple stored for +path_id+.
#
# The last field of each decoded tuple is passed to get_type and the
# result is appended to the tuple before DigestInfo is constructed.
#
# @param path_id ID of the content
# @return [Array] DigestInfo objects, or [] when nothing is stored
def get_digest(path_id)
  dump = @pathid_digest[path_id]
  if dump.nil?
    return []
  end
  tuples = DeltaDumper.undump_tuples(DigestInfo, dump)
  tuples.map do |fields|
    fields.push(get_type(fields.last))
    DigestInfo.new(*fields)
  end
end

#get_format_abbrev(format_id) ⇒ Object



443
444
445
# File 'lib/gonzui/dbm.rb', line 443

# Returns the value stored under +format_id+ in the @fmtid_fabbr table.
def get_format_abbrev(format_id)
  @fmtid_fabbr[format_id]
end

#get_format_id(format_abbrev) ⇒ Object



447
448
449
# File 'lib/gonzui/dbm.rb', line 447

# Returns the value stored under +format_abbrev+ in the @fabbr_fmtid table.
def get_format_id(format_abbrev)
  @fabbr_fmtid[format_abbrev]
end

#get_format_id_from_path_id(path_id) ⇒ Object



451
452
453
# File 'lib/gonzui/dbm.rb', line 451

# Returns the format ID recorded in the content info of +path_id+.
#
# @param path_id ID of the content
# @return the format_id attribute of the object returned by get_content_info
def get_format_id_from_path_id(path_id)
  get_content_info(path_id).format_id
end

#get_format_ids_from_package_id(package_id) ⇒ Object



455
456
457
# File 'lib/gonzui/dbm.rb', line 455

# Returns @pkgid_fmtids.duplicates(package_id).
# NOTE(review): presumably all values stored under the key (pkgid_fmtids is
# a dupsort table in DBTable) -- confirm in the DB wrapper.
def get_format_ids_from_package_id(package_id)
  @pkgid_fmtids.duplicates(package_id)
end

#get_format_name(format_id) ⇒ Object



459
460
461
# File 'lib/gonzui/dbm.rb', line 459

# Returns the value stored under +format_id+ in the @fmtid_fmt table.
def get_format_name(format_id)
  @fmtid_fmt[format_id]
end

#get_license_abbrev(license_id) ⇒ Object



463
464
465
# File 'lib/gonzui/dbm.rb', line 463

# Returns the value stored under +license_id+ in the @lcsid_labbr table.
def get_license_abbrev(license_id)
  @lcsid_labbr[license_id]
end

#get_license_id(license_abbrev) ⇒ Object



467
468
469
# File 'lib/gonzui/dbm.rb', line 467

# Returns the value stored under +license_abbrev+ in the @labbr_lcsid table.
def get_license_id(license_abbrev)
  @labbr_lcsid[license_abbrev]
end

#get_license_id_from_path_id(path_id) ⇒ Object



471
472
473
# File 'lib/gonzui/dbm.rb', line 471

# Returns the license ID recorded in the content info of +path_id+.
#
# @param path_id ID of the content
# @return the license_id attribute of the object returned by get_content_info
def get_license_id_from_path_id(path_id)
  get_content_info(path_id).license_id
end

#get_license_ids_from_package_id(package_id) ⇒ Object



475
476
477
# File 'lib/gonzui/dbm.rb', line 475

# Returns @pkgid_lcsids.duplicates(package_id).
# NOTE(review): presumably all values stored under the key (pkgid_lcsids is
# a dupsort table in DBTable) -- confirm in the DB wrapper.
def get_license_ids_from_package_id(package_id)
  @pkgid_lcsids.duplicates(package_id)
end

#get_license_name(license_id) ⇒ Object



479
480
481
# File 'lib/gonzui/dbm.rb', line 479

# Returns the value stored under +license_id+ in the @lcsid_lcs table.
def get_license_name(license_id)
  @lcsid_lcs[license_id]
end

#get_ncontents ⇒ Object



483
484
485
# File 'lib/gonzui/dbm.rb', line 483

# Returns the :ncontents counter (see get_counter).
def get_ncontents
  get_counter(:ncontents)
end

#get_ncontents_by_format_id(format_id) ⇒ Object



487
488
489
490
491
# File 'lib/gonzui/dbm.rb', line 487

# Returns the per-format content counter for +format_id+.
#
# The counter key is derived from the format's abbreviation by
# make_ncontents_by_format_key.
def get_ncontents_by_format_id(format_id)
  get_counter(make_ncontents_by_format_key(get_format_abbrev(format_id)))
end

#get_ncontents_by_license_id(license_id) ⇒ Object



493
494
495
496
497
# File 'lib/gonzui/dbm.rb', line 493

# Returns the per-license content counter for +license_id+.
#
# The counter key is derived from the license's abbreviation by
# make_ncontents_by_license_key.
def get_ncontents_by_license_id(license_id)
  get_counter(make_ncontents_by_license_key(get_license_abbrev(license_id)))
end

#get_ncontents_in_package(package_name) ⇒ Object



503
504
505
506
# File 'lib/gonzui/dbm.rb', line 503

# Counts the path IDs recorded for the package named +package_name+.
#
# @param package_name name resolved to an ID with get_package_id
# @return the length of @pkgid_pathids.duplicates for that ID
def get_ncontents_in_package(package_name)
  path_ids = @pkgid_pathids.duplicates(get_package_id(package_name))
  path_ids.length
end

#get_ncontents_indexed ⇒ Object



499
500
501
# File 'lib/gonzui/dbm.rb', line 499

# Returns the :ncontents_indexed counter (see get_counter).
def get_ncontents_indexed
  get_counter(:ncontents_indexed)
end

#get_nformats ⇒ Object



508
509
510
# File 'lib/gonzui/dbm.rb', line 508

# Returns the :nformats counter (see get_counter).
def get_nformats
  get_counter(:nformats)
end

#get_nlines_indexed ⇒ Object



512
513
514
# File 'lib/gonzui/dbm.rb', line 512

# Returns the :nlines_indexed counter (see get_counter).
def get_nlines_indexed
  get_counter(:nlines_indexed)
end

#get_npackages ⇒ Object



516
517
518
# File 'lib/gonzui/dbm.rb', line 516

# Returns the :npackages counter (see get_counter).
def get_npackages
  get_counter(:npackages)
end

#get_nwords ⇒ Object



520
521
522
# File 'lib/gonzui/dbm.rb', line 520

# Returns the :nwords counter (see get_counter).
def get_nwords
  get_counter(:nwords)
end

#get_package_id(package_name) ⇒ Object



524
525
526
# File 'lib/gonzui/dbm.rb', line 524

# Returns the value stored under +package_name+ in the @pkg_pkgid table.
def get_package_id(package_name)
  @pkg_pkgid[package_name]
end

#get_package_id_from_path_id(path_id) ⇒ Object



528
529
530
# File 'lib/gonzui/dbm.rb', line 528

# Returns the value stored under +path_id+ in the @pathid_pkgid table.
def get_package_id_from_path_id(path_id)
  @pathid_pkgid[path_id]
end

#get_package_ids(word_id) ⇒ Object



532
533
534
# File 'lib/gonzui/dbm.rb', line 532

# Returns @wordid_pkgids.duplicates(word_id).
# NOTE(review): presumably every package ID recorded for the word by
# flush_cache -- confirm the semantics of #duplicates in the DB wrapper.
def get_package_ids(word_id)
  @wordid_pkgids.duplicates(word_id)
end

#get_package_name(package_id) ⇒ Object



536
537
538
# File 'lib/gonzui/dbm.rb', line 536

# Returns the value stored under +package_id+ in the @pkgid_pkg table.
def get_package_name(package_id)
  @pkgid_pkg[package_id]
end

#get_package_names ⇒ Object



540
541
542
# File 'lib/gonzui/dbm.rb', line 540

# Returns all values of the @pkgid_pkg table (the package names).
def get_package_names
  @pkgid_pkg.values
end

#get_package_options(package_id) ⇒ Object



544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
# File 'lib/gonzui/dbm.rb', line 544

# Decodes the options stored for a package by put_package_options.
#
# Each stored entry has the form "name:value", split at the first colon.
#
# @param package_id ID of the package
# @return [Hash] :exclude_pattern => Regexp, :noindex_formats => Array of
#   strings; both keys are asserted to be present
# @raise [DBMError] if an entry carries an unknown option name
def get_package_options(package_id)
  options = {}
  @pkgid_options.duplicates(package_id).each do |entry|
    name, raw = entry.split(":", 2)
    key = name.intern
    options[key] =
      if key == :exclude_pattern
        Regexp.new(raw)
      elsif key == :noindex_formats
        raw.split(",")
      else
        raise DBMError.new("#{key}: unknown option")
      end
  end
  assert(options[:exclude_pattern])
  assert(options[:noindex_formats])
  options
end

#get_path(path_id) ⇒ Object



565
566
567
# File 'lib/gonzui/dbm.rb', line 565

# Returns the value stored under +path_id+ in the @pathid_path table.
def get_path(path_id)
  @pathid_path[path_id]
end

#get_path_id(path) ⇒ Object



569
570
571
# File 'lib/gonzui/dbm.rb', line 569

# Returns the value stored under +path+ in the @path_pathid table.
def get_path_id(path)
  @path_pathid[path]
end

#get_path_ids(package_id) ⇒ Object



573
574
575
# File 'lib/gonzui/dbm.rb', line 573

# Returns @pkgid_pathids.duplicates(package_id).
# NOTE(review): presumably all path IDs stored under the package (the table
# is dupsort in DBTable) -- confirm in the DB wrapper.
def get_path_ids(package_id)
  @pkgid_pathids.duplicates(package_id)
end

#get_path_ids_from_package_and_word_id(package_id, word_id) ⇒ Object



577
578
579
580
# File 'lib/gonzui/dbm.rb', line 577

# Returns the path IDs recorded for a word within one package.
#
# The lookup key is AutoPack.pack_id2(package_id, word_id), the same key
# flush_cache writes under.
def get_path_ids_from_package_and_word_id(package_id, word_id)
  @pkgwordid_pathids.duplicates(AutoPack.pack_id2(package_id, word_id))
end

#get_source_uri(package_id) ⇒ Object



582
583
584
# File 'lib/gonzui/dbm.rb', line 582

# Returns the value stored under +package_id+ in the @pkgid_src table.
def get_source_uri(package_id)
  @pkgid_src[package_id]
end

#get_type(type_id) ⇒ Object



586
587
588
# File 'lib/gonzui/dbm.rb', line 586

# Returns the value stored under +type_id+ in the @typeid_type table.
def get_type(type_id)
  @typeid_type[type_id]
end

#get_type_id(type) ⇒ Object



590
591
592
# File 'lib/gonzui/dbm.rb', line 590

# Returns @type_id_counter.get_id(type).
# NOTE(review): unlike the other get_*_id lookups this goes through the ID
# counter rather than the @type_typeid table; whether get_id allocates an
# ID for an unknown type is not visible here -- confirm.
def get_type_id(type)
  @type_id_counter.get_id(type)
end

#get_word(word_id) ⇒ Object



594
595
596
# File 'lib/gonzui/dbm.rb', line 594

# Returns the value stored under +word_id+ in the @wordid_word table.
def get_word(word_id)
  @wordid_word[word_id]
end

#get_word_id(word) ⇒ Object



598
599
600
# File 'lib/gonzui/dbm.rb', line 598

# Returns the value stored under +word+ in the @word_wordid table.
def get_word_id(word)
  @word_wordid[word]
end

#get_word_ids(path_id) ⇒ Object



602
603
604
# File 'lib/gonzui/dbm.rb', line 602

# Decodes the @pathid_wordids entry of +path_id+ with
# DeltaDumper.undump_ids.
# NOTE(review): for a missing path_id, whatever #[] returns is handed
# straight to undump_ids -- confirm that case is handled.
def get_word_ids(path_id)
  DeltaDumper.undump_ids(@pathid_wordids[path_id])
end

#has_format_abbrev?(format_abbrev) ⇒ Boolean

Returns:

  • (Boolean)


610
611
612
# File 'lib/gonzui/dbm.rb', line 610

# Returns whether +format_abbrev+ is a key of the @fabbr_fmtid table.
def has_format_abbrev?(format_abbrev)
  @fabbr_fmtid.has_key?(format_abbrev)
end

#has_format_id?(format_id) ⇒ Boolean

Returns:

  • (Boolean)


606
607
608
# File 'lib/gonzui/dbm.rb', line 606

# Returns whether +format_id+ is a key of the @fmtid_fmt table.
def has_format_id?(format_id)
  @fmtid_fmt.has_key?(format_id)
end

#has_license_abbrev?(license_abbrev) ⇒ Boolean

Returns:

  • (Boolean)


618
619
620
# File 'lib/gonzui/dbm.rb', line 618

# Returns whether +license_abbrev+ is a key of the @labbr_lcsid table.
def has_license_abbrev?(license_abbrev)
  @labbr_lcsid.has_key?(license_abbrev)
end

#has_license_id?(license_id) ⇒ Boolean

Returns:

  • (Boolean)


614
615
616
# File 'lib/gonzui/dbm.rb', line 614

# Returns whether +license_id+ is a key of the @lcsid_lcs table.
def has_license_id?(license_id)
  @lcsid_lcs.has_key?(license_id)
end

#has_package?(package_name) ⇒ Boolean

Returns:

  • (Boolean)


622
623
624
# File 'lib/gonzui/dbm.rb', line 622

# Returns @pkg_pkgid.include?(package_name).
# NOTE(review): presumably a key test, like has_key? in the has_format_*
# methods -- confirm in the DB wrapper.
def has_package?(package_name)
  @pkg_pkgid.include?(package_name)
end

#has_path?(path) ⇒ Boolean

Returns:

  • (Boolean)


626
627
628
# File 'lib/gonzui/dbm.rb', line 626

# Returns @path_pathid.include?(path).
# NOTE(review): presumably a key test -- confirm in the DB wrapper.
def has_path?(path)
  @path_pathid.include?(path)
end

#has_type?(type) ⇒ Boolean

Returns:

  • (Boolean)


630
631
632
# File 'lib/gonzui/dbm.rb', line 630

# Returns @type_typeid.include?(type).
# NOTE(review): presumably a key test -- confirm in the DB wrapper.
def has_type?(type)
  @type_typeid.include?(type)
end

#has_word?(word) ⇒ Boolean

Returns:

  • (Boolean)


634
635
636
637
638
639
640
641
# File 'lib/gonzui/dbm.rb', line 634

# Tells whether looking up +word+ in @word_wordid yields a truthy value.
#
# @return [Boolean] strictly true or false
def has_word?(word)
  @word_wordid[word] ? true : false
end

#increase_counter(key, step = 1) ⇒ Object



643
644
645
# File 'lib/gonzui/dbm.rb', line 643

# Adds +step+ to the counter stored under +key+ in @stat.
#
# @param key counter name; converted with #to_s for storage
# @param step amount to add (default 1)
# @return the new counter value
def increase_counter(key, step = 1)
  updated = get_counter(key) + step
  @stat[key.to_s] = updated
end

#make_ncontents_by_format_key(format_abbrev) ⇒ Object



647
648
649
# File 'lib/gonzui/dbm.rb', line 647

# Builds the counter key for the number of contents in a format.
#
# @param format_abbrev [String] format abbreviation, appended unchanged
# @return [Symbol] "ncontents_format_" followed by the abbreviation
def make_ncontents_by_format_key(format_abbrev)
  name = "ncontents_format_" + format_abbrev
  name.to_sym
end

#make_ncontents_by_license_key(license_abbrev) ⇒ Object



651
652
653
# File 'lib/gonzui/dbm.rb', line 651

# Builds the counter key for the number of contents under a license.
#
# @param license_abbrev [String] license abbreviation
# @return [Symbol] "ncontents_license_" followed by the abbreviation, with
#   the whole name downcased
def make_ncontents_by_license_key(license_abbrev)
  name = "ncontents_license_" + license_abbrev
  name.downcase.to_sym
end

#put_package_options(package_id) ⇒ Object

FIXME: Ad hoc serialization. We avoid using Marshal not to make the DB Ruby-dependent.



657
658
659
660
661
662
# File 'lib/gonzui/dbm.rb', line 657

# Stores the current exclude pattern and no-index formats for a package as
# two "name:value" entries under the same key in @pkgid_options.
#
# FIXME: Ad hoc serialization. Marshal is avoided so that the DB does not
# become Ruby-dependent.
def put_package_options(package_id)
  pattern_source = @config.exclude_pattern.to_s
  @pkgid_options[package_id] = format("exclude_pattern:%s", pattern_source)
  format_list = @config.noindex_formats.join(",")
  @pkgid_options[package_id] = format("noindex_formats:%s", format_list)
end

#put_pathid_wordids(package_id, path_id, word_ids) ⇒ Object



664
665
666
667
668
669
670
# File 'lib/gonzui/dbm.rb', line 664

# Records in the in-memory cache that +path_id+ contains each of +word_ids+,
# and remembers +package_id+ as the current package for the next
# flush_cache.
#
# @param package_id ID stored in @current_package_id
# @param path_id ID appended to the cache entry of every word
# @param word_ids collection of word IDs
def put_pathid_wordids(package_id, path_id, word_ids)
  @current_package_id = package_id
  word_ids.each do |word_id|
    @wordid_pathids_cache[word_id] ||= []
    @wordid_pathids_cache[word_id] << path_id
  end
end