Class: Gonzui::AbstractDBM
Direct Known Subclasses
Constant Summary collapse
- DBTable =
[ [:fmtid_fmt, ap::ID, ap::String, false], [:fmtid_fabbr, ap::ID, ap::String, false], [:fabbr_fmtid, ap::String, ap::ID, false], [:lcsid_lcs, ap::ID, ap::String, false], [:lcsid_labbr, ap::ID, ap::String, false], [:labbr_lcsid, ap::String, ap::ID, false], [:seq, ap::String, ap::Fixnum, false], [:stat, ap::String, ap::Fixnum, false], [:pkg_pkgid, ap::String, ap::ID, false], [:pkgid_pkg, ap::ID, ap::String, false], [:pkgid_pathids, ap::ID, ap::ID, true], [:pkgid_fmtids, ap::ID, ap::ID, true], [:pkgid_lcsids, ap::ID, ap::ID, true], [:pkgid_options, ap::ID, ap::String, true], [:pkgid_src, ap::ID, ap::String, false], [:path_pathid, ap::String, ap::ID, false], [:pathid_digest, ap::ID, ap::GZString, false], [:pathid_info, ap::ID, ap::String, false], [:pathid_content, ap::ID, ap::GZString, false], [:pathid_bols, ap::ID, ap::GZString, false], [:pathid_hash, ap::ID, ap::String, false], [:pathid_path, ap::ID, ap::String, false], [:pathid_pkgid, ap::ID, ap::ID, false], [:pathid_wordids, ap::ID, ap::GZString, false], [:type_typeid, ap::Symbol, ap::ID, false], [:typeid_type, ap::ID, ap::Symbol, false], [:word_wordid, ap::String, ap::ID, false], [:wordid_pkgids, ap::ID, ap::ID, true], [:wordid_word, ap::ID, ap::String, false], [:pkgwordid_pathids, ap::String, ap::ID, true], [:pathwordid_info, ap::String, ap::String, false], [:version, ap::String, ap::String, false], ]
- IDTable =
[
  # id_name,    # of id,    text -> id,   id -> text,   id -> alt
  [:type_id,    :ntypes,    :type_typeid, :typeid_type, nil],
  [:word_id,    :nwords,    :word_wordid, :wordid_word, nil],
  [:path_id,    :ncontents, :path_pathid, :pathid_path, nil],
  [:package_id, :npackages, :pkg_pkgid,   :pkgid_pkg,   nil],
  [:format_id,  :nformats,  :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
  [:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
]
Instance Method Summary collapse
- #binary_content?(path_id) ⇒ Boolean
- #close ⇒ Object
- #consistent? ⇒ Boolean
- #decrease_counter(key, step = 1) ⇒ Object
- #each_db_name ⇒ Object
- #each_format(&block) ⇒ Object
- #each_license(&block) ⇒ Object
- #each_package_name ⇒ Object
- #each_word(&block) ⇒ Object
- #find_all(pattern) ⇒ Object
- #find_all_by_prefix(pattern) ⇒ Object
- #find_all_by_regexp(pattern) ⇒ Object
- #find_word_info(path_id, word_id) ⇒ Object
- #flush_cache ⇒ Object
- #get_all_word_info(path_id, word_id) ⇒ Object
- #get_content(path_id) ⇒ Object
- #get_content_hash(path_id) ⇒ Object
- #get_content_info(path_id) ⇒ Object
- #get_counter(key) ⇒ Object
- #get_digest(path_id) ⇒ Object
- #get_format_abbrev(format_id) ⇒ Object
- #get_format_id(format_abbrev) ⇒ Object
- #get_format_id_from_path_id(path_id) ⇒ Object
- #get_format_ids_from_package_id(package_id) ⇒ Object
- #get_format_name(format_id) ⇒ Object
- #get_license_abbrev(license_id) ⇒ Object
- #get_license_id(license_abbrev) ⇒ Object
- #get_license_id_from_path_id(path_id) ⇒ Object
- #get_license_ids_from_package_id(package_id) ⇒ Object
- #get_license_name(license_id) ⇒ Object
- #get_ncontents ⇒ Object
- #get_ncontents_by_format_id(format_id) ⇒ Object
- #get_ncontents_by_license_id(license_id) ⇒ Object
- #get_ncontents_in_package(package_name) ⇒ Object
- #get_ncontents_indexed ⇒ Object
- #get_nformats ⇒ Object
- #get_nlines_indexed ⇒ Object
- #get_npackages ⇒ Object
- #get_nwords ⇒ Object
- #get_package_id(package_name) ⇒ Object
- #get_package_id_from_path_id(path_id) ⇒ Object
- #get_package_ids(word_id) ⇒ Object
- #get_package_name(package_id) ⇒ Object
- #get_package_names ⇒ Object
- #get_package_options(package_id) ⇒ Object
- #get_path(path_id) ⇒ Object
- #get_path_id(path) ⇒ Object
- #get_path_ids(package_id) ⇒ Object
- #get_path_ids_from_package_and_word_id(package_id, word_id) ⇒ Object
- #get_source_uri(package_id) ⇒ Object
- #get_type(type_id) ⇒ Object
- #get_type_id(type) ⇒ Object
- #get_word(word_id) ⇒ Object
- #get_word_id(word) ⇒ Object
- #get_word_ids(path_id) ⇒ Object
- #has_format_abbrev?(format_abbrev) ⇒ Boolean
- #has_format_id?(format_id) ⇒ Boolean
- #has_license_abbrev?(license_abbrev) ⇒ Boolean
- #has_license_id?(license_id) ⇒ Boolean
- #has_package?(package_name) ⇒ Boolean
- #has_path?(path) ⇒ Boolean
- #has_type?(type) ⇒ Boolean
- #has_word?(word) ⇒ Boolean
- #increase_counter(key, step = 1) ⇒ Object
-
#initialize(config, read_only = false) ⇒ AbstractDBM
constructor
A new instance of AbstractDBM.
- #make_ncontents_by_format_key(format_abbrev) ⇒ Object
- #make_ncontents_by_license_key(license_abbrev) ⇒ Object
-
#put_package_options(package_id) ⇒ Object
FIXME: Ad hoc serialization.
- #put_pathid_wordids(package_id, path_id, word_ids) ⇒ Object
Methods included from Util
assert, assert_equal, assert_equal_all, assert_non_nil, assert_not_reached, benchmark, command_exist?, commify, eprintf, format_bytes, program_name, protect_from_signals, require_command, set_verbosity, shell_escape, unix?, vprintf, windows?, wprintf
Constructor Details
#initialize(config, read_only = false) ⇒ AbstractDBM
Returns a new instance of AbstractDBM.
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/gonzui/dbm.rb', line 162

# Builds a new AbstractDBM: checks the DB directory, calls open_db for
# every DBTable entry, and resets the per-instance bookkeeping state.
#
# config    - object responding to #db_directory; stored in @config.
# read_only - when true, put_db_version is skipped.
#
# Raises RuntimeError when config.db_directory is not a directory.
def initialize(config, read_only = false)
  unless File.directory?(config.db_directory)
    raise "#{config.db_directory}: No such directory"
  end

  @config = config
  validate_db_version

  @db_opened = {}
  DBTable.each do |name, key_kind, value_kind, allow_dups|
    open_db(name, key_kind, value_kind, allow_dups)
  end

  put_db_version unless read_only
  init_id_counters

  @opened = true
  @current_package_id = nil
  @wordid_pathids_cache = {}
end
Instance Method Details
#binary_content?(path_id) ⇒ Boolean
309 310 311 312 |
# File 'lib/gonzui/dbm.rb', line 309

# True when the format abbreviation of the content identified by path_id
# is exactly "binary".
def binary_content?(path_id)
  abbrev = get_format_abbrev(get_format_id_from_path_id(path_id))
  abbrev == "binary"
end
#close ⇒ Object
314 315 316 317 318 319 320 321 |
# File 'lib/gonzui/dbm.rb', line 314

# Flushes pending cached data, closes every opened table and marks this
# instance as closed.
#
# Raises DBMError when the instance is already closed. The check runs
# before flush_cache so that a second #close never tries to write cached
# data to tables that have already been closed.
def close
  raise DBMError.new("dbm is already closed") unless @opened
  flush_cache
  @db_opened.each_value {|db| db.close }
  @opened = false
end
#consistent? ⇒ Boolean
323 324 325 326 327 |
# File 'lib/gonzui/dbm.rb', line 323

# Runs the stat and seq integrity verifications, in that order, and
# answers true once both calls have returned.
# NOTE(review): how the verify_* helpers report a failure is not visible
# here -- presumably they raise; confirm.
def consistent?
  verify_stat_integrity
  verify_seq_integrity
  true
end
#decrease_counter(key, step = 1) ⇒ Object
219 220 221 222 223 |
# File 'lib/gonzui/dbm.rb', line 219

# Subtracts step from the counter named key and stores the result in the
# stat table under key.to_s.
#
# Returns the new counter value.
# Raises DBMError (leaving the stored value untouched) when the result
# would be negative.
def decrease_counter(key, step = 1)
  remaining = get_counter(key) - step
  if remaining < 0
    raise DBMError.new("counter #{key} becomes minus")
  end
  @stat[key.to_s] = remaining
end
#each_db_name ⇒ Object
335 336 337 |
# File 'lib/gonzui/dbm.rb', line 335

# Yields the name of every opened table, converted with to_s.
def each_db_name
  @db_opened.each_key do |name|
    yield name.to_s
  end
end
#each_format(&block) ⇒ Object
339 340 341 |
# File 'lib/gonzui/dbm.rb', line 339 def each_format(&block) each_property(:fmtid_fmt, :get_format_abbrev, &block) end |
#each_license(&block) ⇒ Object
343 344 345 |
# File 'lib/gonzui/dbm.rb', line 343 def each_license(&block) each_property(:lcsid_lcs, :get_license_abbrev, &block) end |
#each_package_name ⇒ Object
347 348 349 |
# File 'lib/gonzui/dbm.rb', line 347

# Yields every value stored in the pkgid_pkg table (package names).
def each_package_name
  @pkgid_pkg.each_value do |package_name|
    yield package_name
  end
end
#each_word(&block) ⇒ Object
351 352 353 |
# File 'lib/gonzui/dbm.rb', line 351

# Yields every key of the word_wordid table (the indexed words).
def each_word(&block)
  @word_wordid.each_key do |indexed_word|
    yield indexed_word
  end
end
#find_all(pattern) ⇒ Object
355 356 357 358 359 |
# File 'lib/gonzui/dbm.rb', line 355

# Exact-match search: looks pattern up in the word_wordid table and
# returns whatever collect_all_results produces for that word id.
# The lookup result is passed on as-is, including nil for an unknown word.
def find_all(pattern)
  collect_all_results(@word_wordid[pattern])
end
#find_all_by_prefix(pattern) ⇒ Object
361 362 363 |
# File 'lib/gonzui/dbm.rb', line 361

# Prefix search hook. This class provides no implementation.
#
# Always raises NotImplementedError; subclasses are expected to override.
def find_all_by_prefix(pattern)
  raise NotImplementedError, "should be implemented in a sub class"
end
#find_all_by_regexp(pattern) ⇒ Object
365 366 367 368 369 370 371 372 373 374 |
# File 'lib/gonzui/dbm.rb', line 365

# Regexp search: compiles pattern, walks every (word, word_id) pair of
# the word_wordid table and concatenates collect_all_results(word_id)
# for each word the regexp matches.
#
# Returns the accumulated array ([] when nothing matches).
def find_all_by_regexp(pattern)
  matcher = Regexp.new(pattern)
  hits = []
  @word_wordid.each do |word, word_id|
    next unless matcher.match(word)
    hits.concat(collect_all_results(word_id))
  end
  hits
end
#find_word_info(path_id, word_id) ⇒ Object
376 377 378 379 380 |
# File 'lib/gonzui/dbm.rb', line 376

# Yields each entry returned by get_all_word_info(path_id, word_id).
def find_word_info(path_id, word_id)
  get_all_word_info(path_id, word_id).each do |info|
    yield info
  end
end
#flush_cache ⇒ Object
382 383 384 385 386 387 388 389 390 391 392 393 394 395 |
# File 'lib/gonzui/dbm.rb', line 382

# Writes the cached word-id => path-ids map out to the tables, empties
# the cache, then flushes every id counter.
#
# Word ids are processed in sorted order. For each one, every cached
# path id is stored in pkgwordid_pathids under the packed
# (current package id, word id) key; afterwards each word id is stored
# in wordid_pkgids with the current package id.
def flush_cache
  sorted_word_ids = @wordid_pathids_cache.keys.sort!

  sorted_word_ids.each do |word_id|
    packed_key = AutoPack.pack_id2(@current_package_id, word_id)
    @wordid_pathids_cache[word_id].each do |path_id|
      @pkgwordid_pathids[packed_key] = path_id
    end
  end

  sorted_word_ids.each do |word_id|
    @wordid_pkgids[word_id] = @current_package_id
  end

  @wordid_pathids_cache.clear
  @id_counters.each {|id_counter| id_counter.flush }
end
#get_all_word_info(path_id, word_id) ⇒ Object
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 |
# File 'lib/gonzui/dbm.rb', line 397 def get_all_word_info(path_id, word_id) path_word_id = AutoPack.pack_id2(path_id, word_id) dump = @pathwordid_info[path_word_id] return [] if dump.nil? bols = get_bols(path_id) bol = bols.shift assert_equal(0, bol) lineno = 0 DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id| while bol and bol <= byteno lineno += 1 bol = bols.shift end type = get_type(type_id) WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno) } end |
#get_content(path_id) ⇒ Object
424 425 426 |
# File 'lib/gonzui/dbm.rb', line 424 def get_content(path_id) @pathid_content[path_id] end |
#get_content_hash(path_id) ⇒ Object
416 417 418 |
# File 'lib/gonzui/dbm.rb', line 416 def get_content_hash(path_id) @pathid_hash[path_id] end |
#get_content_info(path_id) ⇒ Object
428 429 430 431 432 |
# File 'lib/gonzui/dbm.rb', line 428

# Fetches the entry stored for path_id in the pathid_info table, asserts
# that it is present and returns ContentInfo.load applied to it.
def get_content_info(path_id)
  serialized = @pathid_info[path_id]
  assert_non_nil(serialized)
  ContentInfo.load(serialized)
end
#get_counter(key) ⇒ Object
420 421 422 |
# File 'lib/gonzui/dbm.rb', line 420

# Returns the value stored in the stat table under key.to_s, or 0 when
# the lookup yields nil/false.
def get_counter(key)
  @stat[key.to_s] || 0
end
#get_digest(path_id) ⇒ Object
434 435 436 437 438 439 440 441 |
# File 'lib/gonzui/dbm.rb', line 434

# Returns an array of DigestInfo objects decoded from the pathid_digest
# entry for path_id, or [] when there is no entry.
#
# Each decoded tuple gets get_type(<its last element>) appended before
# being splatted into DigestInfo.new.
def get_digest(path_id)
  packed = @pathid_digest[path_id]
  return [] if packed.nil?

  tuples = DeltaDumper.undump_tuples(DigestInfo, packed)
  tuples.map do |fields|
    fields.push(get_type(fields.last))
    DigestInfo.new(*fields)
  end
end
#get_format_abbrev(format_id) ⇒ Object
443 444 445 |
# File 'lib/gonzui/dbm.rb', line 443 def get_format_abbrev(format_id) @fmtid_fabbr[format_id] end |
#get_format_id(format_abbrev) ⇒ Object
447 448 449 |
# File 'lib/gonzui/dbm.rb', line 447 def get_format_id(format_abbrev) @fabbr_fmtid[format_abbrev] end |
#get_format_id_from_path_id(path_id) ⇒ Object
451 452 453 |
# File 'lib/gonzui/dbm.rb', line 451 def get_format_id_from_path_id(path_id) get_content_info(path_id).format_id end |
#get_format_ids_from_package_id(package_id) ⇒ Object
455 456 457 |
# File 'lib/gonzui/dbm.rb', line 455 def get_format_ids_from_package_id(package_id) @pkgid_fmtids.duplicates(package_id) end |
#get_format_name(format_id) ⇒ Object
459 460 461 |
# File 'lib/gonzui/dbm.rb', line 459 def get_format_name(format_id) @fmtid_fmt[format_id] end |
#get_license_abbrev(license_id) ⇒ Object
463 464 465 |
# File 'lib/gonzui/dbm.rb', line 463 def get_license_abbrev(license_id) @lcsid_labbr[license_id] end |
#get_license_id(license_abbrev) ⇒ Object
467 468 469 |
# File 'lib/gonzui/dbm.rb', line 467 def get_license_id(license_abbrev) @labbr_lcsid[license_abbrev] end |
#get_license_id_from_path_id(path_id) ⇒ Object
471 472 473 |
# File 'lib/gonzui/dbm.rb', line 471 def get_license_id_from_path_id(path_id) get_content_info(path_id).license_id end |
#get_license_ids_from_package_id(package_id) ⇒ Object
475 476 477 |
# File 'lib/gonzui/dbm.rb', line 475 def get_license_ids_from_package_id(package_id) @pkgid_lcsids.duplicates(package_id) end |
#get_license_name(license_id) ⇒ Object
479 480 481 |
# File 'lib/gonzui/dbm.rb', line 479 def get_license_name(license_id) @lcsid_lcs[license_id] end |
#get_ncontents ⇒ Object
483 484 485 |
# File 'lib/gonzui/dbm.rb', line 483 def get_ncontents get_counter(:ncontents) end |
#get_ncontents_by_format_id(format_id) ⇒ Object
487 488 489 490 491 |
# File 'lib/gonzui/dbm.rb', line 487

# Returns the per-format content counter for format_id: the id is mapped
# to its abbreviation, the abbreviation to a counter key, and the key is
# looked up with get_counter.
def get_ncontents_by_format_id(format_id)
  counter_key = make_ncontents_by_format_key(get_format_abbrev(format_id))
  get_counter(counter_key)
end
#get_ncontents_by_license_id(license_id) ⇒ Object
493 494 495 496 497 |
# File 'lib/gonzui/dbm.rb', line 493

# Returns the per-license content counter for license_id: the id is
# mapped to its abbreviation, the abbreviation to a counter key, and the
# key is looked up with get_counter.
def get_ncontents_by_license_id(license_id)
  counter_key = make_ncontents_by_license_key(get_license_abbrev(license_id))
  get_counter(counter_key)
end
#get_ncontents_in_package(package_name) ⇒ Object
503 504 505 506 |
# File 'lib/gonzui/dbm.rb', line 503

# Returns how many entries the pkgid_pathids table reports (via
# #duplicates) for the package named package_name.
def get_ncontents_in_package(package_name)
  path_ids = @pkgid_pathids.duplicates(get_package_id(package_name))
  path_ids.length
end
#get_ncontents_indexed ⇒ Object
499 500 501 |
# File 'lib/gonzui/dbm.rb', line 499 def get_ncontents_indexed get_counter(:ncontents_indexed) end |
#get_nformats ⇒ Object
508 509 510 |
# File 'lib/gonzui/dbm.rb', line 508 def get_nformats get_counter(:nformats) end |
#get_nlines_indexed ⇒ Object
512 513 514 |
# File 'lib/gonzui/dbm.rb', line 512 def get_nlines_indexed get_counter(:nlines_indexed) end |
#get_npackages ⇒ Object
516 517 518 |
# File 'lib/gonzui/dbm.rb', line 516 def get_npackages get_counter(:npackages) end |
#get_nwords ⇒ Object
520 521 522 |
# File 'lib/gonzui/dbm.rb', line 520 def get_nwords get_counter(:nwords) end |
#get_package_id(package_name) ⇒ Object
524 525 526 |
# File 'lib/gonzui/dbm.rb', line 524 def get_package_id(package_name) @pkg_pkgid[package_name] end |
#get_package_id_from_path_id(path_id) ⇒ Object
528 529 530 |
# File 'lib/gonzui/dbm.rb', line 528 def get_package_id_from_path_id(path_id) @pathid_pkgid[path_id] end |
#get_package_ids(word_id) ⇒ Object
532 533 534 |
# File 'lib/gonzui/dbm.rb', line 532 def get_package_ids(word_id) @wordid_pkgids.duplicates(word_id) end |
#get_package_name(package_id) ⇒ Object
536 537 538 |
# File 'lib/gonzui/dbm.rb', line 536 def get_package_name(package_id) @pkgid_pkg[package_id] end |
#get_package_names ⇒ Object
540 541 542 |
# File 'lib/gonzui/dbm.rb', line 540 def get_package_names @pkgid_pkg.values end |
#get_package_options(package_id) ⇒ Object
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 |
# File 'lib/gonzui/dbm.rb', line 544

# Rebuilds the option hash stored for package_id in the pkgid_options
# table.
#
# Every stored value has the form "name:payload". Known names:
#   exclude_pattern - payload is compiled with Regexp.new
#   noindex_formats - payload is split on "," into an array
#
# Returns a Hash with the keys :exclude_pattern and :noindex_formats.
# Raises DBMError for any other option name. Both known options are
# asserted to be present after decoding.
def get_package_options(package_id)
  options = {}
  values = @pkgid_options.duplicates(package_id)
  values.each {|value|
    # Split on the first ":" only; the payload may itself contain colons
    # (e.g. "(?-mix:...)" produced by Regexp#to_s).
    k, v = value.split(":", 2)
    k = k.intern
    case k
    when :exclude_pattern
      v = Regexp.new(v)
    when :noindex_formats
      v = v.split(",")
    else
      raise DBMError.new("#{k}: unknown option")
    end
    options[k] = v
  }
  assert(options[:exclude_pattern])
  assert(options[:noindex_formats])
  return options
end
#get_path(path_id) ⇒ Object
565 566 567 |
# File 'lib/gonzui/dbm.rb', line 565 def get_path(path_id) @pathid_path[path_id] end |
#get_path_id(path) ⇒ Object
569 570 571 |
# File 'lib/gonzui/dbm.rb', line 569 def get_path_id(path) @path_pathid[path] end |
#get_path_ids(package_id) ⇒ Object
573 574 575 |
# File 'lib/gonzui/dbm.rb', line 573 def get_path_ids(package_id) @pkgid_pathids.duplicates(package_id) end |
#get_path_ids_from_package_and_word_id(package_id, word_id) ⇒ Object
577 578 579 580 |
# File 'lib/gonzui/dbm.rb', line 577

# Packs (package_id, word_id) into a single key with AutoPack.pack_id2
# and returns the result of #duplicates for that key on the
# pkgwordid_pathids table.
def get_path_ids_from_package_and_word_id(package_id, word_id)
  @pkgwordid_pathids.duplicates(AutoPack.pack_id2(package_id, word_id))
end
#get_source_uri(package_id) ⇒ Object
582 583 584 |
# File 'lib/gonzui/dbm.rb', line 582 def get_source_uri(package_id) @pkgid_src[package_id] end |
#get_type(type_id) ⇒ Object
586 587 588 |
# File 'lib/gonzui/dbm.rb', line 586 def get_type(type_id) @typeid_type[type_id] end |
#get_type_id(type) ⇒ Object
590 591 592 |
# File 'lib/gonzui/dbm.rb', line 590 def get_type_id(type) @type_id_counter.get_id(type) end |
#get_word(word_id) ⇒ Object
594 595 596 |
# File 'lib/gonzui/dbm.rb', line 594 def get_word(word_id) @wordid_word[word_id] end |
#get_word_id(word) ⇒ Object
598 599 600 |
# File 'lib/gonzui/dbm.rb', line 598 def get_word_id(word) @word_wordid[word] end |
#get_word_ids(path_id) ⇒ Object
602 603 604 |
# File 'lib/gonzui/dbm.rb', line 602 def get_word_ids(path_id) DeltaDumper.undump_ids(@pathid_wordids[path_id]) end |
#has_format_abbrev?(format_abbrev) ⇒ Boolean
610 611 612 |
# File 'lib/gonzui/dbm.rb', line 610 def has_format_abbrev?(format_abbrev) @fabbr_fmtid.has_key?(format_abbrev) end |
#has_format_id?(format_id) ⇒ Boolean
606 607 608 |
# File 'lib/gonzui/dbm.rb', line 606 def has_format_id?(format_id) @fmtid_fmt.has_key?(format_id) end |
#has_license_abbrev?(license_abbrev) ⇒ Boolean
618 619 620 |
# File 'lib/gonzui/dbm.rb', line 618 def has_license_abbrev?(license_abbrev) @labbr_lcsid.has_key?(license_abbrev) end |
#has_license_id?(license_id) ⇒ Boolean
614 615 616 |
# File 'lib/gonzui/dbm.rb', line 614 def has_license_id?(license_id) @lcsid_lcs.has_key?(license_id) end |
#has_package?(package_name) ⇒ Boolean
622 623 624 |
# File 'lib/gonzui/dbm.rb', line 622 def has_package?(package_name) @pkg_pkgid.include?(package_name) end |
#has_path?(path) ⇒ Boolean
626 627 628 |
# File 'lib/gonzui/dbm.rb', line 626 def has_path?(path) @path_pathid.include?(path) end |
#has_type?(type) ⇒ Boolean
630 631 632 |
# File 'lib/gonzui/dbm.rb', line 630 def has_type?(type) @type_typeid.include?(type) end |
#has_word?(word) ⇒ Boolean
634 635 636 637 638 639 640 641 |
# File 'lib/gonzui/dbm.rb', line 634

# True when looking word up in the word_wordid table yields a truthy
# value, false otherwise. Always returns a strict boolean.
def has_word?(word)
  @word_wordid[word] ? true : false
end
#increase_counter(key, step = 1) ⇒ Object
643 644 645 |
# File 'lib/gonzui/dbm.rb', line 643

# Adds step to the counter named key and stores the result in the stat
# table under key.to_s.
#
# Returns the new counter value.
def increase_counter(key, step = 1)
  updated = get_counter(key) + step
  @stat[key.to_s] = updated
end
#make_ncontents_by_format_key(format_abbrev) ⇒ Object
647 648 649 |
# File 'lib/gonzui/dbm.rb', line 647

# Builds the counter key for a format: the symbol made of
# "ncontents_format_" followed by format_abbrev (case is kept as given).
def make_ncontents_by_format_key(format_abbrev)
  prefix = "ncontents_format_"
  (prefix + format_abbrev).to_sym
end
#make_ncontents_by_license_key(license_abbrev) ⇒ Object
651 652 653 |
# File 'lib/gonzui/dbm.rb', line 651

# Builds the counter key for a license: the symbol made of
# "ncontents_license_" followed by license_abbrev, with the whole string
# downcased.
def make_ncontents_by_license_key(license_abbrev)
  prefix = "ncontents_license_"
  (prefix + license_abbrev).downcase.to_sym
end
#put_package_options(package_id) ⇒ Object
FIXME: Ad hoc serialization. We avoid Marshal so that the DB does not become Ruby-dependent.
657 658 659 660 661 662 |
# File 'lib/gonzui/dbm.rb', line 657

# Stores the indexing options taken from @config for package_id in the
# pkgid_options table, as two "name:payload" strings:
#   exclude_pattern - @config.exclude_pattern.to_s
#   noindex_formats - @config.noindex_formats joined with ","
#
# pkgid_options is declared with dupsort = true in DBTable, so the second
# assignment presumably adds a value rather than replacing the first --
# confirm against the table class.
#
# FIXME: Ad hoc serialization; Marshal is avoided so that the DB does not
# become Ruby-dependent.
def put_package_options(package_id)
  @pkgid_options[package_id] =
    sprintf("exclude_pattern:%s", @config.exclude_pattern.to_s)
  @pkgid_options[package_id] =
    sprintf("noindex_formats:%s", @config.noindex_formats.join(","))
end
#put_pathid_wordids(package_id, path_id, word_ids) ⇒ Object
664 665 666 667 668 669 670 |
# File 'lib/gonzui/dbm.rb', line 664

# Records package_id as the current package and appends path_id to the
# cached path-id list of every word id in word_ids, creating the list on
# first use. Nothing is written to the tables here; flush_cache does that.
#
# Returns word_ids.
def put_pathid_wordids(package_id, path_id, word_ids)
  @current_package_id = package_id
  word_ids.each do |word_id|
    @wordid_pathids_cache[word_id] ||= []
    @wordid_pathids_cache[word_id] << path_id
  end
end