Module: Bio::FlatFileIndex::Indexer
- Defined in:
- lib/bio/io/flatfile/indexer.rb
Defined Under Namespace
Modules: Parser Classes: NameSpace, NameSpaces
Constant Summary collapse
- DEFAULT_SORT =
default sort program
'/usr/bin/sort'
- DEFAULT_ENV =
default env program (run a program in a modified environment)
'/usr/bin/env'
- DEFAULT_ENV_ARGS =
default arguments for env program
[ 'LC_ALL=C' ]
Class Method Summary collapse
-
.addindex_bdb(db, flag, need_update, parser, options) ⇒ Object
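Reads the flat files selected by need_update and adds their primary and secondary keys to a Berkeley DB index.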
-
.addindex_flat(db, mode, need_update, parser, options) ⇒ Object
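Reads the flat files selected by need_update into temporary TSV files, then sorts and imports them into a flat/1 index.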
- .chose_sort_proc(prog, mode = :new, env = nil, env_args = nil) ⇒ Object
-
.makeindexBDB(name, parser, options, *files) ⇒ Object
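Creates a new Berkeley DB index named name from the given files; raises RuntimeError when Berkeley DB support (BDB) is not available.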
-
.makeindexFlat(name, parser, options, *files) ⇒ Object
Creates a new flat/1 index named name from the given files, using temporary files for sorting.
- .update_index(name, parser, options, *files) ⇒ Object
Class Method Details
.addindex_bdb(db, flag, need_update, parser, options) ⇒ Object
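Reads the flat files selected by need_update and adds their primary and secondary keys to a Berkeley DB index. Returns true.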
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 |
# File 'lib/bio/io/flatfile/indexer.rb', line 476
#
# Adds the entries of the selected flat files to a Berkeley DB backed index.
#
# db          - databank object; +primary+, +secondary+ and +fileids+ are used
# flag        - open flag assigned to the primary and to every secondary
#               index file before entries are written
# need_update - enumerable of file ids (indexes into +db.fileids+) to read
# parser      - flat file parser; must respond to open_flatfile, each,
#               parse_primary, parse_secondary and close_flatfile
# options     - accepted for signature parity with addindex_flat; not used here
#
# Returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  pn = db.primary
  pn.file.close
  pn.file.flag = flag

  # Each secondary index file is closed, given the new flag, then opened and
  # closed once more with that flag before any entry is added.
  db.secondary.each_files do |x|
    x.file.close
    x.file.flag = flag
    x.file.open
    x.file.close
  end

  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    begin
      parser.each do |pos, len|
        primary_key = parser.parse_primary
        # add_overwrite is used on purpose; the alternative was
        # pn.file.add_exclusive(primary_key, [ fileid, pos, len ])
        pn.file.add_overwrite(primary_key, [ fileid, pos, len ])
        parser.parse_secondary do |sn, sp|
          db.secondary[sn].file.add_nr(sp, primary_key)
        end
      end
    ensure
      # Close the flat file even when parsing or indexing raises.
      parser.close_flatfile
    end
  end
  true
end
.addindex_flat(db, mode, need_update, parser, options) ⇒ Object
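Reads the flat files selected by need_update into temporary TSV files, then sorts and imports them into a flat/1 index. Returns false when need_update is empty, true otherwise.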
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 |
# File 'lib/bio/io/flatfile/indexer.rb', line 525
#
# Adds the entries of the selected flat files to a flat/1 index. Keys are
# first written to temporary tab-separated files, which are then handed to
# import_tsv_files of the primary and of each secondary index file together
# with the sort procedure chosen by chose_sort_proc.
#
# db          - databank object; +primary+, +secondary+ and +fileids+ are used
# mode        - passed through to chose_sort_proc and import_tsv_files
#               (callers in this file pass :new or :add)
# need_update - enumerable of file ids (indexes into +db.fileids+) to read
# parser      - flat file parser; must respond to primary, secondary,
#               open_flatfile, each, parse_primary, parse_secondary and
#               close_flatfile
# options     - hash-like object; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' are read
#
# Returns false when need_update is empty, true otherwise.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog = options['sort_program']
  env = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  tempfiles = []
  begin
    DEBUG.print "prepare temporary files...\n"
    tempbase = "bioflat#{rand(10000)}-"
    pfile = Tempfile.open(tempbase + 'primary-')
    tempfiles << pfile
    DEBUG.print "open temporary file #{pfile.path.inspect}\n"
    sfiles = {}
    parser.secondary.names.each do |x|
      sfiles[x] = Tempfile.open(tempbase + 'secondary-')
      tempfiles << sfiles[x]
      DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
    end

    DEBUG.print "reading files...\n"
    need_update.each do |fileid|
      filename = db.fileids[fileid].filename
      parser.open_flatfile(fileid, filename)
      begin
        parser.each do |pos, len|
          primary_key = parser.parse_primary
          pfile << "#{primary_key}\t#{fileid}\t#{pos}\t#{len}\n"
          parser.parse_secondary do |sn, sp|
            sfiles[sn] << "#{sp}\t#{primary_key}\n"
          end
        end
      ensure
        # Close the flat file even when parsing raises.
        parser.close_flatfile
      end
    end

    sort_proc = chose_sort_proc(prog, mode, env, env_args)

    # close(false) flushes the data but keeps the file on disk for the import.
    pfile.close(false)
    DEBUG.print "sorting primary (#{parser.primary.name})...\n"
    db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
    pfile.close(true)

    parser.secondary.names.each do |x|
      DEBUG.print "sorting secondary (#{x})...\n"
      sfiles[x].close(false)
      db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                            sfiles[x].path)
      sfiles[x].close(true)
    end
    true
  ensure
    # Remove whatever temporary files are still around, including after an
    # exception. Repeating close(true) on an already removed file is harmless.
    tempfiles.each { |t| t.close(true) }
  end
end
.chose_sort_proc(prog, mode = :new, env = nil, env_args = nil) ⇒ Object
585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 |
# File 'lib/bio/io/flatfile/indexer.rb', line 585
#
# Chooses the sort procedure used when importing TSV files into a flat index.
#
# prog     - sort program. 'builtin', 'hs' or 'lm' (case-insensitive) select
#            the internal sort routine; nil or '' select DEFAULT_SORT when it
#            is executable and the internal routine otherwise; anything else
#            is used as the external sort command.
# mode     - :new selects external_sort_proc, any other value selects
#            external_merge_sort_proc (external sort only)
# env      - env program put in front of the sort command; '' or false
#            disables it, nil uses DEFAULT_ENV when that is executable
# env_args - arguments for the env program; defaults to DEFAULT_ENV_ARGS
#
# Returns whatever the selected Flat_1::FlatMappingFile factory returns.
def self.chose_sort_proc(prog, mode = :new, env = nil, env_args = nil)
  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    DEBUG.print "sort: internal sort routine\n"
    Flat_1::FlatMappingFile.internal_sort_proc
  when nil, ''
    if FileTest.executable?(DEFAULT_SORT)
      chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
    else
      DEBUG.print "sort: internal sort routine\n"
      Flat_1::FlatMappingFile.internal_sort_proc
    end
  else
    env_args ||= DEFAULT_ENV_ARGS
    prefixes =
      if env == '' || env == false
        # use of an env program is explicitly inhibited
        [ prog ]
      elsif env
        # use the given env program
        [ env ] + env_args + [ prog ]
      elsif FileTest.executable?(DEFAULT_ENV)
        # env == nil: use the default env program when possible
        [ DEFAULT_ENV ] + env_args + [ prog ]
      else
        [ prog ]
      end
    DEBUG.print "sort: #{prefixes.join(' ')}\n"
    if mode == :new
      Flat_1::FlatMappingFile.external_sort_proc(prefixes)
    else
      Flat_1::FlatMappingFile.external_merge_sort_proc(prefixes)
    end
  end
end
.makeindexBDB(name, parser, options, *files) ⇒ Object
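Creates a new Berkeley DB index named name from the given files. Raises RuntimeError when Berkeley DB support (BDB) is not available. Returns true.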
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 |
# File 'lib/bio/io/flatfile/indexer.rb', line 451
#
# Creates a new Berkeley DB backed index.
#
# name    - name handed to DataBank.new
# parser  - flat file parser; +format+, +primary.name+ and +secondary.names+
#           are copied into the databank
# options - not used in this method; forwarded unchanged to addindex_bdb
# files   - flat files to index
#
# Returns true.
# Raises RuntimeError when the BDB constant is not defined.
def self.makeindexBDB(name, parser, options, *files)
  unless defined?(BDB)
    raise RuntimeError, "Berkeley DB support not found"
  end
  DEBUG.print "makeing BDB DataBank...\n"
  db = DataBank.new(name, MAGIC_BDB)
  db.format = parser.format
  db.fileids.add(*files)
  db.fileids.recalc

  db.primary = parser.primary.name
  db.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  db.write('wb', BDBdefault.flag_write)

  DEBUG.print "reading files...\n"

  # Every file is new, so all file ids 0...files.size need indexing.
  addindex_bdb(db, BDBdefault.flag_write, (0...(files.size)),
               parser, options)
  db.close
  true
end
.makeindexFlat(name, parser, options, *files) ⇒ Object
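Creates a new flat/1 index named name from the given files, using temporary files for sorting. Returns true.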
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 |
# File 'lib/bio/io/flatfile/indexer.rb', line 508
#
# Creates a new flat/1 index.
#
# name    - name handed to DataBank.new
# parser  - flat file parser; +format+, +primary.name+ and +secondary.names+
#           are copied into the databank
# options - forwarded unchanged to addindex_flat
# files   - flat files to index
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  db = DataBank.new(name, nil)
  db.format = parser.format
  db.fileids.add(*files)
  db.primary = parser.primary.name
  db.secondary = parser.secondary.names
  db.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  db.write('wb')

  # Every file is new, so all file ids 0...files.size need indexing.
  addindex_flat(db, :new, (0...(files.size)), parser, options)
  db.close
  true
end
.update_index(name, parser, options, *files) ⇒ Object
621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 |
# File 'lib/bio/io/flatfile/indexer.rb', line 621
#
# Updates the existing index +name+: re-reads files whose check fails, adds
# the new files, or rebuilds the whole index when options['renew'] is set.
#
# name    - name handed to DataBank.open
# parser  - flat file parser, or nil/false to pick one from the stored format
#           and the auto-detected class of the files
# options - hash-like object; 'renew' is read here, the rest is forwarded to
#           the makeindex*/addindex_* methods
# files   - flat files to add (or, with 'renew', to merge into the file list)
#
# Returns true.
# Raises RuntimeError ('file format mismatch', 'unsupported format',
# 'cannnot determine format. please specify manually.' or
# 'Unsupported index type') when the index cannot be updated.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser
    raise 'file format mismatch' if db.format != parser.format
  else
    # Auto-detection is best effort: an unreadable or missing file simply
    # leaves the corresponding class undetermined (nil).
    dbclass_orig =
      begin
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
      rescue TypeError, Errno::ENOENT
        nil
      end
    dbclass_new =
      begin
        Bio::FlatFile.autodetect_file(files[0])
      rescue TypeError, Errno::ENOENT
        nil
      end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
    when 'genbank'
      # '||' is required here: with 'or' the assignment binds first and the
      # class detected from the new files would never be used as a fallback.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank || dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
    if dbclass_new && dbclass_new != parser.dbclass
      raise 'file format mismatch'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew']
    # Rebuild from scratch: registered files that still exist followed by the
    # given files, without duplicates, keeping first-occurrence order.
    newfiles = db.fileids.filenames.select { |x| FileTest.exist?(x) }
    newfiles.concat(files)
    newfiles.uniq!
    index_type = db.index_type
    db.close
    case index_type
    when MAGIC_BDB
      Indexer.makeindexBDB(name, parser, options, *newfiles)
    when MAGIC_FLAT
      Indexer.makeindexFlat(name, parser, options, *newfiles)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Incremental update: collect ids of registered files whose check fails and
  # drop already registered names from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file is missing: fall back to a full rebuild. The options
    # are duplicated so the caller's object is not modified.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end

  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # The added files received the ids b...(b + newfiles.size).
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault.flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault.flag_append, need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end