Class: Bio::FlatFileIndex::Flat_1::FlatMappingFile
- Defined in:
- lib/bio/io/flatfile/index.rb
Overview
FlatMappingFile class.
Internal use only.
Constant Summary collapse
- @@recsize_width =
4
- @@recsize_regex =
/\A\d{4}\z/
Instance Attribute Summary collapse
-
#filename ⇒ Object
readonly
Returns the value of attribute filename.
-
#mode ⇒ Object
Returns the value of attribute mode.
Class Method Summary collapse
- .external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
- .external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
- .external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
- .internal_sort_proc ⇒ Object
- .open(*arg) ⇒ Object
Instance Method Summary collapse
- #add_record(str) ⇒ Object
- #close ⇒ Object
-
#each ⇒ Object
export/import/edit data.
- #export_tsv(stream) ⇒ Object
- #get_record(i) ⇒ Object
- #import_tsv_files(flag_primary, mode, sort_proc, *files) ⇒ Object
- #init(rs) ⇒ Object
- #init_with_sorted_tsv_file(filename, flag_primary = false) ⇒ Object
-
#initialize(filename, mode = 'rb') ⇒ FlatMappingFile
constructor
A new instance of FlatMappingFile.
- #open ⇒ Object
- #put_record(i, str) ⇒ Object
- #record_size ⇒ Object
- #records ⇒ Object (also: #size)
-
#search(key) ⇒ Object
methods for searching.
- #seek(i) ⇒ Object
-
#write_record(str) ⇒ Object
methods for writing file.
Constructor Details
#initialize(filename, mode = 'rb') ⇒ FlatMappingFile
Returns a new instance of FlatMappingFile.
734 735 736 737 738 739 740 741 |
# File 'lib/bio/io/flatfile/index.rb', line 734
# Creates a mapping-file object for +filename+ without touching the disk.
#
# filename:: path of the mapping file
# mode::     mode string later given to File.open (default 'rb')
#
# The file handle, the record size and the record count all start out as
# nil; they are filled in on demand by #open, #record_size and #records.
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  # Nothing is opened or read yet.
  @file = @record_size = @records = nil
end
Instance Attribute Details
#filename ⇒ Object (readonly)
Returns the value of attribute filename.
743 744 745 |
# File 'lib/bio/io/flatfile/index.rb', line 743 def filename @filename end |
#mode ⇒ Object
Returns the value of attribute mode.
742 743 744 |
# File 'lib/bio/io/flatfile/index.rb', line 742 def mode @mode end |
Class Method Details
.external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
923 924 925 926 927 928 929 930 931 |
# File 'lib/bio/io/flatfile/index.rb', line 923
# Builds a Proc that merges already-sorted files with an external sort
# program run in merge mode (-m).
#
# sort_program:: argv prefix used to start the sort program
#
# The Proc is called as proc.call(out, in1, *files): every input (in1 and
# each of files) must already be sorted, and the merged result is written
# to the path +out+. It returns whatever Kernel#system returns.
def self.external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    merge_argv = sort_program + [ '-m', '-o', out, in1 ] + files
    system(*merge_argv)
  end
end
.external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 |
# File 'lib/bio/io/flatfile/index.rb', line 900
# Builds a Proc that sorts each extra input file into its own temporary
# file with an external sort program and then merges those temporaries
# with +in1+ (merge mode, -m).
#
# sort_program:: argv prefix used to start the sort program
#
# The Proc is called as proc.call(out, in1, *files). +in1+ is handed to
# the merge step as is, so it has to be sorted already; each of +files+ is
# sorted first. The merged result is written to the path +out+.
# The Proc returns the array of (by then removed) Tempfile objects.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  # Tempfile is used below; load it here so the Proc does not depend on
  # some other method having required it first.
  require 'tempfile'
  Proc.new do |out, in1, *files|
    sorted = []
    begin
      files.each do |fn|
        tf = Tempfile.open('sort')
        # Register before doing anything else so the ensure clause below
        # always sees this temporary file.
        sorted << tf
        # Close without unlinking: the sort program writes to tf.path.
        tf.close(false)
        system(*(sort_program + [ '-o', tf.path, fn ]))
      end
      sorted_paths = sorted.map { |tf| tf.path }
      system(*(sort_program + [ '-m', '-o', out, in1 ] + sorted_paths))
    ensure
      # Remove the intermediate files even when something above raised.
      sorted.each { |tf| tf.close(true) }
    end
    sorted
  end
end
.external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) ⇒ Object
891 892 893 894 895 896 897 898 |
# File 'lib/bio/io/flatfile/index.rb', line 891
# Builds a Proc that sorts files with an external sort program.
#
# sort_program:: argv prefix used to start the sort program
#
# The Proc is called as proc.call(out, in1, *files): all inputs are given
# to one sort invocation whose output goes to the path +out+. It returns
# whatever Kernel#system returns.
def self.external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    sort_argv = sort_program + [ '-o', out, in1 ] + files
    system(*sort_argv)
  end
end
.internal_sort_proc ⇒ Object
933 934 935 936 937 938 939 940 941 942 943 944 945 946 |
# File 'lib/bio/io/flatfile/index.rb', line 933
# Builds a Proc that sorts files in memory, without an external program.
#
# The Proc is called as proc.call(out, in1, *files): all lines of +in1+
# and of each of +files+ are read, sorted with Array#sort! and written to
# the path +out+. It returns nil.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    lines = []
    # File.readlines (unlike IO.readlines) never treats a leading "|" in
    # the path as a command to run.
    [ in1, *files ].each { |fn| lines.concat(File.readlines(fn)) }
    # A file whose last line lacks a newline would otherwise be glued to
    # whichever line follows it after sorting.
    lines.map! { |line| line.end_with?("\n") ? line : "#{line}\n" }
    lines.sort!
    # Block form closes the output even if a write fails.
    File.open(out, 'w') do |of|
      lines.each { |line| of << line }
    end
    nil
  end
end
.open(*arg) ⇒ Object
730 731 732 |
# File 'lib/bio/io/flatfile/index.rb', line 730
# Same as FlatMappingFile.new: every argument is forwarded to the
# constructor and the new object is returned. No block is forwarded and
# no file is opened here.
def self.open(*arg)
  new(*arg)
end
Instance Method Details
#add_record(str) ⇒ Object
806 807 808 809 810 811 812 |
# File 'lib/bio/io/flatfile/index.rb', line 806
# Appends +str+ as a new record at the end of the file and returns the
# updated record count.
def add_record(str)
  # Called only for their side effects: they make sure the cached record
  # count and record size are loaded before the file is written to.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records = @records + 1
end
#close ⇒ Object
755 756 757 758 759 760 761 762 |
# File 'lib/bio/io/flatfile/index.rb', line 755
# Closes the underlying file if it is open and forgets the handle.
# Calling it while nothing is open does nothing. Always returns nil.
def close
  return nil unless @file
  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
end
#each ⇒ Object
export/import/edit data
841 842 843 844 845 846 847 848 |
# File 'lib/bio/io/flatfile/index.rb', line 841
# Yields every record of the file, first to last, wrapped in a Record.
# Returns self.
def each
  total = records
  seek(0)
  total.times do |idx|
    yield Record.new(get_record(idx))
  end
  self
end
#export_tsv(stream) ⇒ Object
850 851 852 853 854 855 |
# File 'lib/bio/io/flatfile/index.rb', line 850
# Writes every record to +stream+, one per line: each record's string
# form followed by a newline is appended with <<. Returns +stream+.
def export_tsv(stream)
  each { |rec| stream << "#{rec}\n" }
  stream
end
#get_record(i) ⇒ Object
776 777 778 779 780 781 782 |
# File 'lib/bio/io/flatfile/index.rb', line 776
# Reads the record at index +i+ and returns it as a string of
# record_size bytes (or whatever File#read returns at that position).
def get_record(i)
  # Ask for the size first: doing so opens the file when necessary.
  width = record_size
  seek(i)
  @file.read(width)
end
#import_tsv_files(flag_primary, mode, sort_proc, *files) ⇒ Object
948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 |
# File 'lib/bio/io/flatfile/index.rb', line 948 def import_tsv_files(flag_primary, mode, sort_proc, *files) require 'tempfile' tmpfile1 = Tempfile.open('flat') self.export_tsv(tmpfile1) unless mode == :new tmpfile1.close(false) tmpfile0 = Tempfile.open('sorted') tmpfile0.close(false) sort_proc.call(tmpfile0.path, tmpfile1.path, *files) tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+') tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary) tmpmap.close self.close begin File.rename(self.filename, self.filename + ".#{$$}.bak~") rescue Errno::ENOENT end File.rename(tmpmap.filename, self.filename) begin File.delete(self.filename + ".#{$$}.bak~") rescue Errno::ENOENT end tmpfile0.close(true) tmpfile1.close(true) self end |
#init(rs) ⇒ Object
827 828 829 830 831 832 833 834 835 836 837 838 |
# File 'lib/bio/io/flatfile/index.rb', line 827
# Resets the file to an empty mapping with records of +rs+ bytes.
#
# The file is opened if necessary, truncated, and given a header made of
# +rs+ as a zero-padded decimal number @@recsize_width digits wide.
# The cached record size becomes +rs+ and the record count 0.
#
# Raises RuntimeError ('record size out of range') unless
# 0 < rs < 10 ** @@recsize_width.
def init(rs)
  limit = 10 ** @@recsize_width
  raise 'record size out of range' unless 0 < rs && rs < limit
  open
  @record_size = rs
  header = format("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
#init_with_sorted_tsv_file(filename, flag_primary = false) ⇒ Object
857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 |
# File 'lib/bio/io/flatfile/index.rb', line 857
# Re-initializes this mapping file from the lines of a sorted TSV file.
#
# filename::     path of the TSV file to read
# flag_primary:: when true, consecutive lines whose Record#key is equal
#                collapse to the last of them (a debug message is printed
#                for each line that is dropped); when false, a line is
#                skipped only if its Record equals the previous one
#
# The record size is the length of the longest chomped line (at least 1).
# Duplicate detection only compares neighbouring lines.
# Returns self.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  # Block form: the input is closed even if init, Record.new or
  # add_record raises part-way through.
  File.open(filename) do |f|
    # First pass: find the widest line.
    rec_size = 1
    f.each do |line|
      len = line.chomp.length
      rec_size = len if rec_size < len
    end
    init(rec_size)

    # Second pass: write the records.
    f.rewind
    prev = nil
    if flag_primary
      # A record is written only once the next key is known to differ,
      # so the last record of a run of equal keys is the one kept.
      f.each do |line|
        rec = Record.new(line.chomp, rec_size)
        if prev
          if rec.key == prev.key
            # Message text (including its spelling) is kept as is.
            DEBUG.print "Warining: overwrote unique id #{rec.key.inspect}\n"
          else
            add_record(prev.to_s)
          end
        end
        prev = rec
      end
      add_record(prev.to_s) if prev
    else
      f.each do |line|
        rec = Record.new(line.chomp, rec_size)
        add_record(rec.to_s) if rec != prev
        prev = rec
      end
    end
  end
  self
end
#open ⇒ Object
745 746 747 748 749 750 751 752 753 |
# File 'lib/bio/io/flatfile/index.rb', line 745
# Opens the underlying file with the stored mode unless it is already
# open. Returns true when the file was opened by this call, nil when it
# was open already.
def open
  return nil if @file
  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
#put_record(i, str) ⇒ Object
814 815 816 817 818 819 820 821 822 823 824 825 |
# File 'lib/bio/io/flatfile/index.rb', line 814
# Stores +str+ as record number +i+.
#
# An index inside the current range overwrites that record. An index at
# or past the end first appends blank (space-filled) records for every
# missing index before +i+, sets the record count to i + 1, and then
# writes the record at the end. Returns the result of #write_record.
def put_record(i, str)
  count = records
  width = record_size
  if i < count
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    blank = ' ' * width
    @file.write(blank * (i - count))
    @records = i + 1
  end
  write_record(str)
end
#record_size ⇒ Object
764 765 766 767 768 769 770 771 772 773 774 |
# File 'lib/bio/io/flatfile/index.rb', line 764
# Returns the size of one record.
#
# The first call opens the file if necessary and reads the header: the
# first @@recsize_width bytes, which must match @@recsize_regex. The
# parsed number is cached for later calls.
#
# Raises RuntimeError ('strange record size') when the header does not
# match.
def record_size
  return @record_size if @record_size
  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
#records ⇒ Object Also known as: size
789 790 791 792 793 794 795 796 |
# File 'lib/bio/io/flatfile/index.rb', line 789
# Returns the number of records in the file.
#
# The first call derives it from the file size: the bytes after the
# @@recsize_width-byte header, integer-divided by the record size.
# The result is cached for later calls.
def records
  return @records if @records
  # record_size must come first: it opens the file when necessary.
  width = record_size
  payload_bytes = @file.stat.size - @@recsize_width
  @records = payload_bytes / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
#search(key) ⇒ Object
methods for searching
982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 |
# File 'lib/bio/io/flatfile/index.rb', line 982 def search(key) n = records return [] if n <= 0 i = n / 2 i_prev = nil DEBUG.print "binary search starts...\n" begin rec = Record.new(get_record(i)) i_prev = i if key < rec.key then n = i i = i / 2 elsif key > rec.key then i = (i + n) / 2 else # key == rec.key result = [ rec.val ] j = i - 1 while j >= 0 and (rec = Record.new(get_record(j))).key == key result << rec.val j = j - 1 end result.reverse! j = i + 1 while j < n and (rec = Record.new(get_record(j))).key == key result << rec.val j = j + 1 end DEBUG.print "#{result.size} hits found!!\n" return result end end until i_prev == i DEBUG.print "no hits found\n" #nil [] end |
#seek(i) ⇒ Object
784 785 786 787 |
# File 'lib/bio/io/flatfile/index.rb', line 784
# Moves the file position to the start of record number +i+, i.e. to
# @@recsize_width (the header) plus +i+ records.
def seek(i)
  # Compute the offset before touching @file: record_size opens the file
  # when it is not open yet.
  offset = @@recsize_width + record_size * i
  @file.seek(offset)
end
#write_record(str) ⇒ Object
methods for writing file
800 801 802 803 804 |
# File 'lib/bio/io/flatfile/index.rb', line 800
# Writes +str+ at the current file position as one fixed-width record:
# shorter strings are padded on the right with spaces, longer ones are
# cut to record_size characters. Returns the result of File#write.
def write_record(str)
  rs = record_size
  # [0, rs] keeps exactly rs characters. An inclusive range 0..rs would
  # keep rs + 1 and push every following record out of alignment.
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end