Top Level Namespace
- Includes:
- Bio::MAF
Defined Under Namespace
Constant Summary collapse
- INTERVAL =
10
- LOG =
Bio::MAF::LOG
- PRINTERS =
{ 'flat' => :FlatPrinter, 'stack' => :CallStackPrinter }
Constants included from Bio::MAF
Bio::MAF::FASTA_LINE_LEN, Bio::MAF::TYPE_PROPS
Instance Method Summary collapse
- #apply_options(tiler) ⇒ Object
- #build_index(maf, index) ⇒ Object
- #desc(seq) ⇒ Object
- #each_tiler(access, intervals) ⇒ Object
- #handle_interval_spec(int) ⇒ Object
- #handle_list_spec(spec) ⇒ Object
- #make_processing_task(maf) ⇒ Object
- #parse_interval(line) ⇒ Object
- #process_maf(maf_path, bgz_path, idx_path) ⇒ Object
- #target_for(base, interval, &blk) ⇒ Object
- #usage(msg) ⇒ Object
Methods included from Bio::MAF
Instance Method Details
#apply_options(tiler) ⇒ Object
30 31 32 33 34 35 |
# File 'bin/maf_tile', line 30 def (tiler) tiler.reference = $options.ref if $options.ref tiler.species = $options.species tiler.species_map = $options.species_map tiler.fill_char = $options.fill_char if $options.fill_char end |
#build_index(maf, index) ⇒ Object
19 20 21 22 23 |
# File 'bin/maf_index', line 19 def build_index(maf, index) parser = Bio::MAF::Parser.new(maf, $options.parser_opts) idx = Bio::MAF::KyotoIndex.build(parser, index, $options.ref_only) idx.close end |
#desc(seq) ⇒ Object
7 8 9 |
# File 'bin/find_overlaps', line 7 def desc(seq) "#{seq.source}:#{seq.start}-#{seq.end}" end |
#each_tiler(access, intervals) ⇒ Object
37 38 39 40 41 42 43 44 |
# File 'bin/maf_tile', line 37 def each_tiler(access, intervals) intervals.each do |int| access.tile(int) do |tiler| (tiler) yield tiler end end end |
#handle_interval_spec(int) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 |
# File 'bin/maf_extract', line 25 def handle_interval_spec(int) if int =~ /(.+):(\d+)-(\d+)/ if $options.one_based Bio::GenomicInterval.new($1, $2.to_i, $3.to_i) else Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i) end else raise "Invalid interval specification: #{int}" end end |
#handle_list_spec(spec) ⇒ Object
17 18 19 20 21 22 23 |
# File 'bin/maf_extract', line 17 def handle_list_spec(spec) if spec =~ /^@(.+)/ File.read($1).split else spec.split(',') end end |
#make_processing_task(maf) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'bin/maf_bgzip', line 59 def make_processing_task(maf) maf_base = File.basename(maf) base = maf_base.gsub(/\.maf.*/, '') bgz_path = "#{$options.dir}/#{base}.maf.bgz" if File.exist?(bgz_path) && ! $options.force LOG.error "#{bgz_path} already exists, refusing to overwrite " \ "without --force!" exit 1 end idx_path = nil if $options.index idx_path = "#{$options.dir}/#{base}.kct" if File.exist?(idx_path) && ! $options.force LOG.error "#{idx_path} already exists, refusing to overwrite " \ "without --force!" exit 1 end end lambda { process_maf(maf, bgz_path, idx_path) } end |
#parse_interval(line) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'bin/maf_tile', line 9 def parse_interval(line) src, r_start_s, r_end_s, _ = line.split(nil, 4) r_start = r_start_s.to_i r_end = r_end_s.to_i i_src = if $options.bed_species "#{$options.bed_species}.#{src}" else src end if $options.one_based Bio::GenomicInterval.new(i_src, r_start, r_end) else Bio::GenomicInterval.zero_based(i_src, r_start, r_end) end end |
#process_maf(maf_path, bgz_path, idx_path) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
# File 'bin/maf_bgzip', line 80 def process_maf(maf_path, bgz_path, idx_path) maf_base = File.basename(maf_path) LOG.debug { "Processing #{maf_base}." } p = Bio::MAF::Parser.new(maf_path, :retain_text => true) if idx_path if File.exists?(idx_path) File.unlink(idx_path) end idx = Bio::MAF::KyotoIndex.new(idx_path) idx.prep(bgz_path, :bgzf, $options.ref_only) exec = Bio::MAF::Executor.create end start_t = Time.now last_t = start_t last_pos = 0 n_blocks = 0 maf_size = File.size(maf_path) File.open(bgz_path, 'wb') do |out_f| Bio::BGZF::Writer.new(out_f, $options.level) do |bgz_w| maf_w = Bio::MAF::Writer.new(bgz_w) maf_w.write_header(p.header) p.each_block do |block| bgz_w.write(block.orig_text) if idx block.offset = bgz_w.last_write_pos exec.submit do idx.index_blocks([block]) end end n_blocks += 1 if n_blocks % 100 == 0 cur_t = Time.now delta_t = cur_t - last_t if delta_t > INTERVAL cur_pos = p.phys_f.tell LOG.debug { pos_mb = cur_pos.to_f / 1048576 delta_bytes = cur_pos - last_pos rate = delta_bytes.to_f / delta_t mb_rate = rate / 1048576 pct = cur_pos.to_f / maf_size * 100 elapsed = cur_t - start_t sprintf("%s: processed %.1f MB (%.1f%%) in %ds, %.2f MB/s.", maf_base, pos_mb, pct, elapsed, mb_rate) } last_t = cur_t last_pos = cur_pos end end end end end unc = p.f.tell if p.f != p.phys_f p.close if idx exec.shutdown idx.db.synchronize(true) end elapsed = Time.now - start_t mb = maf_size.to_f / 1048576 mb_rate = mb / elapsed LOG.info { sprintf("Processed %s (%.1f MB) in %ds, %.2f MB/s", maf_base, mb, elapsed, mb_rate) } if unc LOG.info { unc_mb = unc / 1048576 unc_rate = unc_mb / elapsed sprintf(" Uncompressed: %.1f MB, %.2f MB/s", unc_mb, unc_rate) } end LOG.info { raw_size = unc || maf_size avg_block_kb = raw_size.to_f / n_blocks / 1024 sprintf(" %d alignment blocks, average size %.2f KB", n_blocks, avg_block_kb) } LOG.info { orig_size = unc ? unc : maf_size bgzf_size = File.size(bgz_path).to_f ratio = bgzf_size / orig_size sprintf(" Compressed with BGZF (level=%d) to %.1f MB (%.1fx)", $options.level, bgzf_size / 1048576, ratio) } end |
#target_for(base, interval, &blk) ⇒ Object
25 26 27 28 |
# File 'bin/maf_tile', line 25 def target_for(base, interval, &blk) path = "#{base}_#{interval.zero_start}-#{interval.zero_end}.fa" File.open(path, 'w', &blk) end |
#usage(msg) ⇒ Object
127 128 129 130 131 |
# File 'bin/maf_extract', line 127 def usage(msg) $stderr.puts msg $stderr.puts $op exit 2 end |