Class: Cheripic::Regions

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/cheripic/regions.rb

Overview

An application of Bio::DB::Fasta object that lets to extract selected regions of the fasta entries

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(assembly) ⇒ Regions

creates a Regions object using fasta

Parameters:

  • assembly (String)

    path to a fasta file



26
27
28
29
30
# File 'lib/cheripic/regions.rb', line 26

# Builds a Regions object backed by a fasta file.
#
# Starts with an empty id => length hash, opens the fasta through
# Bio::DB::Fasta::FastaFile, then calls get_id_len (defined elsewhere
# in the class).
#
# @param assembly [String] path to a fasta file
def initialize(assembly)
  @id_len = {}
  @reference_db = Bio::DB::Fasta::FastaFile.new({ fasta: assembly })
  get_id_len
end

Instance Attribute Details

#id_lenHash (readonly)

Returns a hash of fasta entry ids as keys and sequence length as values.

Returns:

  • (Hash)

    a hash of fasta entry ids as keys and sequence length as values



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/cheripic/regions.rb', line 16

# Wraps an indexed fasta file and exposes its entry ids and lengths,
# plus extraction of the sequence flanking a given position.
#
# Hash-style access (each, each_key, each_value, length, []) is delegated
# to the id => length hash, which also drives the Enumerable methods.
class Regions

  include Enumerable
  extend Forwardable
  def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
  attr_reader :reference_db, :id_len

  # Creates a Regions object from a fasta file and records the length of
  # every entry.
  #
  # @param assembly [String] path to a fasta file
  def initialize(assembly)
    @reference_db = Bio::DB::Fasta::FastaFile.new({ fasta: assembly })
    @id_len = {}
    get_id_len
  end

  # Loads the fasta index and records each entry id with its length.
  #
  # @return [Hash] a hash of fasta entry ids as keys and sequence length as values
  def get_id_len
    @reference_db.load_fai_entries
    @reference_db.index.entries.each_entry do |entry|
      @id_len[entry.id] = entry.length
    end
    # Return the hash explicitly so the result matches the documented
    # contract instead of whatever each_entry happens to return.
    @id_len
  end

  # Fetches the sequence flanking a position on both sides.
  #
  # Each flank spans up to Options.sel_seq_len + 1 bases and is truncated at
  # the ends of the entry. Region strings are built with 1-based inclusive
  # coordinates (the samtools faidx convention), so the left flank never
  # starts below 1.
  #
  # @note this method excludes the base at the position provided
  # @param id [String] fasta entry id; must be a key of the id => length hash
  # @param pos [Integer] position within the entry (presumably 1-based,
  #   matching the region coordinates)
  # @return [Array<String>] left and right flanks; a flank is an empty
  #   string when there are no bases on that side
  # @raise [ArgumentError] if no length is recorded for id
  def fetch_seq(id, pos)
    len = @id_len[id]
    raise ArgumentError, "unknown fasta entry id: #{id.inspect}" unless len

    flank = Options.sel_seq_len + 1
    low = [pos - flank, 1].max
    high = [pos + flank, len].min
    [[low, pos - 1], [pos + 1, high]].map do |start, stop|
      # Nothing to fetch on this side (pos is the first or last base).
      next '' if start > stop

      region = Bio::DB::Fasta::Region.parse_region("#{id}:#{start}-#{stop}")
      @reference_db.fetch_sequence(region)
    end
  end

end

#reference_dbBio::DB::Fasta::FastaFile (readonly)

Returns indexed fasta object.

Returns:

  • (Bio::DB::Fasta::FastaFile)

    indexed fasta object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/cheripic/regions.rb', line 16

# Wraps an indexed fasta file and exposes its entry ids and lengths,
# plus extraction of the sequence flanking a given position.
#
# Hash-style access (each, each_key, each_value, length, []) is delegated
# to the id => length hash, which also drives the Enumerable methods.
class Regions

  include Enumerable
  extend Forwardable
  def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
  attr_reader :reference_db, :id_len

  # Creates a Regions object from a fasta file and records the length of
  # every entry.
  #
  # @param assembly [String] path to a fasta file
  def initialize(assembly)
    @reference_db = Bio::DB::Fasta::FastaFile.new({ fasta: assembly })
    @id_len = {}
    get_id_len
  end

  # Loads the fasta index and records each entry id with its length.
  #
  # @return [Hash] a hash of fasta entry ids as keys and sequence length as values
  def get_id_len
    @reference_db.load_fai_entries
    @reference_db.index.entries.each_entry do |entry|
      @id_len[entry.id] = entry.length
    end
    # Return the hash explicitly so the result matches the documented
    # contract instead of whatever each_entry happens to return.
    @id_len
  end

  # Fetches the sequence flanking a position on both sides.
  #
  # Each flank spans up to Options.sel_seq_len + 1 bases and is truncated at
  # the ends of the entry. Region strings are built with 1-based inclusive
  # coordinates (the samtools faidx convention), so the left flank never
  # starts below 1.
  #
  # @note this method excludes the base at the position provided
  # @param id [String] fasta entry id; must be a key of the id => length hash
  # @param pos [Integer] position within the entry (presumably 1-based,
  #   matching the region coordinates)
  # @return [Array<String>] left and right flanks; a flank is an empty
  #   string when there are no bases on that side
  # @raise [ArgumentError] if no length is recorded for id
  def fetch_seq(id, pos)
    len = @id_len[id]
    raise ArgumentError, "unknown fasta entry id: #{id.inspect}" unless len

    flank = Options.sel_seq_len + 1
    low = [pos - flank, 1].max
    high = [pos + flank, len].min
    [[low, pos - 1], [pos + 1, high]].map do |start, stop|
      # Nothing to fetch on this side (pos is the first or last base).
      next '' if start > stop

      region = Bio::DB::Fasta::Region.parse_region("#{id}:#{start}-#{stop}")
      @reference_db.fetch_sequence(region)
    end
  end

end

Instance Method Details

#fetch_seq(id, pos) ⇒ Array<String>

Note:

this method excludes the base at the position provided

A method to returns left and right sequence of specified length, for a provided id and position

Parameters:

  • id (String)
  • pos (Integer)

Returns:

  • (Array<String>)


47
48
49
50
51
52
53
54
55
56
57
# File 'lib/cheripic/regions.rb', line 47

# Fetches the sequence flanking a position on both sides.
#
# Each flank spans up to Options.sel_seq_len + 1 bases and is truncated at
# the ends of the entry. Region strings are built with 1-based inclusive
# coordinates (the samtools faidx convention), so the left flank never
# starts below 1.
#
# @note this method excludes the base at the position provided
# @param id [String] fasta entry id; must be a key of the id => length hash
# @param pos [Integer] position within the entry (presumably 1-based,
#   matching the region coordinates)
# @return [Array<String>] left and right flanks; a flank is an empty
#   string when there are no bases on that side
# @raise [ArgumentError] if no length is recorded for id
def fetch_seq(id, pos)
  len = @id_len[id]
  raise ArgumentError, "unknown fasta entry id: #{id.inspect}" unless len

  flank = Options.sel_seq_len + 1
  low = [pos - flank, 1].max
  high = [pos + flank, len].min
  [[low, pos - 1], [pos + 1, high]].map do |start, stop|
    # Nothing to fetch on this side (pos is the first or last base).
    next '' if start > stop

    region = Bio::DB::Fasta::Region.parse_region("#{id}:#{start}-#{stop}")
    @reference_db.fetch_sequence(region)
  end
end

#get_id_lenHash

A method to extract fasta entry ids and length and make a hash

Returns:

  • (Hash)

    a hash of fasta entry ids as keys and sequence length as values



35
36
37
38
39
40
# File 'lib/cheripic/regions.rb', line 35

# Loads the fasta index and records each entry id with its length in the
# id => length hash.
#
# @return [Hash] a hash of fasta entry ids as keys and sequence length as values
def get_id_len
  @reference_db.load_fai_entries
  @reference_db.index.entries.each_entry do |entry|
    @id_len[entry.id] = entry.length
  end
  # Return the hash explicitly so the result matches the documented
  # contract instead of whatever each_entry happens to return.
  @id_len
end