Module: Mizlab

Defined in:
lib/mizlab.rb,
lib/mizlab/version.rb

Defined Under Namespace

Classes: Blast

Constant Summary collapse

VERSION =
"0.1.7"

Class Method Summary collapse

Class Method Details

.calculate_coordinates(sequence, mappings, weights = nil, window_size = nil) ⇒ Array

Calculate coordinates from sequence

Parameters:

  • sequence (Bio::Sequence)

    sequence

  • mappings (Hash)

    Hash formatted as {base => [Float…]}. All of the [Float…] arrays must have the same dimension.

  • weights (Hash) (defaults to: nil)

    Weights for some base combination.

  • window_size (Integer) (defaults to: nil)

    Size of the window used when scanning the sequence. If not given, the key length of `weights` is used when `weights` is given; otherwise it defaults to 3.

Returns:

  • (Array)

    coordinates like [[dim1…], [dim2…]…].



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/mizlab.rb', line 78

# Calculate cumulative coordinates from a sequence.
#
# Each element of +sequence+ is looked up in +mappings+ to get a step vector;
# the vector is scaled by the weight registered for the window of elements
# ending at the current position (1.0 when no weight is registered) and added
# to the previous point. Every dimension starts at 0.0.
#
# Neither +mappings+ nor +weights+ is modified, so frozen hashes are accepted.
#
# @param sequence [#length, #[]] sequence to scan (indexable by Integer and Range)
# @param mappings [Hash] element => Array of Numeric; all arrays must have the same size
# @param weights [Hash, nil] window (slice of +sequence+) => Numeric weight
# @param window_size [Integer, nil] number of elements per window. When nil, the
#   key length of +weights+ is used, or 3 if +weights+ is nil or empty.
# @return [Array<Array<Float>>] one array per dimension, each of size sequence.length + 1
# @raise [TypeError] if +window_size+ is nil and the keys of +weights+ differ in length,
#   or if the values of +mappings+ differ in size
def calculate_coordinates(sequence, mappings,
                          weights = nil, window_size = nil)
  weight_keys = weights.nil? ? [] : weights.keys

  # error detections
  if window_size.nil? && weights.is_a?(Hash) && weight_keys.map(&:length).uniq.length > 1
    raise TypeError, "When not give `window_size`, `weights` must have same length keys"
  end
  n_dimension = mappings.values[0].length
  if mappings.values.any? { |v| v.length != n_dimension }
    raise TypeError, "All of `mappings`.values must have same size"
  end

  # Work on a Float copy so the caller's hash is left untouched.
  float_mappings = mappings.transform_values { |v| v.map(&:to_f) }

  if window_size.nil?
    # An empty weights table says nothing about the window, so use the default.
    window_size = weight_keys.empty? ? 3 : weight_keys[0].length
  end
  lookback = window_size - 1 # how many elements before `idx` belong to the window
  weight_table = weights.nil? ? {} : weights

  coordinates = Array.new(n_dimension) { [0.0] }
  sequence.length.times do |idx|
    start = idx < lookback ? 0 : idx - lookback
    # `fetch` supplies the 1.0 fallback without setting a default on the caller's hash.
    weight = weight_table.fetch(sequence[start..idx], 1.0)
    float_mappings[sequence[idx]].each_with_index do |step, dim|
      coordinates[dim] << coordinates[dim][-1] + step * weight
    end
  end
  coordinates
end

.fetch_taxon(taxonid) {|Hash| ... } ⇒ Hash

Fetch taxonomy information from a Taxonomy ID. A block can also be given.

Parameters:

  • taxonid (String/Integer)

    Taxonomy ID, or an Array of them.

Yields:

  • (Hash)

    Taxonomy information.

Returns:

  • (Hash)

    Taxonomy information.



154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/mizlab.rb', line 154

# Fetch the lineage entries for one or more taxonomy IDs.
#
# A non-Array argument is wrapped in a one-element Array. For each ID the XML
# returned by Bio::NCBI::REST::EFetch.taxonomy is parsed with REXML, converted
# with xml_to_hash, and the value at
# [:TaxaSet][:Taxon][:LineageEx][:Taxon] is extracted.
#
# With a block, that value is yielded once per ID and the list of IDs is
# returned. Without a block, the value for the first ID is returned
# immediately and the remaining IDs are not fetched.
#
# @param taxonid [String, Integer, Array] taxonomy ID or an Array of them
# @yield [Hash] extracted lineage data (exact shape depends on xml_to_hash — not visible here)
# @return [Object] lineage data for the first ID (no block), or the ID list (block given)
def fetch_taxon(taxonid)
  ids = taxonid.is_a?(Array) ? taxonid : [taxonid]
  ids.each do |id|
    xml = Bio::NCBI::REST::EFetch.taxonomy(id, "xml")
    document_root = REXML::Document.new(xml).root
    lineage = xml_to_hash(document_root)[:TaxaSet][:Taxon][:LineageEx][:Taxon]
    return lineage unless block_given?

    yield lineage
  end
end

.getent(accessions, is_protein = false) {|String| ... } ⇒ String

Get entry as String. You can also give a block.

Parameters:

  • accessions (String/Array)

    Accession numbers like [“NC_012920”, …].

Yields:

  • (String)

    Entry as string.

Returns:

  • (String)

    Entry as string.



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/mizlab.rb', line 16

# Get entries as strings for one or more accession numbers.
#
# A String argument is wrapped in a one-element Array. Each accession is
# fetched with fetch_protein when +is_protein+ is truthy, otherwise with
# fetch_nucleotide.
#
# With a block, every entry is yielded in turn, with a pause after each one,
# and the accession list is returned. Without a block, the first entry is
# returned immediately.
#
# @param accessions [String, Array] accession numbers like ["NC_012920", ...]
# @param is_protein [Boolean] selects fetch_protein instead of fetch_nucleotide
# @yield [Object] the fetched entry
# @return [Object] the first entry (no block), or the accession list (block given)
def getent(accessions, is_protein = false)
  targets = accessions.is_a?(String) ? [accessions] : accessions
  targets.each do |accession|
    entry = if is_protein
              fetch_protein(accession)
            else
              fetch_nucleotide(accession)
            end
    return entry unless block_given?

    yield entry
    # 0.333... seconds between requests sometimes hits the NCBI rate limit,
    # so wait slightly longer.
    sleep(0.37)
  end
end

.getobj(accessions, is_protein = false) {|Bio::GenBank| ... } ⇒ Bio::GenBank

Fetch data via genbank. You can also give a block.

Parameters:

  • accessions (String/Array)

    Accession numbers Like [“NC_012920”, …].

  • is_protein (Bool) (defaults to: false)

    Whether the accession is a protein. Defaults to false.

Yields:

  • (Bio::GenBank)

    GenBank object.

Returns:

  • (Bio::GenBank)

    GenBank object.



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/mizlab.rb', line 34

# Fetch entries through getent and hand each one to parse.
#
# With a block, every object produced by parse is yielded and the method
# returns whatever getent returns. Without a block, the first parsed object
# is returned immediately, which also stops any further fetching.
#
# @param accessions [String, Array] accession numbers like ["NC_012920", ...]
# @param is_protein [Boolean] forwarded unchanged to getent
# @yield [Object] object produced by parse (documented as Bio::GenBank)
# @return [Object] the first parsed object (no block), or getent's result (block given)
def getobj(accessions, is_protein = false)
  getent(accessions, is_protein) do |entry|
    parse(entry) do |record|
      # `return` here leaves getobj itself, not just the inner block.
      return record unless block_given?

      yield record
    end
  end
end

.local_patterns(x_coordinates, y_coordinates) ⇒ Array

Compute local patterns from coordinates.

Parameters:

  • x_coordinates (Array)

    Coordinates on the x dimension.

  • y_coordinates (Array)

    Coordinates on the y dimension.

Returns:

  • (Array)

    Local pattern histogram (unnormalized).



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/mizlab.rb', line 124

# Compute a local-pattern histogram from a 2D path.
#
# Consecutive points are joined with bresenham (coordinates are truncated to
# integers first), every pixel it returns is recorded in a set, and
# get_patterns is then asked for the pattern index of the filled pixels;
# each yielded index increments one of 512 counters.
#
# @param x_coordinates [Array<Numeric>] coordinates on the x dimension
# @param y_coordinates [Array<Numeric>] coordinates on the y dimension
# @return [Array<Integer>] 512 unnormalized counts, indexed by pattern
# @raise [TypeError] if the two arrays differ in length
def local_patterns(x_coordinates, y_coordinates)
  unless x_coordinates.length == y_coordinates.length
    raise TypeError, "The arguments must have same length."
  end

  filled = Set.new
  points = x_coordinates.zip(y_coordinates)
  points.each_cons(2) do |(x_from, y_from), (x_to, y_to)|
    segment = bresenham(x_from.truncate, y_from.truncate,
                        x_to.truncate, y_to.truncate)
    segment.each do |pix|
      # Arrays as set members are slow to hash, so each pixel is stored
      # as an "x#y" string instead.
      filled.add("#{pix[0]}##{pix[1]}")
    end
  end

  histogram = Array.new(512, 0)
  get_patterns(filled) { |pattern| histogram[pattern] += 1 }
  histogram
end

.savefile(filename, obj) ⇒ nil

Save object.

Parameters:

  • filename (String)

    Filepath from executed source.

  • obj (Bio::DB)

    Object which inherits from ‘Bio::DB`.

Returns:

  • (nil)


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/mizlab.rb', line 50

# Save every tagged field of +obj+ to +filename+.
#
# If the file already exists the user is asked, via standard output and
# standard input, whether to overwrite it. "N", "n" or "no" leaves the file
# untouched; "Y", "y" or "yes" overwrites it; anything else repeats the
# question. End of input is treated as a refusal so the existing file is
# never clobbered without an explicit answer.
#
# @param filename [String] path of the file to write
# @param obj [#tags, #get] object whose +tags+ are each written with +get+
#   (documented as inheriting from Bio::DB)
# @return [nil]
def savefile(filename, obj)
  # `File.exists?` was removed in Ruby 3.2; `File.exist?` works everywhere.
  if File.exist?(filename)
    loop do
      print("#{filename} exists already. Overwrite? [y/n] ")
      # Read from $stdin explicitly: bare `gets` would read from files named
      # in ARGV when the program was started with arguments.
      line = $stdin.gets
      return if line.nil? # EOF: nobody can answer, keep the existing file

      answer = line.rstrip
      return if %w[N n no].include?(answer)
      break if %w[Y y yes].include?(answer)

      puts("You should input 'y' or 'n'")
    end
  end
  File.open(filename, "w") do |f|
    obj.tags.each do |tag|
      f.puts(obj.get(tag))
    end
  end
  nil
end