Module: TransmembraneIndex

Included in:
Phobius, Phobius::Index
Defined in:
lib/transmembrane.rb

Overview

A TransmembraneIndex is a hash that takes a FASTA reference as its key and returns a structured hash containing the transmembrane information.

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.filetype(file) ⇒ Object

Returns :toppred or :phobius, or nil if no line of the file matches a known format.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/transmembrane.rb', line 7

# Sniffs +file+ line by line to decide which predictor wrote it.
#
# Each line is tested against the signatures below in order; the first
# signature that matches any line decides the result.
#
# Returns :phobius or :toppred, or nil when no line matches.
def self.filetype(file)
  signatures = [
    # NOTE(review): the 'SEQENCE' spelling presumably mirrors the header
    # Phobius actually prints -- confirm before "correcting" it.
    [/SEQENCE/, :phobius],
    # Without the headers, this still picks up a file holding a single
    # prot without tm or signal peptide.
    [/    0  0 i/, :phobius],
    [/Algorithm specific parameters/, :toppred],  # New text
    [/<parameters>/, :toppred]                    # XML
  ]

  File.open(file) do |fh|
    fh.each_line do |line|
      hit = signatures.find { |pattern, _kind| pattern =~ line }
      return hit.last if hit
    end
  end
  nil
end

.new(file) ⇒ Object

Right now accepts toppred.out files. Phobius objects can use the fasta object to update their hash for methods like avg_overlap.



39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/transmembrane.rb', line 39

# Builds the index object that matches the format of +file+.
#
# The format is detected with filetype(file); the matching implementation
# is required lazily, only once the format is known.
#
# Returns whatever TopPred::Index.new(file) or Phobius::Index.new(file)
# returns.
# Raises ArgumentError when filetype does not report :toppred or :phobius.
def self.new(file)
  kind = filetype(file)

  if kind == :toppred
    require 'transmembrane/toppred'
    return TopPred::Index.new(file)
  end

  if kind == :phobius
    require 'transmembrane/phobius'
    # (a warning about a missing fasta object used to be emitted here; it
    # is disabled in the original code as well)
    return Phobius::Index.new(file)
  end

  raise ArgumentError, "#{kind} filetype for #{file} not recognized!"
end

Instance Method Details

#avg_overlap(key, sequence, tp = :number) ⇒ Object

tp = :number or :fraction (the fraction of the sequence size). Returns the average number of overlapping amino acids with transmembrane segments. Returns nil if there is no protein by that key.



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/transmembrane.rb', line 66

# Average overlap between +sequence+ and the transmembrane segments of the
# entry stored under +key+.
#
# tp - :number (default) returns the mean of the counts produced by
#      num_transmem_aa; :fraction divides that mean by sequence.size.
#
# Returns nil when the index has no entry for +key+, and 0.0 when
# num_transmem_aa yields no counts.
def avg_overlap(key, sequence, tp=:number)
  # the protein is occasionally missing from the index
  return nil unless key?(key)

  counts = num_transmem_aa(self[key], sequence)
  return 0.0 if counts.empty?

  mean = counts.inject(0) { |total, count| total + count }.to_f / counts.size
  tp == :fraction ? mean / sequence.size : mean
end

#num_certain_index ⇒ Object

returns a hash of key -> num certain transmembrane segments



54
55
56
57
58
59
60
# File 'lib/transmembrane.rb', line 54

# Builds a new hash mapping every key of this index to the value stored
# under :num_certain_transmembrane_segments in its entry, using 0 when
# that value is nil or false.
def num_certain_index
  counts = {}
  each do |name, info|
    certain = info[:num_certain_transmembrane_segments]
    counts[name] = certain ? certain : 0
  end
  counts
end

#num_overlapping_chars(full_sequence, ranges, substring) ⇒ Object

Returns an array holding, for each occurrence of substring within full_sequence, the number of its characters that overlap the spans given in ranges (start/stop doublets). The ranges should be 0-indexed! Each span includes the ‘stop’ position, i.e., full_sequence[start..stop].



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/transmembrane.rb', line 109

# Counts, for every occurrence of +substring+ in +full_sequence+, how many
# of its characters fall inside the spans listed in +ranges+.
#
# full_sequence - String that is searched.
# ranges        - Array of 0-indexed Ranges whose end position is part of
#                 the span (i.e. full_sequence[first..last]).
# substring     - String to look for. Occurrences are located left to
#                 right and do not overlap each other (the search resumes
#                 right after each hit).
#
# Returns an Array with one Integer per occurrence: the overlap summed over
# all ranges. Returns [] when +ranges+ is empty, when +substring+ is empty,
# or when +substring+ does not occur.
def num_overlapping_chars(full_sequence, ranges, substring)
  return [] if ranges.size == 0

  slen = substring.size
  # An empty substring matches at every position without ever advancing
  # the search, so it has to be rejected up front.
  return [] if slen == 0

  occurrences = []
  pos = 0
  while (i = full_sequence.index(substring, pos))
    occurrences << Range.new(i, i + slen - 1)
    pos = i + slen
  end

  occurrences.map do |sb|
    ranges.inject(0) do |overlap, tm|
      # Size of the intersection of two inclusive spans. This also covers
      # an occurrence that swallows the whole range, where neither end of
      # the occurrence lies inside the range.
      shared = [sb.last, tm.last].min - [sb.first, tm.first].max + 1
      shared > 0 ? overlap + shared : overlap
    end
  end
end

#num_transmem_aa(tmhash, sequence) ⇒ Object

Returns an array (usually of length 1) of the number of amino acids contained inside transmembrane spanning segments. Assumes that tmhash has the key ‘transmembrane_segments’; if there are no transmembrane segments, returns an empty array.



94
95
96
97
98
99
100
101
102
103
# File 'lib/transmembrane.rb', line 94

# Number of amino acids of +sequence+ that lie inside the transmembrane
# segments described by +tmhash+.
#
# tmhash   - entry hash; reads :transmembrane_segments (each element
#            responding to [:start] and [:stop]) and :aaseq.
# sequence - the substring handed on to num_overlapping_chars.
#
# Returns the result of num_overlapping_chars, or [] when +tmhash+ has no
# :transmembrane_segments key.
def num_transmem_aa(tmhash, sequence)
  return [] unless tmhash.key?(:transmembrane_segments)

  # :start and :stop are each shifted down by one before being handed to
  # num_overlapping_chars
  shifted = tmhash[:transmembrane_segments].map do |segment|
    (segment[:start] - 1)..(segment[:stop] - 1)
  end
  num_overlapping_chars(tmhash[:aaseq], shifted, sequence)
end

#reference_to_key(reference) ⇒ Object



32
33
34
# File 'lib/transmembrane.rb', line 32

# Placeholder hook: presumably meant to turn a reference into the key used
# by this index (inferred from the name only).
# It has no implementation here and returns nil; it needs to be subclassed
# or written.
def reference_to_key(reference)
  nil
end