Class: LiftOver::Query

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-liftover.rb,
bin/bio-liftover

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.lift(array, coordinate) ⇒ Object

Lift coordinates



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/bio-liftover.rb', line 177

def self.lift(array,coordinate)  
  res_chr=""
  res_start=""
  flag_st=0
  result=[]
  array.each do |k,v|
    start_ref=k.split(" ")[5].to_i
    start_query=k.split(" ")[10].to_i
    incr=nil
    end_incr=0
    gap=0
    fl_st=0
    strand=k.split(" ")[9]
    q_size=k.split(" ")[8].to_i
    res_chr=k.split(" ")[7]
    v.each do |val|
      break if flag_st==1
      if incr.nil?
        end_incr=val[2]
        gap=val[1]
        incr=val[0]+gap
        itval0=IntervalTree::InclusiveTree.new(start_ref...start_ref+val[0].to_i)
        res1=itval0.search(coordinate)
        fl_st=start_ref+val[0]+gap
        if not res1.empty?
          if strand=="-"
            res_start = "#{q_size-start_query-(coordinate-start_ref)}"
            flag_st=1
          else
            res_start = "#{start_query+(coordinate-start_ref)}"
            flag_st=1
          end
        end
      else
        if gap != 0
          itval1=IntervalTree::InclusiveTree.new(fl_st...fl_st+gap.to_i)
          res2=itval1.search(coordinate)
          if not res2.empty?
            if strand=="-"
              res_start = "#{q_size-start_query-(coordinate-start_ref)}"
              flag_st=1
            else
              res_start = "#{start_query+(coordinate-start_ref)+end_incr}"
              flag_st=1
            end
          end
          fl_st=fl_st+gap
        else
          incr=val[0].to_i
          if val[0].to_i !=0
            itval2=IntervalTree::InclusiveTree.new(fl_st...fl_st+val[0].to_i)
            res3=itval2.search(coordinate)
            if not res3.empty?
              if strand=="-"
                res_start = "#{q_size-start_query-(coordinate-start_ref)}"
                flag_st=1
              else
                res_start = "#{start_query+(coordinate-start_ref)+end_incr}"
                flag_st=1
              end
            end

            fl_st=fl_st+val[0].to_i+gap
            end_incr=end_incr+val[2].to_i
            gap=val[1].to_i 
          else
            fl_st=fl_st+val[0].to_i+gap
            end_incr=end_incr+val[2].to_i
            gap=val[1].to_i
          end
        end
      end
      gap=val[1].to_i
    end
  end
  a= "#{res_start.to_i}"
 return a
end

.parse_chain_fileObject

Given a pair of genomes, fetch chain file and parse it into a array.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/bio-liftover.rb', line 18

def self.parse_chain_file(gen1,gen2,chr_input,coord_start,coord_end,chain_file)
  @chains=[]
  @chains_complete=[]
  @@gen1=gen1
  @@gen2=gen2
  @@chr_input=chr_input
  @@coord_start=coord_start.to_i
  @@coord_end=coord_end.to_i
  if chain_file.nil? #switch between local/remote parse file
    LiftOver::Query.parse_remote_chain
  else
    LiftOver::Query.parse_local_chain
  end
end

.parse_local_chainObject

Parse local chain file



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/bio-liftover.rb', line 101

def self.parse_local_chain
  gz_file=open(doc["<chain_file>"])
  gz=Zlib::GzipReader.new(gz_file)
  flag=0
  header=""
  coords=[]
  result=[]
  gz.each_line do |line|
    if line.chomp.start_with?("chain")
      qry=LiftOver::Query.search_for_chain_by_interval(line)
      if qry == true
        flag=1
        header=line
      else
        flag=0
        if header!=""
          a={"#{header.chomp}"=>coords}
          result.push(a)
        end
        a=""
        header=""
        coords=[]
      end
    else
      if flag==1
        coords.push(line.chomp.split("\t").map(&:to_i))
      end
    end          
  end
  scores=[]
  if not result.empty?
    result.each do |hits|
      hits.keys.each do |ev|
        scores.push(ev.split(" ")[1].to_i)
      end
    end
  else
    puts "Candidate chain not found"
  end
  start_val=""
  end_val=""
  res=[]
  result.each do |hits|
    start_val=LiftOver::Query.lift(hits,@@coord_start)
    end_val=LiftOver::Query.lift(hits,@@coord_end)
    if hits.keys[0].split(" ")[9] != "-"
      res.push("#{hits.keys[0].split(" ")[7]}:#{start_val}-#{end_val}")
    else
      res.push("#{hits.keys[0].split(" ")[7]}:#{end_val}-#{end_val}")
    end
    start_val=""
    end_val=""
  end
puts res
end

.parse_remote_chainObject

Retrieve and parse remote chain file from hgdownload.cse.ucsc.edu/goldenPath/



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/bio-liftover.rb', line 34

def self.parse_remote_chain
  begin
    open("http://hgdownload.cse.ucsc.edu/goldenPath/#{@@gen1}/liftOver/#{@@gen1}To#{@@gen2}.over.chain.gz") do |rmt_file|
      gz=Zlib::GzipReader.new(rmt_file).read
      flag=0
      header=""
      coords=[]
      result=[]
      gz.split("\n").each do |line|
        if line.start_with?("chain")
          qry=LiftOver::Query.search_for_chain_by_interval(line)
          if qry == true
            flag=1
            header=line
          else
            flag=0
            if header!=""
              a={"#{header.chomp}"=>coords}
              result.push(a)
            end
            a=""
            header=""
            coords=[]
          end
        else
          if flag==1
            coords.push(line.chomp.split("\t").map(&:to_i))
          end
        end
      end
    scores=[]
    if not result.empty?
      result.each do |hits|
        hits.keys.each do |ev|
          scores.push(ev.split(" ")[1].to_i)
        end      
      end
    else
      puts "Candidate chain not found"
    end
    start_val=""
    end_val=""
    res=[]
    result.each do |hits|
      start_val=LiftOver::Query.lift(hits,@@coord_start)
      end_val=LiftOver::Query.lift(hits,@@coord_end)
      if hits.keys[0].split(" ")[9] != "-"
        res.push("#{hits.keys[0].split(" ")[7]}:#{start_val}-#{end_val}")
      else
        res.push("#{hits.keys[0].split(" ")[7]}:#{end_val}-#{end_val}")
      end
      start_val=""
      end_val=""
    end
    puts res
    end
  rescue SocketError => e
    puts "There's a connection problem with your request.\n Error message: #{e.message}."
  rescue SystemCallError => e
    puts "There's a problem with your request.\n Error message: #{e.message}."
  rescue OpenURI::HTTPError => e 
    puts "Couldn't retrieve chain file. Genome names are case sensitive. Please check http://hgdownload.cse.ucsc.edu/goldenPath\n Error message: #{e.message}."
  exit
  end
end

.search_for_chain_by_interval(string) ⇒ Object

Search for chains that fits on coordinates given to be “lifted”



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/bio-liftover.rb', line 158

def self.search_for_chain_by_interval(string)
  hits=[]
  field=string.split(" ")
  if @@chr_input==field[2]
    itval=IntervalTree::InclusiveTree.new(field[5].to_i...field[6].to_i)
    res=itval.search(@@coord_start...@@coord_end)  
    if not res.nil?
      if not res.empty?
        return true
      else
        return false
      end
    else
      return false
    end
  end
end

Instance Method Details

#initilizeObject



15
# File 'lib/bio-liftover.rb', line 15

def initilize;end