Class: JgiGenesGff

Inherits:
Object
  • Object
show all
Defined in:
lib/jgi_genes.rb

Direct Known Subclasses

EupathDBGFF

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ JgiGenesGff

Returns a new instance of JgiGenesGff.



13
14
15
16
# File 'lib/jgi_genes.rb', line 13

# Open the file at +path+ for reading and buffer its first record.
#
# The handle is stored in @jgi_file and is left open by this method. The
# value returned by read_record is stored in @next_gff, which next_gene
# uses as its starting record.
def initialize(path)
  # File.new without a block is the same call as the block-less File.open.
  @jgi_file = File.new(path, "r")
  # Must run after @jgi_file is assigned, matching the original ordering.
  @next_gff = read_record
end

Instance Method Details

#distance_iterator ⇒ Object



95
96
97
# File 'lib/jgi_genes.rb', line 95

# Create an iterator bound to this object.
#
# Returns a new JgiGenesIterator constructed with this object as its only
# argument.
def distance_iterator
  JgiGenesIterator.new(self)
end

#next_gene ⇒ Object

Returns an enhanced_gene object, or nil if none exists.



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/jgi_genes.rb', line 19

# Assemble the next gene from the buffered record and the records after it.
#
# Starts from the record held in @next_gff, then keeps calling read_record
# until it returns a falsy value or a record whose parse_name differs from
# the current gene's name. Whatever ended the loop (the first record of the
# following gene, or the falsy value) is stored back into @next_gff so the
# next call resumes from there.
#
# Returns a PositionedGene with seqname, name, strand, start, exons, cds and
# protein_id assigned, or nil when no record is buffered.
#
# Raises Exception when:
# * the buffered record is not an 'exon' feature,
# * a record with the same name disagrees on strand, seqname or source,
# * a record has a feature other than exon, CDS, start_codon or stop_codon.
# When it raises, @next_gff is left unchanged.
def next_gene
  head = @next_gff
  return nil unless head

  # Every gene is assumed to open with an exon line; anything else is fatal.
  unless head.feature === 'exon'
    p head.feature
    raise Exception, "Assumption failed: exon is not first feature in the gene"
  end

  seqname = head.seqname
  strand = head.strand
  source = head.source
  name = parse_name(head.attributes)

  # Converts a record's start/end pair into a Bio::Location.
  span_for = lambda do |record|
    span = Bio::Location.new
    span.from = record.start
    span.to = record.end
    span
  end

  exons = [span_for.call(head)]
  cds = []
  protein_id = nil # stays nil unless a CDS line is seen for this gene

  # Consume records until the input runs out or another gene begins.
  while (upcoming = read_record)
    break unless parse_name(upcoming.attributes) === name

    # All lines of one gene must agree on strand, sequence name and source.
    if upcoming.strand != strand || upcoming.seqname != seqname || upcoming.source != source
      puts "EXCEPTION !!!!!!!!!!!!!!!!!!!"
      raise Exception, 'Data bug in JGI file or parsing is being done incorrectly'
    end

    span = span_for.call(upcoming)
    case upcoming.feature
    when 'exon'
      exons << span
    when 'CDS'
      cds << span
      # The most recent CDS line wins if several are present.
      protein_id = parse_protein_id(upcoming.attributes)
    when 'start_codon', 'stop_codon'
      # Recognised, but nothing from these lines is kept on the gene.
    else
      puts "EXCEPTION !!!!!!!!!!!!!!!!!!!"
      raise Exception, "Unknown feature type #{upcoming.feature} found."
    end
  end

  # Hand the terminating record (or falsy end-of-input value) to the next call.
  @next_gff = upcoming

  gene = PositionedGene.new
  gene.seqname = seqname
  gene.name = name
  gene.strand = strand
  gene.start = exons.first.from # start is taken from the first exon line read
  gene.exons = exons
  gene.cds = cds
  gene.protein_id = protein_id
  gene
end