Class: Reference_parser

Inherits:
Object
  • Object
show all
Defined in:
lib/pets/parsers/reference_parser.rb

Class Method Summary collapse

Class Method Details

.load(file_path, file_format: nil, feature_type: nil) ⇒ Object



4
5
6
7
8
9
10
11
# File 'lib/pets/parsers/reference_parser.rb', line 4

# Loads a reference annotation file and wraps the parsed regions in a
# Genomic_Feature.
#
# @param file_path [String] path of the annotation file to read
# @param file_format [String, nil] format name; when nil it is derived from
#   the file's final extension
# @param feature_type [String, nil] forwarded unchanged to the format parser
# @return [Genomic_Feature] object built from the parsed regions and their
#   annotations
def self.load(file_path, file_format: nil, feature_type: nil)
	# File.extname only inspects the final extension of the basename, so dots
	# elsewhere in the path ("./data/x.gtf", "dir.v1/x.gtf", "x.v2.gtf") no
	# longer leak into the detected format.
	file_format = File.extname(file_path).delete('.') if file_format.nil?

	regions = nil
	all_attrs = nil
	if file_format == 'gtf'
		regions, all_attrs = parse_gtf(file_path, feature_type: feature_type)
	end

	# Only 'gtf' is handled here; for any other format both values stay nil
	# and are passed through as such.
	Genomic_Feature.new(regions, annotations: all_attrs)
end

.parse_gtf(file_path, feature_type: nil) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/pets/parsers/reference_parser.rb', line 13

# Parses a GTF file into a list of regions plus a per-gene attribute table.
# Column layout reference: https://www.ensembl.org/info/website/upload/gff.html
#
# Lines starting with '#' and blank lines are skipped. Every remaining line is
# split on tabs into the nine GTF columns.
#
# @param file_path [String] path of the GTF file
# @param feature_type [String, nil] when given, only records whose third
#   column equals this value are kept; nil keeps every record
# @return [Array(Array, Hash)] two values:
#   - features: one [seqname without 'chr', start, stop, gene_id] array per
#     kept record, in file order
#   - all_attrs: gene_id => attribute hash (as returned by process_attrs, with
#     'source' and 'feature' added). Records sharing a gene_id overwrite each
#     other, so the last one in the file wins.
def self.parse_gtf(file_path, feature_type: nil)
	features = []
	all_attrs = {}
	# File.foreach closes the file once iteration ends (or raises), unlike a
	# bare File.open(...).each which leaves the handle open.
	File.foreach(file_path) do |line|
		next if line.start_with?('#')
		# A blank line would yield nil for every column and fail further down.
		next if line.strip.empty?

		seqname, source, feature, start, stop, _score, _strand, _frame, attribute = line.chomp.split("\t")
		next unless feature_type.nil? || feature_type == feature

		attrs = process_attrs(attribute, ';', ' ')
		attrs['source'] = source
		attrs['feature'] = feature
		id = attrs['gene_id']
		features << [seqname.gsub('chr', ''), start.to_i, stop.to_i, id]
		all_attrs[id] = attrs
	end
	return features, all_attrs
end