Class: Reference_parser
- Inherits: Object
- Inheritance hierarchy: Object → Reference_parser
- Defined in: lib/pets/parsers/reference_parser.rb
Class Method Summary collapse
- .load(file_path, file_format: nil, feature_type: nil) ⇒ Object
- .parse_gtf(file_path, feature_type: nil) ⇒ Object
Class Method Details
.load(file_path, file_format: nil, feature_type: nil) ⇒ Object
4 5 6 7 8 9 10 11 |
# File 'lib/pets/parsers/reference_parser.rb', line 4
#
# Loads a reference annotation file and wraps the parsed result in a
# Genomic_Feature.
#
# @param file_path [String] path of the file to load
# @param file_format [String, nil] explicit format name; when nil it is taken
#   from the extension of +file_path+
# @param feature_type [String, nil] forwarded unchanged to parse_gtf
# @return [Genomic_Feature] built from the parsed regions, with the parsed
#   attributes passed as +annotations+
def self.load(file_path, file_format: nil, feature_type: nil)
  # Use only the final extension of the basename, so dots in directory names
  # ("data/v1.2/genes.gtf") or earlier in the file name ("genes.chr1.gtf")
  # do not end up in the detected format.
  file_format = File.extname(file_path).sub(/\A\./, '') if file_format.nil?

  # 'gtf' is the only format handled here; for any other value both locals
  # stay nil and are handed to Genomic_Feature.new as they are.
  regions, all_attrs = parse_gtf(file_path, feature_type: feature_type) if file_format == 'gtf'

  Genomic_Feature.new(regions, annotations: all_attrs)
end
.parse_gtf(file_path, feature_type: nil) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/pets/parsers/reference_parser.rb', line 13
#
# Reads a tab-separated GTF file and collects one region per accepted record.
# Format reference: https://www.ensembl.org/info/website/upload/gff.html
#
# @param file_path [String] path of the GTF file
# @param feature_type [String, nil] when given, only records whose third
#   column equals this value are kept; nil keeps every record
# @return [Array(Array, Hash)] a pair: the list of
#   [seqname, start, stop, gene_id] regions, and a hash mapping each gene_id
#   to its attributes (plus 'source' and 'feature'). Records sharing a
#   gene_id overwrite each other in the hash; the last one wins.
def self.parse_gtf(file_path, feature_type: nil)
  features = []
  all_attrs = {}

  # File.foreach closes the file once iteration finishes (or raises).
  File.foreach(file_path) do |line|
    # Skip comment/header lines and blank lines, which carry no record.
    next if line.start_with?('#') || line.strip.empty?

    seqname, source, feature, start, stop, _score, _strand, _frame, attribute = line.chomp.split("\t")
    next unless feature_type.nil? || feature_type == feature

    # process_attrs is defined outside this view; presumably it splits the
    # attribute column on ';' and each pair on ' ' into a Hash -- confirm.
    attrs = process_attrs(attribute, ';', ' ')
    attrs['source'] = source
    attrs['feature'] = feature
    id = attrs['gene_id']

    # Every 'chr' occurrence is removed from the sequence name.
    features << [seqname.gsub('chr', ''), start.to_i, stop.to_i, id]
    all_attrs[id] = attrs
  end

  [features, all_attrs]
end