Class: GFA::Record::Path

Inherits:
GFA::Record show all
Defined in:
lib/gfa/record/path.rb

Constant Summary collapse

CODE =
:P
REQ_FIELDS =
%i[path_name segment_names overlaps]
OPT_FIELDS =
{}

Constants inherited from GFA::Record

CODES, TYPES

Instance Attribute Summary

Attributes inherited from GFA::Record

#fields

Instance Method Summary collapse

Methods inherited from GFA::Record

#[], [], #code, code_class, #dup, #empty?, #eql?, #hash, name_class, #to_s, #type

Constructor Details

#initialize(path_name, segment_names, overlaps, *opt_fields) ⇒ Path

Returns a new instance of Path.



13
14
15
16
17
18
19
# File 'lib/gfa/record/path.rb', line 13

# Builds a Path record from its three required columns plus any number of
# optional fields.
#
# Each required value is registered through add_field together with its
# column index, the :Z type code and a pattern (presumably used by add_field
# to validate the value -- confirm in GFA::Record). Optional fields are
# forwarded one by one to add_opt_field along with OPT_FIELDS.
def initialize(path_name, segment_names, overlaps, *opt_fields)
  @fields = {}

  # path_name and segment_names share the same pattern
  name_pattern = /[!-)+-<>-~][!-~]*/
  overlaps_pattern = /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/

  add_field(2, :Z, path_name, name_pattern)
  add_field(3, :Z, segment_names, name_pattern)
  add_field(4, :Z, overlaps, overlaps_pattern)

  opt_fields.each do |opt_field|
    add_opt_field(opt_field, OPT_FIELDS)
  end
end

Instance Method Details

#include?(segment) ⇒ Boolean

Does the path include the given segment (a GFA::Record::Segment)?

Returns:

  • (Boolean)


37
38
39
40
41
42
43
# File 'lib/gfa/record/path.rb', line 37

# Does this path list +segment+ among its segment names?
#
# The class of +segment+ is not checked: it only has to respond to +name+,
# which is compared (with ==) against each entry of segment_names_a.
def include?(segment)
  segment_names_a.any? do |candidate|
    segment.name == candidate
  end
end

#segment_names_a ⇒ Object

Array of segment names (without orientations) as strings



23
24
25
# File 'lib/gfa/record/path.rb', line 23

# Segment names of this path as plain strings, in path order.
#
# The segment_names value is split on ',' and ';', and a trailing '+' or '-'
# (the orientation mark) is removed from every entry.
def segment_names_a
  oriented_names = segment_names.value.split(/[,;]/)
  oriented_names.map do |oriented_name|
    oriented_name.gsub(/[+-]$/, '')
  end
end

#segment_sequences(gfa) ⇒ Object

Array of GFA::Field::String with the sequences from each segment featuring the correct orientation from a gfa (which must be indexed)

TODO: Distinguish between a direct path (separated by comma) and a jump (separated by semicolon). Jumps include a distance estimate (column 6, optional) which could be used to add Ns between segment sequences (from GFA 1.2)



53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/gfa/record/path.rb', line 53

# Sequences of every segment in the path, in path order and in the
# orientation requested by the path.
#
# Each entry of segment_names (split on ',' or ';') must end in '+' or '-'.
# For '+' the segment's +sequence+ is returned, for '-' its +rc+.
#
# gfa:: a GFA object; must answer true to +indexed?+ and expose +segments+,
#       which is looked up by segment name.
#
# Raises RuntimeError when +gfa+ is not indexed, when an entry has no valid
# orientation mark (including empty entries), or when a named segment is not
# found in +gfa+.
def segment_sequences(gfa)
  raise "Unindexed GFA" unless gfa.indexed?

  segment_names.value.split(/[,;]/).map do |oriented_name|
    # The last character is the orientation, everything before it the name.
    # Slicing (rather than assigning into the string) also copes with empty
    # entries.
    orientation = oriented_name[-1]
    name = oriented_name[0...-1]

    # Validate before the lookup so a malformed entry is always reported as
    # such, never as a failed segment lookup.
    unless ['+', '-'].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    segment = gfa.segments[name]
    raise "Unknown segment: #{name} (path: #{path_name})" if segment.nil?

    orientation == '+' ? segment.sequence : segment.rc
  end
end

#segments(gfa) ⇒ Object

Extracts all linked segments from gfa (which must be indexed)



29
30
31
32
33
# File 'lib/gfa/record/path.rb', line 29

# Looks up every segment named in this path within +gfa+, in path order.
#
# gfa:: a GFA object; must answer true to +indexed?+, otherwise a
#       RuntimeError ('Unindexed GFA') is raised.
#
# Returns one entry of gfa.segments per name in segment_names_a.
def segments(gfa)
  unless gfa.indexed?
    raise 'Unindexed GFA'
  end

  segment_names_a.map do |segment_name|
    gfa.segments[segment_name]
  end
end

#sequence(gfa) ⇒ Object

Produce the contiguous path sequence based on the segment sequences and orientations from a gfa (which must be indexed)

TODO: Estimate gaps (Ns) from Jump distances (see segment_sequences)

TODO: Attempt reading CIGAR values from the path first, the corresponding links next, and actually performing the pairwise overlap as last resort

TODO: Support ambiguous IUPAC codes for overlap evaluation



78
79
80
81
# File 'lib/gfa/record/path.rb', line 78

# Assembles the path sequence as a single string.
#
# The oriented segment sequences are taken from segment_sequences(gfa) and
# their +value+ is appended left to right; for each one, after_overlap is
# called with the string assembled so far and the next piece, and its
# result is what gets appended.
def sequence(gfa)
  pieces = segment_sequences(gfa).map { |field| field.value }

  pieces.reduce('') do |assembled, piece|
    assembled + after_overlap(assembled, piece)
  end
end