Class: GFA::Record::Path
- Inherits:
-
GFA::Record
- Object
- GFA::Record
- GFA::Record::Path
- Defined in:
- lib/gfa/record/path.rb
Constant Summary collapse
- CODE =
:P
- REQ_FIELDS =
%i[path_name segment_names overlaps]
- OPT_FIELDS =
{}
Constants inherited from GFA::Record
Instance Attribute Summary
Attributes inherited from GFA::Record
Instance Method Summary collapse
-
#include?(segment) ⇒ Boolean
Whether the path includes the GFA::Record::Segment `segment`.
-
#initialize(path_name, segment_names, overlaps, *opt_fields) ⇒ Path
constructor
A new instance of Path.
-
#segment_names_a ⇒ Object
Array of segment names (without orientations) as strings.
-
#segment_sequences(gfa) ⇒ Object
Array of GFA::Field::String with the sequence of each segment, in the correct orientation, taken from `gfa` (which must be indexed).
-
#segments(gfa) ⇒ Object
Extracts all linked segments from `gfa` (which must be indexed).
-
#sequence(gfa) ⇒ Object
Produces the contiguous path sequence based on the segment sequences and orientations from `gfa` (which must be indexed).
Methods inherited from GFA::Record
#[], [], #code, code_class, #dup, #empty?, #eql?, #hash, name_class, #to_s, #type
Constructor Details
#initialize(path_name, segment_names, overlaps, *opt_fields) ⇒ Path
Returns a new instance of Path.
13 14 15 16 17 18 19 |
# File 'lib/gfa/record/path.rb', line 13

# Builds a Path (P) record from its three required values plus any number
# of optional fields.
#
# path_name::     value registered as field 2
# segment_names:: value registered as field 3 (the list of oriented
#                 segment names)
# overlaps::      value registered as field 4; either "*" or a run of
#                 CIGAR-like / jump ("J") operations
# opt_fields::    optional fields, each handed to add_opt_field together
#                 with OPT_FIELDS
#
# NOTE(review): the leading integer and :Z passed to add_field are presumably
# the column index and the field type code; confirm against
# GFA::Record#add_field, which is not visible here.
def initialize(path_name, segment_names, overlaps, *opt_fields)
  @fields = {}

  # The path name and the segment-name list are checked against the same
  # pattern.
  name_pattern    = /[!-)+-<>-~][!-~]*/
  overlap_pattern = /\*|([0-9]+[MIDNSHPX=]|[-+]?[0-9]+J|.)+/

  add_field(2, :Z, path_name, name_pattern)
  add_field(3, :Z, segment_names, name_pattern)
  add_field(4, :Z, overlaps, overlap_pattern)

  opt_fields.each do |opt_field|
    add_opt_field(opt_field, OPT_FIELDS)
  end
end
Instance Method Details
#include?(segment) ⇒ Boolean
Whether the path includes the GFA::Record::Segment `segment`.
37 38 39 40 41 42 43 |
# File 'lib/gfa/record/path.rb', line 37

# Tests whether +segment+ is one of the segments named in this path.
#
# The comparison is +segment.name == name+ for every orientation-free name
# returned by segment_names_a. No class check is enforced on +segment+ (a
# check for GFA::Record::Segment existed but is disabled), so any object
# responding to +name+ is accepted.
#
# Returns true on the first match, false otherwise.
def include?(segment)
  segment_names_a.each do |candidate|
    return true if segment.name == candidate
  end
  false
end
#segment_names_a ⇒ Object
Array of segment names (without orientations) as strings
23 24 25 |
# File 'lib/gfa/record/path.rb', line 23

# Array of segment names as strings, with the trailing orientation sign
# ("+" or "-") removed from each entry.
#
# Entries are separated by either "," or ";" in the segment_names value.
def segment_names_a
  oriented_entries = segment_names.value.split(/[,;]/)
  oriented_entries.map do |entry|
    entry.gsub(/[+-]$/, '')
  end
end
#segment_sequences(gfa) ⇒ Object
Array of GFA::Field::String with the sequence of each segment, in the correct orientation, taken from `gfa` (which must be indexed).
TODO: Distinguish between a direct path (separated by comma) and a jump (separated by semicolon). Jumps include a distance estimate (column 6, optional) which could be used to add Ns between segment sequences (from GFA 1.2)
53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/gfa/record/path.rb', line 53

# Array with the sequence of each segment in the path, in path order and
# in the orientation requested by the path, looked up in +gfa+.
#
# For every "," or ";" separated entry of segment_names, the last character
# is taken as the orientation and the rest as the segment name:
# "+" yields +segment.sequence+ and "-" yields +segment.rc+
# (presumably the reverse complement; confirm against GFA::Record::Segment).
#
# gfa:: object answering +indexed?+ and exposing +segments+, which is
#       indexed by segment name
#
# Raises RuntimeError if +gfa+ is not indexed, if an entry does not end in
# "+" or "-" (including empty entries), or if a named segment is not found
# in +gfa+.
#
# TODO: Distinguish between a direct path (separated by comma) and a jump
# (separated by semicolon). Jumps include a distance estimate (column 6,
# optional) which could be used to add Ns between segment sequences
# (from GFA 1.2)
def segment_sequences(gfa)
  raise 'Unindexed GFA' unless gfa.indexed?

  segment_names.value.split(/[,;]/).map do |oriented_name|
    # Slice instead of mutating the entry, so an empty entry reaches the
    # orientation check below rather than failing with an IndexError.
    orientation = oriented_name[-1]
    name = oriented_name[0...-1]

    # Validate the orientation before touching the index.
    unless ['+', '-'].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    segment = gfa.segments[name]
    # A dangling reference would otherwise surface as NoMethodError on nil.
    raise "Segment not found: #{name} (path: #{path_name})" if segment.nil?

    orientation == '+' ? segment.sequence : segment.rc
  end
end
#segments(gfa) ⇒ Object
Extracts all linked segments from `gfa` (which must be indexed).
29 30 31 32 33 |
# File 'lib/gfa/record/path.rb', line 29

# Looks up every segment named in this path in +gfa+ and returns the
# results as an array, in path order.
#
# gfa:: object answering +indexed?+ and exposing +segments+, which is
#       indexed by segment name
#
# Raises RuntimeError ("Unindexed GFA") unless +gfa.indexed?+ is truthy.
def segments(gfa)
  raise 'Unindexed GFA' unless gfa.indexed?

  segment_names_a.each_with_object([]) do |name, found|
    found << gfa.segments[name]
  end
end
#sequence(gfa) ⇒ Object
Produces the contiguous path sequence based on the segment sequences and orientations from `gfa` (which must be indexed).
TODO: Estimate gaps (Ns) from Jump distances (see `segment_sequences`)
TODO: Attempt reading CIGAR values from the path first, the corresponding links next, and actually performing the pairwise overlap as last resort
TODO: Support ambiguous IUPAC codes for overlap evaluation
78 79 80 81 |
# File 'lib/gfa/record/path.rb', line 78

# Produces the contiguous path sequence as a single string.
#
# The +value+ of every entry returned by segment_sequences(gfa) is appended
# in order; for each one, after_overlap receives the sequence built so far
# and the next segment sequence, and its result is what gets appended
# (presumably the part of the next sequence past the overlap; confirm
# against after_overlap, which is not visible here).
#
# gfa:: passed through to segment_sequences (which requires it to be indexed)
#
# Returns an empty string when the path yields no segment sequences.
def sequence(gfa)
  pieces = segment_sequences(gfa).map(&:value)

  contig = ''
  pieces.each do |piece|
    contig += after_overlap(contig, piece)
  end
  contig
end