Module: GeneValidator

Defined in:
lib/genevalidator.rb,
lib/genevalidator/hsp.rb,
lib/genevalidator/blast.rb,
lib/genevalidator/query.rb,
lib/genevalidator/output.rb,
lib/genevalidator/version.rb,
lib/genevalidator/ext/array.rb,
lib/genevalidator/exceptions.rb,
lib/genevalidator/validation.rb,
lib/genevalidator/arg_validation.rb,
lib/genevalidator/clusterization.rb,
lib/genevalidator/tabular_parser.rb,
lib/genevalidator/validation_test.rb,
lib/genevalidator/get_raw_sequences.rb,
lib/genevalidator/validation_report.rb,
lib/genevalidator/json_to_gv_results.rb,
lib/genevalidator/validation_alignment.rb,
lib/genevalidator/validation_gene_merge.rb,
lib/genevalidator/validation_duplication.rb,
lib/genevalidator/validation_length_rank.rb,
lib/genevalidator/validation_length_cluster.rb,
lib/genevalidator/validation_open_reading_frame.rb,
lib/genevalidator/validation_blast_reading_frame.rb

Overview

Top level module / namespace.

Defined Under Namespace

Modules: ExtraArrayMethods Classes: AliasDuplicationError, AlignmentValidation, AlignmentValidationOutput, BlastRFValidationOutput, BlastReadingFrameValidation, BlastUtils, ClasspathError, Cluster, DuplicationValidation, DuplicationValidationOutput, FetchRawSequences, FileNotFoundException, GVArgValidation, GeneMergeValidation, GeneMergeValidationOutput, HierarchicalClusterization, Hsp, InconsistentTabularFormat, JsonToGVResults, LengthClusterValidation, LengthClusterValidationOutput, LengthRankValidation, LengthRankValidationOutput, NoInternetError, NoMafftInstallationError, NoPIdentError, NoValidationError, NotEnoughHitsError, ORFValidationOutput, OpenReadingFrameValidation, OtherError, Output, Pair, Pair1, PairCluster, Plot, Query, QueryError, RawSequences, ReadingFrameError, ReportClassError, SequenceTypeError, TabularParser, Validate, ValidationClassError, ValidationReport, ValidationTest, Validations

Constant Summary collapse

VERSION =
'1.6.5'

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.config ⇒ Object

Returns the value of attribute config.



15
16
17
# File 'lib/genevalidator.rb', line 15

# Returns @config, the settings hash that init builds (counters, sequence
# type, and the HTML/JSON/plot/aux paths derived from the input FASTA path).
def config
  @config
end

.mutex ⇒ Object

Returns the value of attribute mutex.



20
21
22
# File 'lib/genevalidator.rb', line 20

# Returns @mutex, a Mutex created in init.
# NOTE(review): what this lock guards is not visible here — confirm at the
# call sites.
def mutex
  @mutex
end

.mutex_array ⇒ Object

Returns the value of attribute mutex_array.



20
21
22
# File 'lib/genevalidator.rb', line 20

# Returns @mutex_array, a Mutex created in init.
# NOTE(review): presumably guards a shared results array — confirm at the
# call sites.
def mutex_array
  @mutex_array
end

.mutex_html ⇒ Object

Returns the value of attribute mutex_html.



20
21
22
# File 'lib/genevalidator.rb', line 20

# Returns @mutex_html, a Mutex created in init.
# NOTE(review): presumably serialises writes of the HTML output — confirm at
# the call sites.
def mutex_html
  @mutex_html
end

.mutex_json ⇒ Object

Returns the value of attribute mutex_json.



20
21
22
# File 'lib/genevalidator.rb', line 20

# Returns @mutex_json, a Mutex created in init.
# NOTE(review): presumably serialises writes of the JSON output — confirm at
# the call sites.
def mutex_json
  @mutex_json
end

.opt ⇒ Object

Returns the value of attribute opt.



15
16
17
# File 'lib/genevalidator.rb', line 15

# Returns @opt, the options object handed to init (stored as given and then
# checked by GVArgValidation.validate_args).
def opt
  @opt
end

.overview ⇒ Object

Returns the value of attribute overview.



15
16
17
# File 'lib/genevalidator.rb', line 15

# Returns @overview, the summary hash that init creates with zeroed counters
# (:no_queries, :good_scores, :bad_scores, :nee, :no_mafft, :no_internet),
# an empty :scores list and the :map_errors / :run_time lookup hashes.
def overview
  @overview
end

.query_idx ⇒ Object (readonly)

Array of the start offsets of each query in the FASTA file, followed by the total length of the file content.



19
20
21
# File 'lib/genevalidator.rb', line 19

# Returns @query_idx, which index_the_input fills with the start offset of
# every query found in the input FASTA content, followed by the total length
# of that content.
def query_idx
  @query_idx
end

.raw_seq_file_index ⇒ Object (readonly)

Returns the value of attribute raw_seq_file_index.



16
17
18
# File 'lib/genevalidator.rb', line 16

# Returns @raw_seq_file_index.
# NOTE(review): this variable is not assigned anywhere in the code shown
# here; presumably it is set while indexing the raw sequence file — confirm.
def raw_seq_file_index
  @raw_seq_file_index
end

.raw_seq_file_load ⇒ Object (readonly)

Returns the value of attribute raw_seq_file_load.



17
18
19
# File 'lib/genevalidator.rb', line 17

# Returns @raw_seq_file_load.
# NOTE(review): this variable is not assigned anywhere in the code shown
# here; presumably it holds the loaded raw sequence index — confirm.
def raw_seq_file_load
  @raw_seq_file_load
end

Class Method Details

.create_output_folder(output_dir = @config[:html_path], aux_dir = @config[:aux]) ⇒ Object

Creates the output folder and copies the auxiliary `files/` folder into it.



89
90
91
92
93
94
# File 'lib/genevalidator.rb', line 89

# Creates +output_dir+ and recursively copies the 'files/' sub-folder of
# +aux_dir+ into it.
#
# output_dir - directory to create (defaults to @config[:html_path]).
# aux_dir    - directory holding the 'files/' folder to copy
#              (defaults to @config[:aux]).
#
# Dir.mkdir is used as-is, so it raises when the directory cannot be created
# (for example because it already exists).
def create_output_folder(output_dir = @config[:html_path],
                         aux_dir = @config[:aux])
  Dir.mkdir(output_dir)
  FileUtils.cp_r(File.join(aux_dir, 'files/'), output_dir)
end

.index_the_input ⇒ Object

Creates a list of the start offsets of the queries in the input FASTA file. These offsets can then be used to quickly read the input file using the start and end positions of each query.



100
101
102
103
104
# File 'lib/genevalidator.rb', line 100

# Builds @query_idx: the offset at which every query starts in the input
# FASTA file (@opt[:input_fasta_file]), followed by the total length of the
# file content, so that query i spans @query_idx[i]...@query_idx[i + 1].
#
# The file is read in binary mode, so the offsets are byte offsets.
#
# Returns the @query_idx array.
def index_the_input
  # File.binread rather than IO.binread: the IO class method treats a path
  # beginning with "|" as a command to run.
  fasta_content = File.binread(@opt[:input_fasta_file])
  @query_idx = []
  # A FASTA record starts with '>' at the beginning of a line. A '>' that
  # appears inside a description line (e.g. merged deflines such as
  # ">id1 desc >id2 desc") must not be counted as a new query.
  fasta_content.scan(/^>/) { @query_idx << Regexp.last_match.begin(0) }
  @query_idx.push(fasta_content.length)
end

.init(opt, start_idx = 1, summary = true) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/genevalidator.rb', line 22

# Prepares a GeneValidator run.
#
# Stores and validates the options, builds the @config and @overview hashes,
# creates the mutexes, creates the output folder, indexes the input FASTA
# and, when @opt[:raw_sequences] is set, indexes the raw sequence file.
#
# opt       - options object; must answer [:input_fasta_file] and
#             [:raw_sequences].
# start_idx - stored in @config[:start_idx] (default 1).
# summary   - stored in @config[:summary] (default true).
#
# Returns the result of RawSequences.index_raw_seq_file when
# @opt[:raw_sequences] is set, otherwise nil.
def init(opt, start_idx = 1, summary = true)
  $stderr.puts 'Analysing input arguments'
  @opt = opt
  GVArgValidation.validate_args # validates @opt

  # The sequence type is resolved before the path-derived entries, which is
  # the order in which the values were evaluated inside the hash literal.
  seq_type   = BlastUtils.guess_sequence_type_from_input_file
  fasta_path = @opt[:input_fasta_file]
  html_path  = "#{fasta_path}.html"
  json_name  = "#{File.basename(fasta_path)}.json"

  @config = {
    idx: 0,
    start_idx: start_idx,
    summary: summary,

    type: seq_type,
    filename: File.basename(fasta_path),
    html_path: html_path,
    json_file: File.join(File.dirname(fasta_path), json_name),
    plot_dir: "#{html_path}/files/json",
    aux: File.expand_path(File.join(File.dirname(__FILE__), '../aux')),

    json_output: [],
    run_no: 0,
    output_max: 2500 # max no. of queries in the output file
  }

  @overview = {
    no_queries: 0,
    scores: [],
    good_scores: 0,
    bad_scores: 0,
    nee: 0,
    no_mafft: 0,
    no_internet: 0,
    map_errors: Hash.new(0),
    # NOTE(review): every missing key returns this one shared Pair1 instance;
    # callers must replace the value rather than mutate it in place.
    run_time: Hash.new(Pair1.new(0, 0))
  }

  @mutex, @mutex_array, @mutex_html, @mutex_json = Array.new(4) { Mutex.new }

  create_output_folder
  index_the_input
  RawSequences.index_raw_seq_file if @opt[:raw_sequences]
end

.parse_blast_output_file ⇒ Object

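Takes no parameters; the source is chosen from the options. Returns an iterator over the BLAST output: an enumerator over the BLAST XML file when `@opt[:blast_xml_file]` is set, otherwise a `TabularParser`.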



111
112
113
114
115
116
117
118
# File 'lib/genevalidator.rb', line 111

# Picks the parser for the BLAST output.
#
# Returns an enumerator built from Bio::BlastXMLParser::XmlIterator over
# @opt[:blast_xml_file] when that option is set, otherwise a new
# TabularParser.
def parse_blast_output_file
  xml_file = @opt[:blast_xml_file]
  return TabularParser.new unless xml_file

  Bio::BlastXMLParser::XmlIterator.new(xml_file).to_enum
  ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
end

.run ⇒ Object

Parse the blast output and run validations



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/genevalidator.rb', line 68

# Runs the full pipeline: BLAST (if needed), raw sequence retrieval
# (if needed), the validations, and finally the JSON file and footer output.
#
# Returns whatever Output.print_footer returns.
def run
  # Without a BLAST output file (XML or tabular) in the options, BLAST is run
  # on all input sequences first (generates @opt[:blast_xml_file]).
  blast_output_given = @opt[:blast_xml_file] || @opt[:blast_tabular_file]
  BlastUtils.run_blast_on_input_file unless blast_output_given

  # The align and dup validations need a fasta file of all BLAST hits; it is
  # fetched unless raw sequences were supplied in the options.
  needs_hit_sequences = %w[align dup].any? do |validation|
    @opt[:validations].include?(validation)
  end
  RawSequences.run if needs_hit_sequences && !@opt[:raw_sequences]

  # Run the validations over the parsed BLAST output.
  blast_results = parse_blast_output_file
  Validations.new.run_validations(blast_results)

  Output.write_json_file(@config[:json_output], @config[:json_file])
  Output.print_footer(@overview, @config)
end