Module: GeneValidator
- Defined in:
- lib/genevalidator.rb,
lib/genevalidator/hsp.rb,
lib/genevalidator/blast.rb,
lib/genevalidator/query.rb,
lib/genevalidator/output.rb,
lib/genevalidator/version.rb,
lib/genevalidator/ext/array.rb,
lib/genevalidator/exceptions.rb,
lib/genevalidator/validation.rb,
lib/genevalidator/arg_validation.rb,
lib/genevalidator/clusterization.rb,
lib/genevalidator/tabular_parser.rb,
lib/genevalidator/validation_test.rb,
lib/genevalidator/get_raw_sequences.rb,
lib/genevalidator/validation_report.rb,
lib/genevalidator/json_to_gv_results.rb,
lib/genevalidator/validation_alignment.rb,
lib/genevalidator/validation_gene_merge.rb,
lib/genevalidator/validation_duplication.rb,
lib/genevalidator/validation_length_rank.rb,
lib/genevalidator/validation_length_cluster.rb,
lib/genevalidator/validation_open_reading_frame.rb,
lib/genevalidator/validation_blast_reading_frame.rb
Overview
Top level module / namespace.
Defined Under Namespace
Modules: ExtraArrayMethods Classes: AliasDuplicationError, AlignmentValidation, AlignmentValidationOutput, BlastRFValidationOutput, BlastReadingFrameValidation, BlastUtils, ClasspathError, Cluster, DuplicationValidation, DuplicationValidationOutput, FetchRawSequences, FileNotFoundException, GVArgValidation, GeneMergeValidation, GeneMergeValidationOutput, HierarchicalClusterization, Hsp, InconsistentTabularFormat, JsonToGVResults, LengthClusterValidation, LengthClusterValidationOutput, LengthRankValidation, LengthRankValidationOutput, NoInternetError, NoMafftInstallationError, NoPIdentError, NoValidationError, NotEnoughHitsError, ORFValidationOutput, OpenReadingFrameValidation, OtherError, Output, Pair, Pair1, PairCluster, Plot, Query, QueryError, RawSequences, ReadingFrameError, ReportClassError, SequenceTypeError, TabularParser, Validate, ValidationClassError, ValidationReport, ValidationTest, Validations
Constant Summary collapse
- VERSION =
'1.6.5'
Class Attribute Summary collapse
-
.config ⇒ Object
Returns the value of attribute config.
-
.mutex ⇒ Object
Returns the value of attribute mutex.
-
.mutex_array ⇒ Object
Returns the value of attribute mutex_array.
-
.mutex_html ⇒ Object
Returns the value of attribute mutex_html.
-
.mutex_json ⇒ Object
Returns the value of attribute mutex_json.
-
.opt ⇒ Object
Returns the value of attribute opt.
-
.overview ⇒ Object
Returns the value of attribute overview.
-
.query_idx ⇒ Object
readonly
Array of the start offsets of each query in the FASTA file.
-
.raw_seq_file_index ⇒ Object
readonly
Returns the value of attribute raw_seq_file_index.
-
.raw_seq_file_load ⇒ Object
readonly
Returns the value of attribute raw_seq_file_load.
Class Method Summary collapse
-
.create_output_folder(output_dir = @config[:html_path], aux_dir = @config[:aux]) ⇒ Object
Creates the output folder and copies the auxiliary files into it.
-
.index_the_input ⇒ Object
Creates a list of indexes of the queries in the FASTA file. These offsets can then be used to quickly read the input file using the start and end positions of each query.
- .init(opt, start_idx = 1, summary = true) ⇒ Object
-
.parse_blast_output_file ⇒ Object
Params:
output
: filename or stream, according to the type
type
: file or stream
Returns an iterator.
-
.run ⇒ Object
Parse the blast output and run validations.
Class Attribute Details
.config ⇒ Object
Returns the value of attribute config.
15 16 17 |
# File 'lib/genevalidator.rb', line 15 def config @config end |
.mutex ⇒ Object
Returns the value of attribute mutex.
20 21 22 |
# File 'lib/genevalidator.rb', line 20 def mutex @mutex end |
.mutex_array ⇒ Object
Returns the value of attribute mutex_array.
20 21 22 |
# File 'lib/genevalidator.rb', line 20 def mutex_array @mutex_array end |
.mutex_html ⇒ Object
Returns the value of attribute mutex_html.
20 21 22 |
# File 'lib/genevalidator.rb', line 20 def mutex_html @mutex_html end |
.mutex_json ⇒ Object
Returns the value of attribute mutex_json.
20 21 22 |
# File 'lib/genevalidator.rb', line 20 def mutex_json @mutex_json end |
.opt ⇒ Object
Returns the value of attribute opt.
15 16 17 |
# File 'lib/genevalidator.rb', line 15 def opt @opt end |
.overview ⇒ Object
Returns the value of attribute overview.
15 16 17 |
# File 'lib/genevalidator.rb', line 15 def overview @overview end |
.query_idx ⇒ Object (readonly)
Array of the start offsets of each query in the FASTA file.
19 20 21 |
# File 'lib/genevalidator.rb', line 19 def query_idx @query_idx end |
.raw_seq_file_index ⇒ Object (readonly)
Returns the value of attribute raw_seq_file_index.
16 17 18 |
# File 'lib/genevalidator.rb', line 16 def raw_seq_file_index @raw_seq_file_index end |
.raw_seq_file_load ⇒ Object (readonly)
Returns the value of attribute raw_seq_file_load.
17 18 19 |
# File 'lib/genevalidator.rb', line 17 def raw_seq_file_load @raw_seq_file_load end |
Class Method Details
.create_output_folder(output_dir = @config[:html_path], aux_dir = @config[:aux]) ⇒ Object
Creates the output folder and copies the auxiliary files into it.
89 90 91 92 93 94 |
# File 'lib/genevalidator.rb', line 89 def create_output_folder(output_dir = @config[:html_path], aux_dir = @config[:aux]) Dir.mkdir(output_dir) aux_files = File.join(aux_dir, 'files/') FileUtils.cp_r(aux_files, output_dir) end |
.index_the_input ⇒ Object
Creates a list of indexes of the queries in the FASTA file. These offsets can then be used to quickly read the input file using the start and end positions of each query.
100 101 102 103 104 |
# File 'lib/genevalidator.rb', line 100 def index_the_input fasta_content = IO.binread(@opt[:input_fasta_file]) @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) } @query_idx.push(fasta_content.length) end |
.init(opt, start_idx = 1, summary = true) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/genevalidator.rb', line 22 def init(opt, start_idx = 1, summary = true) $stderr.puts 'Analysing input arguments' @opt = opt GVArgValidation.validate_args # validates @opt @config = { idx: 0, start_idx: start_idx, summary: summary, type: BlastUtils.guess_sequence_type_from_input_file, filename: File.basename(@opt[:input_fasta_file]), html_path: "#{@opt[:input_fasta_file]}.html", json_file: File.join(File.dirname(@opt[:input_fasta_file]), "#{File.basename(@opt[:input_fasta_file])}.json"), plot_dir: "#{@opt[:input_fasta_file]}.html/files/json", aux: File.(File.join(File.dirname(__FILE__), '../aux')), json_output: [], run_no: 0, output_max: 2500 # max no. of queries in the output file } @overview = { no_queries: 0, scores: [], good_scores: 0, bad_scores: 0, nee: 0, no_mafft: 0, no_internet: 0, map_errors: Hash.new(0), run_time: Hash.new(Pair1.new(0, 0)) } @mutex = Mutex.new @mutex_array = Mutex.new @mutex_html = Mutex.new @mutex_json = Mutex.new create_output_folder index_the_input RawSequences.index_raw_seq_file if @opt[:raw_sequences] end |
.parse_blast_output_file ⇒ Object
Params:
output
: filename or stream, according to the type
type
: file or stream
Returns an iterator.
111 112 113 114 115 116 117 118 |
# File 'lib/genevalidator.rb', line 111 def parse_blast_output_file if @opt[:blast_xml_file] Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum else TabularParser.new end ## TODO: Add a Rescue statement - e.g. if unable to create the Object... end |
.run ⇒ Object
Parse the blast output and run validations
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/genevalidator.rb', line 68 def run # Run BLAST on all sequences (generates @opt[:blast_xml_file]) # if no BLAST OUTPUT file provided... unless @opt[:blast_xml_file] || @opt[:blast_tabular_file] BlastUtils.run_blast_on_input_file end # Obtain fasta file of all BLAST hits if running align or dup validations if @opt[:validations].include?('align') || @opt[:validations].include?('dup') RawSequences.run unless @opt[:raw_sequences] end # Run Validations iterator = parse_blast_output_file (Validations.new).run_validations(iterator) Output.write_json_file(@config[:json_output], @config[:json_file]) Output.(@overview, @config) end |