Module: MiseqRunStats
- Defined in:
- lib/miseq_run_stats.rb
Defined Under Namespace
Classes: AssemblyRunStats, AssemblySampleStats, ResequencingRunStats, ResequencingSampleStats
Instance Method Summary
collapse
Instance Method Details
#parse_assembly_run_stats(xml_file, original_sample_names = nil) ⇒ Object
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/miseq_run_stats.rb', line 31
# Parses a MiSeq assembly run-statistics XML file into an AssemblyRunStats.
#
# Run-level totals come from the +RunStats+ elements (if several are present,
# the values of the last one are kept). Per-sample cluster counts come from
# the +SampleStatistics+ elements and per-sample assembly metrics from the
# +AssemblyStatistics+ elements; the two lists are paired by document order.
#
# @param xml_file [String] path of the XML file to read
# @param original_sample_names [Array<String>, nil] when given, each sample
#   name found in the XML is replaced by the first entry that matches it
#   (each entry is interpolated into a regular expression as-is); samples
#   matching no entry are skipped. When nil, the names from the XML are used.
# @return [AssemblyRunStats] run totals plus a +sample_stats+ hash that maps
#   sample name to an AssemblySampleStats
def parse_assembly_run_stats(xml_file, original_sample_names = nil)
  xml = Nokogiri::XML(File.read(xml_file))
  assembly_run_stats = AssemblyRunStats.new

  xml.search('//RunStats').each do |run_stats|
    # Yield is stored in gigabases.
    assembly_run_stats.number_of_bases = run_stats.search('YieldInBasesPF').text.to_f / 1_000_000_000
    assembly_run_stats.number_of_clusters = run_stats.search('NumberOfClustersPF').text.to_i
  end

  assembly_stats = xml.search('//AssemblyStatistics').map do |assembly_sample_stats|
    {
      :number_of_contigs => assembly_sample_stats.search('NumberOfContigs').text.to_i,
      # Mean length is a decimal in the XML; truncate it to a whole number.
      :mean_contig_size => assembly_sample_stats.search('MeanContigLength').text.to_f.to_i,
      :n50 => assembly_sample_stats.search('N50').text.to_i,
      :number_of_bases => assembly_sample_stats.search('BaseCount').text.to_i
    }
  end

  assembly_run_stats.sample_stats = {}
  xml.search('//SampleStatistics').each do |sample_stats|
    # Take this sample's positional partner before any skip below, so that a
    # skipped sample does not shift its metrics onto the next sample.
    assembly_sample_stats = assembly_stats.shift

    sample_name = sample_stats.search('SampleName').text
    unless original_sample_names.nil?
      sample_name = original_sample_names.find { |original_sample_name| sample_name =~ /#{original_sample_name}/ }
    end
    next if sample_name.nil?

    per_sample = AssemblySampleStats.new
    per_sample.sample_name = sample_name
    # Kept as the raw text from the XML (not converted to an Integer).
    per_sample.number_of_clusters = sample_stats.search('NumberOfClustersPF').text
    per_sample.number_of_contigs = assembly_sample_stats[:number_of_contigs]
    per_sample.mean_contig_size = assembly_sample_stats[:mean_contig_size]
    per_sample.n50 = assembly_sample_stats[:n50]
    per_sample.number_of_bases = assembly_sample_stats[:number_of_bases]
    assembly_run_stats.sample_stats[sample_name] = per_sample
  end

  assembly_run_stats
end
|
#parse_resequencing_run_stats(xml_file, original_sample_names = nil) ⇒ Object
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# File 'lib/miseq_run_stats.rb', line 7
# Parses a MiSeq resequencing run-statistics XML file into a
# ResequencingRunStats.
#
# Run-level totals come from the +RunStats+ elements (if several are present,
# the values of the last one are kept). One ResequencingSampleStats is built
# per +SummarizedSampleStatisics+ element.
#
# @param xml_file [String] path of the XML file to read
# @param original_sample_names [Array<String>, nil] when given, each sample
#   name found in the XML is replaced by the first entry that matches it
#   (each entry is interpolated into a regular expression as-is); samples
#   matching no entry are skipped. When nil, the names from the XML are used.
# @return [ResequencingRunStats] run totals plus a +sample_stats+ hash that
#   maps sample name to a ResequencingSampleStats
def parse_resequencing_run_stats(xml_file, original_sample_names = nil)
  xml = Nokogiri::XML(File.read(xml_file))
  resequencing_run_stats = ResequencingRunStats.new

  xml.search('//RunStats').each do |run_stats|
    # Yield is stored in gigabases.
    resequencing_run_stats.number_of_bases = run_stats.search('YieldInBasesPF').text.to_f / 1_000_000_000
    resequencing_run_stats.number_of_clusters = run_stats.search('NumberOfClustersPF').text.to_i
  end

  resequencing_run_stats.sample_stats = {}
  # NOTE(review): the element name is spelled "Statisics" here; presumably it
  # mirrors the spelling in the XML being parsed -- confirm before changing.
  xml.search('//SummarizedSampleStatisics').each do |summarised_samples_stats|
    sample_name = summarised_samples_stats.search('SampleName').text
    unless original_sample_names.nil?
      sample_name = original_sample_names.find { |original_sample_name| sample_name =~ /#{original_sample_name}/ }
    end
    # Skip samples that match none of the requested names instead of storing
    # them under a nil key (same rule as parse_assembly_run_stats).
    next if sample_name.nil?

    # All per-sample values are kept as the raw text from the XML.
    per_sample = ResequencingSampleStats.new
    per_sample.sample_name = sample_name
    per_sample.number_of_clusters = summarised_samples_stats.search('NumberOfClustersPF').text
    per_sample.number_of_forward_reads_aligned = summarised_samples_stats.search('ClustersAlignedR1').text
    per_sample.number_of_reverse_reads_aligned = summarised_samples_stats.search('ClustersAlignedR2').text
    per_sample.coverage = summarised_samples_stats.search('WeightedCoverage').text
    per_sample.number_of_snps = summarised_samples_stats.search('NumberHomozygousSNPs').text
    resequencing_run_stats.sample_stats[sample_name] = per_sample
  end

  resequencing_run_stats
end
|