Class: IMW::Tools::Summarizer
- Inherits:
-
Object
- Object
- IMW::Tools::Summarizer
- Defined in:
- lib/imw/tools/summarizer.rb
Overview
A class for producing summary data about a collection of resources.
This summary data includes the directory tree, file sizes, file formats, record counts, &c.
Instance Attribute Summary collapse
-
#inputs ⇒ Object
The inputs to this Summarizer.
Instance Method Summary collapse
-
#clear_cached_statistics! ⇒ Object
Reset all the cached statistics of this summarizer to nil.
-
#extension_counts ⇒ Hash
Return the file counts of each extension.
-
#extension_sizes ⇒ Hash
Return the amount of data corresponding to each extension.
-
#initialize(*inputs) ⇒ IMW::Tools::Summarizer
constructor
Initialize a new Summarizer with the given inputs.
-
#most_common_data_format ⇒ String
Returns a guess as to the most common data format for this Summarizer’s inputs.
-
#most_common_extension ⇒ String
Return a guess as to the most common extension format for this Summarizer’s inputs.
-
#most_common_extension_by_count ⇒ Object
Return the most common extension by count of files.
-
#most_common_extension_by_size ⇒ String
Return the most common extension by amount of data.
-
#normalized_extension_counts ⇒ Hash
Return the file counts of each extension, normalized by the total number of files.
-
#normalized_extension_sizes ⇒ Hash
Return the fractional share of each extension by file size.
-
#num_directories ⇒ Integer
Return the number of directories.
-
#num_files ⇒ Integer
Return the number of files.
-
#total_size ⇒ Integer
Return the total size.
Constructor Details
#initialize(*inputs) ⇒ IMW::Tools::Summarizer
Initialize a new Summarizer with the given inputs.
18 19 20 |
# File 'lib/imw/tools/summarizer.rb', line 18
# Build a new Summarizer over the given inputs.
#
# @param inputs [Array] the inputs to summarize; nested arrays are flattened
#   into a single list before being stored through the +inputs=+ writer
def initialize(*inputs)
  flattened_inputs = inputs.flatten
  self.inputs = flattened_inputs
end
Instance Attribute Details
#inputs ⇒ Object
The inputs to this Summarizer.
12 13 14 |
# File 'lib/imw/tools/summarizer.rb', line 12
# The inputs to this Summarizer.
#
# @return [Object] the current value of the @inputs instance variable
def inputs
  @inputs
end
Instance Method Details
#clear_cached_statistics! ⇒ Object
Reset all the cached statistics of this summarizer to nil.
37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/imw/tools/summarizer.rb', line 37
# Reset all the cached statistics of this summarizer to +nil+ so that the
# next call to each statistic recomputes it from the current inputs.
#
# @return [Array<Symbol>] the names of the statistics that were cleared
def clear_cached_statistics!
  [:num_files,
   :num_directories, # was misspelled :num_direcories, so this cache was never cleared
   :total_size,
   :extension_counts,
   :most_common_extension_by_count,
   :normalized_extension_counts,
   :extension_sizes,
   :most_common_extension_by_size,
   :normalized_extension_sizes].each do |statistic|
    instance_variable_set("@#{statistic}", nil)
  end
end
#extension_counts ⇒ Hash
Return the file counts of each extension.
75 76 77 78 79 80 81 82 83 |
# File 'lib/imw/tools/summarizer.rb', line 75
# Return the file counts of each extension.
#
# Directories are skipped. The result is cached in @extension_counts.
#
# @return [Hash] maps each input's extension to the number of
#   non-directory inputs having that extension
def extension_counts
  return @extension_counts if @extension_counts

  tally = {}
  inputs.each do |input|
    unless input.is_directory?
      ext = input.extension
      tally[ext] = tally.fetch(ext, 0) + 1
    end
  end
  @extension_counts = tally
end
#extension_sizes ⇒ Hash
Return the amount of data corresponding to each extension.
111 112 113 114 115 116 117 118 119 |
# File 'lib/imw/tools/summarizer.rb', line 111
# Return the amount of data corresponding to each extension.
#
# Directories are skipped. The result is cached in @extension_sizes.
#
# @return [Hash] maps each input's extension to the summed +size+ of the
#   non-directory inputs having that extension
def extension_sizes
  return @extension_sizes if @extension_sizes

  totals = {}
  inputs.each do |input|
    unless input.is_directory?
      ext = input.extension
      totals[ext] = totals.fetch(ext, 0) + input.size
    end
  end
  @extension_sizes = totals
end
#most_common_data_format ⇒ String
Returns a guess as to the most common data format for this Summarizer’s inputs.
162 163 164 165 |
# File 'lib/imw/tools/summarizer.rb', line 162
# Returns a guess as to the most common data format for this Summarizer's
# inputs.
#
# Any of the listed archive extensions is reported as 'archive'; every other
# value of +most_common_extension+ is returned unchanged.
#
# @return [String] 'archive' or the most common extension
def most_common_data_format
  archive_extensions = ['tar', 'tar.bz2', 'tar.gz', 'tgz', 'tbz2', 'zip', 'rar']
  winner = most_common_extension
  if archive_extensions.include?(winner)
    'archive'
  else
    winner
  end
end
#most_common_extension ⇒ String
Return a guess as to the most common extension format for this Summarizer’s inputs.
149 150 151 152 153 154 155 156 |
# File 'lib/imw/tools/summarizer.rb', line 149
# Return a guess as to the most common extension format for this
# Summarizer's inputs.
#
# When the winner by file count and the winner by data size agree, that
# extension is returned. Otherwise the count winner is chosen only if it
# covers more than half of the files while the size winner covers less than
# half of the data; in every other case the size winner is returned.
#
# @return [String] the chosen extension
def most_common_extension
  by_count = most_common_extension_by_count
  by_size  = most_common_extension_by_size
  return by_size if by_size == by_count # no contest

  # The winners report a blank extension as 'flat', but the normalized hashes
  # are still keyed by the raw (blank) extension. A direct lookup then misses
  # and used to yield nil, crashing the comparisons below. Fall back to the
  # largest share held by a blank key (0.0 if there is none).
  fraction_of = lambda do |fractions, extension|
    if fractions.has_key?(extension)
      fractions[extension]
    else
      fractions.inject(0.0) do |best, (ext, fraction)|
        (ext.to_s.strip.empty? && fraction > best) ? fraction : best
      end
    end
  end

  count_fraction = fraction_of.call(normalized_extension_counts, by_count)
  size_fraction  = fraction_of.call(normalized_extension_sizes, by_size)

  # choose the winner based on differential
  return by_count if count_fraction > 0.5 && size_fraction < 0.5
  by_size # default to size
end
#most_common_extension_by_count ⇒ Object
Return the most common extension by count of files.
86 87 88 89 90 91 92 93 94 |
# File 'lib/imw/tools/summarizer.rb', line 86
# Return the most common extension by count of files.
#
# The extension with the highest entry in +extension_counts+ wins; on a tie
# the one encountered first while iterating wins. A blank (or missing)
# winning extension is reported as 'flat', which is also the result when
# there are no counted files at all. The result is cached.
#
# @return [Object] the winning extension, or 'flat'
def most_common_extension_by_count
  return @most_common_extension_by_count if @most_common_extension_by_count

  highest_count, winner = 0, nil
  extension_counts.each_pair do |extension, count|
    if count > highest_count
      # Track the running maximum; without this update the last extension
      # iterated would always win regardless of its count.
      highest_count = count
      winner = extension
    end
  end
  # to_s guards against a nil winner (no files counted, or a nil extension).
  winner = 'flat' if winner.to_s.strip.empty?
  @most_common_extension_by_count = winner
end
#most_common_extension_by_size ⇒ String
Return the most common extension by amount of data.
124 125 126 127 128 129 130 131 132 |
# File 'lib/imw/tools/summarizer.rb', line 124
# Return the most common extension by amount of data.
#
# The extension with the largest entry in +extension_sizes+ wins; on a tie
# the one encountered first while iterating wins. Extensions whose total
# size is zero never win. A blank (or missing) winning extension is reported
# as 'flat', which is also the result when nothing has a positive size.
# The result is cached.
#
# @return [String] the winning extension, or 'flat'
def most_common_extension_by_size
  return @most_common_extension_by_size if @most_common_extension_by_size

  largest_size, winner = 0, nil
  extension_sizes.each_pair do |extension, size|
    if size > largest_size
      # Track the running maximum; without this update the last extension
      # iterated would always win regardless of its size.
      largest_size = size
      winner = extension
    end
  end
  # to_s guards against a nil winner (nothing sized, or a nil extension).
  winner = 'flat' if winner.to_s.strip.empty?
  @most_common_extension_by_size = winner
end
#normalized_extension_counts ⇒ Hash
Return the file counts of each extension, normalized by the total number of files.
100 101 102 103 104 105 106 |
# File 'lib/imw/tools/summarizer.rb', line 100
# Return the file counts of each extension, normalized by the total number
# of files.
#
# Each entry of +extension_counts+ is divided (as a Float) by +num_files+.
# The result is cached in @normalized_extension_counts.
#
# @return [Hash] maps each extension to its Float share of +num_files+
def normalized_extension_counts
  return @normalized_extension_counts if @normalized_extension_counts

  shares = {}
  extension_counts.each_pair do |extension, count|
    shares[extension] = count.to_f / num_files.to_f
  end
  @normalized_extension_counts = shares
end
#normalized_extension_sizes ⇒ Hash
Return the fractional share of each extension by file size.
137 138 139 140 141 142 143 |
# File 'lib/imw/tools/summarizer.rb', line 137
# Return the fractional share of each extension by file size.
#
# Each entry of +extension_sizes+ is divided (as a Float) by +total_size+.
# The result is cached in @normalized_extension_sizes.
#
# @return [Hash] maps each extension to its Float share of +total_size+
def normalized_extension_sizes
  return @normalized_extension_sizes if @normalized_extension_sizes

  shares = {}
  extension_sizes.each_pair do |extension, size|
    shares[extension] = size.to_f / total_size.to_f
  end
  @normalized_extension_sizes = shares
end
#num_directories ⇒ Integer
Return the number of directories.
61 62 63 |
# File 'lib/imw/tools/summarizer.rb', line 61
# Return the number of directories.
#
# Counts the inputs for which +is_directory?+ is truthy. The previous
# implementation returned the Array of per-input booleans from +collect+
# rather than a count. The result is cached in @num_directories.
#
# @return [Integer] how many inputs are directories
def num_directories
  @num_directories ||= inputs.select { |input| input.is_directory? }.size
end
#num_files ⇒ Integer
Return the number of files.
54 55 56 |
# File 'lib/imw/tools/summarizer.rb', line 54
# Return the number of files.
#
# Counts only the inputs that are not directories. Directories are excluded
# so that this agrees with +extension_counts+, which skips them too;
# counting them would make the shares in +normalized_extension_counts+ sum
# to less than one. The result is cached in @num_files.
#
# @return [Integer] how many inputs are not directories
def num_files
  @num_files ||= inputs.reject { |input| input.is_directory? }.size
end
#total_size ⇒ Integer
Return the total size.
68 69 70 |
# File 'lib/imw/tools/summarizer.rb', line 68
# Return the total size.
#
# Adds up +size+ over every input, starting from 0. The result is cached in
# @total_size.
#
# @return [Integer] the sum of the inputs' sizes
def total_size
  return @total_size if @total_size

  sizes = inputs.map { |input| input.size }
  @total_size = sizes.inject(0) { |running_total, size| size + running_total }
end