Module: TSV
- Extended by:
- Annotation
- Defined in:
- lib/scout/tsv.rb,
lib/scout/tsv/csv.rb,
lib/scout/tsv/open.rb,
lib/scout/tsv/util.rb,
lib/scout/tsv/index.rb,
lib/scout/tsv/attach.rb,
lib/scout/tsv/dumper.rb,
lib/scout/tsv/parser.rb,
lib/scout/tsv/stream.rb,
lib/scout/tsv/traverse.rb,
lib/scout/tsv/change_id.rb,
lib/scout/tsv/util/melt.rb,
lib/scout/tsv/util/sort.rb,
lib/scout/tsv/util/unzip.rb,
lib/scout/tsv/transformer.rb,
lib/scout/tsv/util/filter.rb,
lib/scout/tsv/util/select.rb,
lib/scout/association/item.rb,
lib/scout/tsv/util/process.rb,
lib/scout/tsv/util/reorder.rb,
lib/scout/tsv/change_id/translate.rb
Defined Under Namespace
Classes: Dumper, Parser, Transformer
Constant Summary
- KEY_PARAMETERS =
  begin
    params = []
    (method(:parse_line).parameters + method(:parse_stream).parameters).each do |type, name|
      params << name if type == :key
    end
    params
  end
Class Method Summary
- .all_fields(file) ⇒ Object
- .attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) ⇒ Object
- .cast_value(value, cast) ⇒ Object
- .change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false) ⇒ Object
- .change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil) ⇒ Object
- .collapse_stream(stream, *args, **kwargs, &block) ⇒ Object
- .concat_streams(streams) ⇒ Object
- .csv(obj, options = {}) ⇒ Object
- .field_match_counts(file, values, options = {}) ⇒ Object
- .identifier_files(obj) ⇒ Object
- .identify_field(key_field, fields, name, strict: nil) ⇒ Object
- .identify_field_in_obj(obj, field) ⇒ Object
- .incidence(tsv, **kwargs) ⇒ Object
- .index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) ⇒ Object
- .match_keys(source, other, match_key: nil, other_key: nil) ⇒ Object
- .open(file, options = {}) ⇒ Object
- .original_setup ⇒ Object
- .parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block) ⇒ Object
- .parse_header(stream, fix: true, header_hash: '#', sep: "\t") ⇒ Object
- .parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) ⇒ Object
- .parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) ⇒ Object
- .paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil) ⇒ Object
- .pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
- .process_stream(stream, header_hash: "#", &block) ⇒ Object
- .range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
- .select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) ⇒ Object
- .select_prefix_str(select) ⇒ Object
- .setup(obj, *rest, &block) ⇒ Object
- .str2options(str) ⇒ Object
- .str_setup(option_str, obj) ⇒ Object
- .translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) ⇒ Object
- .translation_index(files, source, target, persist_options = {}) ⇒ Object
- .translation_path(file_fields, source, target) ⇒ Object
- .traverse(*args, **kwargs, &block) ⇒ Object
- .unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) ⇒ Object
Instance Method Summary
- #[](key, *rest) ⇒ Object
- #add_field(name = nil) ⇒ Object
- #all_fields ⇒ Object
- #attach(*args, **kwargs) ⇒ Object
- #change_id(*args, **kwargs) ⇒ Object
- #change_key(*args, **kwargs) ⇒ Object
- #chunked_values_at(keys, max = 5000) ⇒ Object
- #collapse_stream(*args, **kwargs, &block) ⇒ Object
- #collect(*args, &block) ⇒ Object
- #column(field, **kwargs) ⇒ Object
- #digest_str ⇒ Object
- #dumper_stream(options = {}) ⇒ Object (also: #stream)
- #each(*args, &block) ⇒ Object
- #filter(filter_dir = nil) ⇒ Object
- #fingerprint ⇒ Object
- #identifier_files ⇒ Object
- #identify_field(name, strict: nil) ⇒ Object
- #index(*args, **kwargs, &block) ⇒ Object
- #inspect ⇒ Object
- #melt_columns(value_field, column_field) ⇒ Object
- #merge(other) ⇒ Object
- #options ⇒ Object
- #page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object
- #pos_index(*args, **kwargs, &block) ⇒ Object
- #process(field, &block) ⇒ Object
- #range_index(*args, **kwargs, &block) ⇒ Object
- #remove_duplicates(pivot = 0) ⇒ Object
- #reorder(key_field = nil, fields = nil, merge: true, one2one: true, **kwargs) ⇒ Object
- #reset_filters ⇒ Object
- #select(method = nil, invert = false, &block) ⇒ Object
- #slice(fields, **kwargs) ⇒ Object
- #sort(field = nil, just_keys = false, &block) ⇒ Object
- #sort_by(field = nil, just_keys = false, &block) ⇒ Object
- #subset(keys) ⇒ Object
- #summary ⇒ Object
- #to_double ⇒ Object
- #to_flat ⇒ Object
- #to_hash ⇒ Object
- #to_list ⇒ Object
- #to_s(options = {}) ⇒ Object
- #to_single ⇒ Object
- #translate(*args, **kwargs) ⇒ Object
- #transpose(key_field = "Unkown ID") ⇒ Object
- #transpose_double(key_field = "Unkown ID") ⇒ Object
- #transpose_list(key_field = "Unkown ID") ⇒ Object
- #traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: false, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) ⇒ Object (also: #through)
- #unzip(*args, **kwargs) ⇒ Object
- #unzip_replicates ⇒ Object
- #with_filters(filters, &block) ⇒ Object
- #with_unnamed(unnamed = true) ⇒ Object
- #write_file(file) ⇒ Object
- #zip(merge = false, field = "New Field", sep = ":") ⇒ Object
- #zip_new(key, values, insitu: :lax) ⇒ Object
Methods included from Annotation
list_tsv_values, load_info, load_tsv, load_tsv_values, obj_tsv_values, resolve_tsv_array, tsv
Class Method Details
.all_fields(file) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 152

def self.all_fields(file)
  if file.respond_to?(:all_fields)
    file.all_fields
  else
    TSV.parse_header(file)["all_fields"]
  end
end
.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) ⇒ Object
# File 'lib/scout/tsv/attach.rb', line 45 def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source other = TSV::Parser.new other unless TSV === other || TSV::Parser === other fields = [fields] if String === fields match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key) if ! (TSV === other) other_key_name = other_key == :key ? other.key_field : other.fields[other_key] other = TSV.open other, key_field: other_key_name, fields: fields, one2one: true, persist: persist_input other_key = :key if other.key_field == source.key_field end if TSV::Transformer === source source.dumper = case target when :stream TSV::Dumper.new(source..merge(sep: "\t")) when nil TSV.setup({}, **source..dup) else target end end other.with_unnamed do source.with_unnamed do other_key_name = other_key == :key ? other.key_field : other_key other_key_name = other.fields[other_key_name] if Integer === other_key fields = other.all_fields - [other_key_name, source.key_field] if fields.nil? match_key_name = match_key == :key ? source.key_field : match_key_name if index.nil? && ! source.identify_field(other_key_name) identifier_files = [] identifier_files << identifiers if identifiers identifier_files << source identifier_files << TSV.identifier_files(source) identifier_files << TSV.identifier_files(other) identifier_files << other index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name) end if other_key != :key other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double end other_field_positions = other.identify_field(fields.dup) = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})" Log.debug = if TrueClass === new = fields - source.fields source.fields = (source.fields + fields).uniq overlaps = source.identify_field(fields) orig_type = source.type type = source.type == :single ? :list : source.type empty_other_values = case type when :list [nil] * other.fields.length when :flat [] when :double [[]] * other.fields.length end empty_other_values = nil if other.type == :single insitu = TSV === source ? true : false if insitu.nil? insitu = false if source.type == :single match_key_pos = source.identify_field(match_key) source.traverse bar: , unnamed: true do |orig_key,current_values| current_values = [current_values] if source.type == :single keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos] keys = [keys].compact unless Array === keys keys = index.chunked_values_at(keys).flatten if index current_values = current_values.dup unless insitu keys = [nil] if keys.empty? keys.each do |current_key| other_values = current_key.nil? ? empty_other_values : other[current_key] if other_values.nil? other_values = empty_other_values elsif other.type == :flat other_values = [other_values] elsif other.type == :list && source.type == :double other_values = other_values.collect{|v| [v] } elsif other.type == :double && source.type == :list other_values = other_values.collect{|v| v.first } end other_values = other_field_positions.collect do |pos| if pos == :key current_key else other.type == :single ? 
other_values : other_values[pos] end end other_values.zip(overlaps).each do |v,overlap| if type == :list current_values[overlap] = v if current_values[overlap].nil? || (String === current_values[overlap] && current_values[overlap].empty?) elsif type == :flat next if v.nil? v = [v] unless Array === v current_values.concat v else current_values[overlap] ||= [] next if v.nil? v = [v] unless Array === v current_values[overlap].concat (v - current_values[overlap]) end end end source[orig_key] = current_values unless insitu nil end if complete && match_key == :key empty_self_values = case type when :list [nil] * source.fields.length when :flat [] when :double [[]] * source.fields.length end other.each do |other_key,other_values| next if source.include?(other_key) if other.type == :flat other_values = [other_values] elsif other.type == :single other_values = [other_values] elsif other.type == :list && type == :double other_values = other_values.collect{|v| [v] } elsif other.type == :double && type == :list other_values = other_values.collect{|v| v.first } end new_values = case type when :list [nil] * source.fields.length when :flat [] when :double source.fields.length.times.collect{ [] } end other_values.zip(overlaps).each do |v,overlap| next if v.nil? if overlap == :key other_key = Array === v ? v : v.first elsif type == :list new_values[overlap] = v if new_values[overlap].nil? || (String === new_values[overlap] && new_values[overlap].empty?) else v = [v] unless Array === v new_values[overlap].concat v end end source[other_key] = new_values end end source.type = type end end source end |
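Example (an illustrative sketch; the two in-memory tables and their field names are invented, and the expected results are inferred from the source above rather than from a verified run):

  require 'scout/tsv'

  # Two small tables that share the "Gene" key field
  drugs  = TSV.setup({}, :key_field => "Gene", :fields => ["Drug"],  :type => :list)
  scores = TSV.setup({}, :key_field => "Gene", :fields => ["Score"], :type => :list)
  drugs["TP53"]  = ["DrugA"]
  scores["TP53"] = ["0.9"]

  combined = drugs.attach(scores)   # equivalent to TSV.attach(drugs, scores)
  combined.fields                   # expected: ["Drug", "Score"]
  combined["TP53"]                  # expected: ["DrugA", "0.9"]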
.cast_value(value, cast) ⇒ Object
# File 'lib/scout/tsv/parser.rb', line 3

def self.cast_value(value, cast)
  if Array === value
    value.collect{|e| cast_value(e, cast) }
  else
    if Proc === cast
      cast.call value
    else
      value.send(cast)
    end
  end
end
.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false) ⇒ Object
# File 'lib/scout/tsv/change_id.rb', line 30

def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
  source = TSV::Parser.new source if String === source
  identifiers = identifiers.nil? ? source.identifiers : identifiers
  new_fields = source.fields.dup
  new_fields[new_fields.index(source_id)] = new_id
  return source.attach(identifiers, fields: [new_id], insitu: insitu).slice(new_fields)
end
.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil) ⇒ Object
# File 'lib/scout/tsv/change_id.rb', line 4

def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
  source = TSV::Parser.new source if String === source
  identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)

  if identifiers && source.identify_field(new_key_field, strict: true).nil?
    identifiers = identifiers.nil? ? source.identifiers : identifiers
    new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
    new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
    return new
  end

  fields = source.fields.dup - [new_key_field]
  fields.unshift source.key_field if keep

  transformer = TSV::Transformer.new source
  transformer.key_field = new_key_field
  transformer.fields = fields
  transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k,v|
    [k, v]
  end

  stream ? transformer : transformer.tsv(merge: merge, one2one: one2one)
end
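Example (a sketch that re-keys a table by one of its own fields; the names and values are made up, so no identifiers file is needed):

  require 'scout/tsv'

  tsv = TSV.setup({}, :key_field => "Ensembl ID", :fields => ["Gene Name", "Score"], :type => :list)
  tsv["ENSG00000141510"] = ["TP53", "0.9"]

  by_name = tsv.change_key("Gene Name")   # equivalent to TSV.change_key(tsv, "Gene Name")
  by_name.key_field                       # expected: "Gene Name"
  by_name["TP53"]                         # expected: the Score value, now keyed by gene name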
.collapse_stream(stream, *args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/open.rb', line 209

def self.collapse_stream(stream, *args, **kwargs, &block)
  stream = stream.stream if stream.respond_to?(:stream)
  self.process_stream(stream) do |sin, line|
    collapsed = Open.collapse_stream(stream, line: line)
    Open.consume_stream(collapsed, false, sin)
  end
end
.concat_streams(streams) ⇒ Object
# File 'lib/scout/tsv/stream.rb', line 212 def self.concat_streams(streams) streams = streams.collect do |stream| case stream when(defined? Step and Step) stream.stream when Path stream.open when TSV::Dumper stream.stream when TSV stream.dumper_stream else stream end end.compact done_streams = [] Open.open_pipe do |sin| first_stream = streams.first while line = first_stream.gets sin.write line break unless line[0] == "#" end while streams.any? streams.each do |stream| line = stream.gets sin.write line unless line[0] == "#" end streams.delete_if{|stream| stream.eof? } end end end |
.csv(obj, options = {}) ⇒ Object
# File 'lib/scout/tsv/csv.rb', line 4 def self.csv(obj, = {}) = IndiferentHash.add_defaults , :headers => true, :type => :list headers = [:headers] noheaders = ! headers type = .delete :type cast = .delete :cast merge = .delete :merge key_field = .delete :key_field fields = .delete :fields if key_field || fields orig_type = type type = :double merge = true end [:headers] = false csv = case obj when Path CSV.read obj.find.open, ** when String if Open.remote?(obj) CSV.read Open.open(obj), ** elsif Path.is_filename?(obj) CSV.read obj, ** else CSV.new obj, ** end else CSV.new obj, ** end tsv = if noheaders TSV.setup({}, :key_field => nil, :fields => nil, :type => type) else key, *csv_fields = csv.shift TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type) end csv.each_with_index do |row,i| if noheaders key, values = ["row-#{i}", row] else key, *values = row end if cast values = values.collect{|v| v.send cast } end case type when :double, :flat tsv.zip_new(key, values) when :single tsv[key] = values.first when :list tsv[key] = values end end if key_field || fields tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true) if tsv.type != orig_type tsv = case orig_type when :list tsv.to_list when :single tsv.to_single when :list tsv.to_list when :flat tsv.to_flat end end end tsv end |
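Example (a sketch using inline CSV text; TSV.csv also accepts a Path or a filename string, as the source above shows):

  require 'scout/tsv'

  csv_text = "Gene,Score\nTP53,0.9\nKRAS,0.5\n"
  tsv = TSV.csv(csv_text, :type => :list)
  tsv.key_field   # expected: "Gene"
  tsv["TP53"]     # expected: ["0.9"]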
.field_match_counts(file, values, options = {}) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 13 def self.field_match_counts(file, values, = {}) = IndiferentHash.add_defaults , :persist_prefix => "Field_Matches" = IndiferentHash.pull_keys , :persist filename = TSV === file ? file.filename : file path = Persist.persist filename, :string, .merge(:no_load => true) do tsv = TSV === file ? file : TSV.open(file, ) text = "" fields = nil tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names| names.zip(fields).each do |list, format| list = [list] unless Array === list list.delete_if do |name| name.empty? end next if list.empty? text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n" end text << [gene, tsv.key_field] * "\t" << "\n" end text end TmpFile.with_file(values.uniq * "\n", false) do |value_file| cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'" begin TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i) rescue Log.exception $! TSV.setup({}, :type => :single, :cast => :to_i) end end end |
.identifier_files(obj) ⇒ Object
# File 'lib/scout/tsv/attach.rb', line 252

def self.identifier_files(obj)
  if TSV === obj
    obj.identifier_files
  elsif Path === obj
    obj.dirname.identifiers
  else
    nil
  end
end
.identify_field(key_field, fields, name, strict: nil) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 46

def self.identify_field(key_field, fields, name, strict: nil)
  return :key if name == :key || (! strict && NamedArray.field_match(key_field, name))
  name.collect!{|n| NamedArray.field_match(key_field, n) ? :key : n } if Array === name
  NamedArray.identify_name(fields, name, strict: strict)
end
.identify_field_in_obj(obj, field) ⇒ Object
# File 'lib/scout/tsv/change_id/translate.rb', line 3

def self.identify_field_in_obj(obj, field)
  case obj
  when TSV
    obj.identify_field(field)
  when TSV::Parser, TSV::Dumper
    TSV.identify_field(obj.key_field, obj.fields, field)
  when Path, String
    all_fields = TSV.parse_header(obj)["all_fields"]
    identify_field_in_obj(all_fields, field)
  when Array
    key_field, *fields = obj
    TSV.identify_field(key_field, fields, field)
  end
end
.incidence(tsv, **kwargs) ⇒ Object
# File 'lib/scout/association/item.rb', line 224

def self.incidence(tsv, **kwargs)
  AssociationItem.incidence Association.index(tsv, **kwargs).keys
end
.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 40 def self.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) kwargs = IndiferentHash.add_defaults kwargs, unnamed: true engine = IndiferentHash. kwargs, :engine fields = :all if fields.nil? prefix = case fields when :all "Index[#{target}]" else "Index[#{Log.fingerprint(fields)}->#{target}]" end prefix += select_prefix_str(kwargs[:select]) = IndiferentHash.pull_keys kwargs, :persist = IndiferentHash.add_defaults , :prefix => prefix, :engine => :HDB, :persist => false = IndiferentHash.pull_keys kwargs, :data Persist.persist(tsv_file, [:engine], .merge(other_options: kwargs.merge(target: target, fields: fields, order: order, data_options: ))) do |filename| if filename index = ScoutCabinet.open(filename, true, engine) TSV.setup(index, :type => :single) index.extend TSVAdapter else index = TSV.setup({}, :type => :single) end tsv_file = TSV.open(tsv_file, **) if ! TSV === tsv_file log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" Log.low log_msg = log_msg if TrueClass === if order tmp_index = {} include_self = fields == :all || (Array === fields) && fields.include?(target) target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, bar: , **kwargs do |k,values| tmp_index[k] ||= [[k]] if include_self values.each_with_index do |list,i| i += 1 if include_self list.each do |e| tmp_index[e] ||= [] tmp_index[e][i] ||= [] tmp_index[e][i] << k end end end tmp_index.each do |e,list| index[e] = list.flatten.compact.uniq.first end index.key_field = source_field_names * "," index.fields = [target_key_field] tmp_index = {} else target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: , **kwargs do |k,values| values.each do |e| index[e] = k unless index.include?(e) end end end index.key_field = source_field_names * "," index.fields = [target_key_field] index end end |
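Example (a sketch of building an in-memory identifier index; the gene data is invented and persistence is left at its default of false):

  require 'scout/tsv'

  tsv = TSV.setup({}, :key_field => "Gene Name", :fields => ["Ensembl ID"], :type => :list)
  tsv["TP53"] = ["ENSG00000141510"]

  index = tsv.index(target: "Ensembl ID")   # single-type index from any value to the target field
  index["TP53"]                             # expected: "ENSG00000141510"
  index["ENSG00000141510"]                  # expected: "ENSG00000141510" (target values map to themselves)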
.match_keys(source, other, match_key: nil, other_key: nil) ⇒ Object
# File 'lib/scout/tsv/attach.rb', line 3 def self.match_keys(source, other, match_key: nil, other_key: nil) #match_key = (source.all_fields & other.all_fields).first if match_key.nil? if match_key.nil? match_key_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first match_key = source.all_fields[match_key_pos] if match_key_pos end if match_key.nil? source.all_fields.collect do |f| other_key = other.identify_field(f) if other_key other_key = other.key_field if other_key == :key match_key = f break end end end if match_key.nil? other.all_fields.collect do |f| match_key = source.identify_field(f) if match_key other_key = f break end end end match_key = source.key_field if match_key.nil? if other_key.nil? other_key = other.identify_field(match_key) end other_key = other.key_field if other_key.nil? match_key = :key if NamedArray.field_match(match_key, source.key_field) other_key = :key if NamedArray.field_match(other_key, other.key_field) [match_key, other_key] end |
.open(file, options = {}) ⇒ Object
# File 'lib/scout/tsv.rb', line 76 def self.open(file, = {}) grep, invert_grep, nocache, monitor, = IndiferentHash. , :grep, :invert_grep, :nocache, :monitor, :entity_options = IndiferentHash.pull_keys , :persist = IndiferentHash.add_defaults , prefix: "TSV", type: :HDB, persist: false [:data] ||= [:data] file = StringIO.new file if String === file && ! (Path === file) && file.index("\n") source_name, = case file when StringIO [file.inspect, ] when TSV::Parser [file.[:filename], file.] else [file, ] end Persist.tsv(source_name, , persist_options: ) do |data| [:data] = data if data [:filename] ||= if TSV::Parser === file file.[:filename] elsif Path === file file elsif file.respond_to?(:filename) file.filename elsif Path.is_filename?(file) file else nil end if data Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}" else Log.debug "TSV open #{Log.fingerprint file}" end tsv = if TSV::Parser === file TSV.parse(file, **) else [:tsv_invert_grep] ||= invert_grep if invert_grep Open.open(file, grep: grep, invert_grep: invert_grep, nocache: nocache) do |f| TSV.parse(f, **) end end tsv end end |
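Example (a sketch; a multi-line String is wrapped in a StringIO by the source above, so a table can be opened inline without a file):

  require 'scout/tsv'

  content = "#Gene\tScore\nTP53\t0.9\n"
  tsv = TSV.open(content, :type => :single)
  tsv["TP53"]   # expected: "0.9"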
.original_setup ⇒ Object
# File 'lib/scout/tsv.rb', line 34

alias original_setup setup
.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/parser.rb', line 438 def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block) parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep) cast = kwargs[:cast] cast = parser.[:cast] if cast.nil? identifiers = kwargs.delete(:identifiers) type = kwargs[:type] ||= parser.[:type] ||= :double if (data = kwargs[:data]) && data.respond_to?(:persistence_class) TSV.setup(data, type: type) data.extend TSVAdapter serializer ||= if cast case [cast, type] when [:to_i, :single] :integer when [:to_i, :list], [:to_i, :flat] :integer_array when [:to_f, :single] :float when [:to_f, :list], [:to_f, :flat] :float_array when [:to_f, :double], [:to_i, :double] :marshal else type end else type end data.serializer = TSVAdapter::SERIALIZER_ALIAS[serializer] || serializer end kwargs[:data] = {} if kwargs[:data].nil? data = parser.traverse **kwargs, &block data.type = type data.cast = cast data.filename = filename || parser.[:filename] data.namespace = namespace || parser.[:namespace] data.identifiers = identifiers data.unnamed = unnamed data.save_annotation_hash if data.respond_to?(:save_annotation_hash) data end |
.parse_header(stream, fix: true, header_hash: '#', sep: "\t") ⇒ Object
# File 'lib/scout/tsv/parser.rb', line 230 def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t") sep = "\t" if sep.nil? if (Path === stream) || ((String === stream) && Path.is_filename?(stream)) Open.open(stream) do |f| return parse_header(f, fix: fix, header_hash: header_hash, sep: sep) end end if IO === stream && stream.closed? stream.join if stream.respond_to?(:join) raise "Closed stream" end opts = {} preamble = [] # Get line begin #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream line = stream.gets return {} if line.nil? line = Misc.fixutf8 line.chomp if fix # Process options line if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/)) opts = IndiferentHash.string2hash m.captures.first.chomp line = stream.gets if line && fix if Proc === fix line = fix.call line else line = Misc.fixutf8 line.chomp if line && fix end end end # Determine separator sep = opts[:sep] if opts[:sep] # Process fields line preamble << line if line while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash))) fields = line.split(sep, -1) key_field = fields.shift key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty? line = (header_hash != "" ? stream.gets : nil) line = Misc.fixutf8 line.chomp if line preamble << line if line break if TrueClass === header_hash || header_hash == "" end preamble = preamble[0..-3] * "\n" line ||= stream.gets first_line = line opts[:type] = opts[:type].to_sym if opts[:type] opts[:cast] = opts[:cast].to_sym if opts[:cast] all_fields = [key_field] + fields if key_field && fields NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields], %w(options key_field fields first_line preamble all_fields)) rescue Exception raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception stream.abort($!) if stream.respond_to?(:abort) raise $! end end |
.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) ⇒ Object
# File 'lib/scout/tsv/parser.rb', line 15 def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) items = line.split(sep, -1) return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2) if positions.nil? && key == 0 key = items.shift elsif positions.nil? if type == :flat key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten items = items.slice(0,1) else key = items.delete_at(key) end key = key.split(sep2) if type == :double else key, items = items[key], items.values_at(*positions) key = key.split(sep2) if type == :double || type == :flat end items = case type when :list items when :single items.first when :flat items.collect{|i| i.split(sep2, -1) }.flatten when :double items.collect{|i| i.nil? ? [] : i.split(sep2, -1) } end if cast items = cast_value(items, cast) end [key, items] end |
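Example (a sketch of parsing a single raw line in :double mode, where the key and each value may hold several entries separated by sep2):

  require 'scout/tsv'

  key, items = TSV.parse_line("TP53\tDrugA|DrugB\t0.9", type: :double)
  key    # expected: ["TP53"] (keys are split by sep2 in :double mode)
  items  # expected: [["DrugA", "DrugB"], ["0.9"]]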
.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/parser.rb', line 54 def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) begin = "Parsing #{Log.fingerprint stream}" if TrueClass === = Log::ProgressBar.(stream, ) if .init if source_type = type if source_type.nil? type_swap_key = [source_type.to_s, type.to_s] * "_" same_type = source_type.to_s == type.to_s if data && data.respond_to?(:load_stream) && data.serializer.to_s.include?("String") && same_type && ! (head || kwargs[:cast] || kwargs[:positions] || (kwargs[:key] && kwargs[:key] != 0) || Proc === fix ) && (kwargs[:sep].nil? || kwargs[:sep] == "\t") Log.debug "Loading #{Log.fingerprint stream} directly into #{Log.fingerprint data}" if first_line full_stream = Open.open_pipe do |sin| sin.puts first_line Open.consume_stream(stream, false, sin) end data.load_stream(full_stream) else data.load_stream(stream) end return data end data = {} if data.nil? merge = false if type != :double && type != :flat line = first_line || stream.gets while line break if head && head <= 0 begin line.chomp! if Proc === fix line = fix.call line elsif fix line = Misc.fixutf8(line) end .tick if if type == :array || type == :line block.call line next end key, items = parse_line(line, type: source_type, field_names: field_names, **kwargs) next if key.nil? if Array === key keys = key if one2one key_items = keys.length.times.collect{|i| items.collect{|list| [list[i] || list[0]] } } else key_items = false end else keys = [key] key_items = false end keys.each_with_index do |key,i| if key_items these_items = key_items[i] else these_items = items end these_items = case type_swap_key when "single_single" these_items when "list_single" these_items.first when "flat_single" these_items.first when "double_single" these_items.first.first when "single_list" [these_items] when "list_list" these_items when "flat_list" these_items when "double_list" these_items.collect{|l| l.first } when "single_flat" [these_items] when "list_flat" these_items when "flat_flat" these_items when "double_flat" these_items.flatten when "single_double" [[these_items]] when "list_double" these_items.collect{|l| l.nil? ? [] : [l] } when "flat_double" [these_items] when "double_double" these_items end if block_given? res = block.call(key, these_items, field_names) data[key] = res unless res.nil? || FalseClass === data next end if ! merge || ! data.include?(key) these_items = these_items.collect{|i| i.empty? ? [nil] : i } if type == :double && one2one data[key] = these_items elsif type == :double current = data[key] if merge == :concat these_items.each_with_index do |new,i| new = [nil] if new.empty? current[i].concat(new) end else merged = [] these_items.each_with_index do |new,i| new = [nil] if new.empty? merged[i] = current[i] + new end data[key] = merged end elsif type == :flat current = data[key] if merge == :concat current[i].concat these_items else data[key] = current + these_items end end end rescue Exception raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception stream.abort($!) if stream.respond_to?(:abort) raise $! ensure head = head - 1 if head if stream.closed? line = nil else line = stream.gets end end end data ensure if stream.respond_to?(:stream_exception) && stream.stream_exception .remove(stream.stream_exception) else .remove end if if stream.respond_to?(:join) eof = begin stream.eof? rescue IOError true end stream.join if eof end end end |
.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil) ⇒ Object
# File 'lib/scout/tsv/stream.rb', line 2 def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil) streams = streams.collect do |stream| case stream when(defined? Step and Step) stream.stream when Path stream.open when TSV::Dumper stream.stream when TSV stream.dumper_stream else stream end end.compact num_streams = streams.length streams = streams.collect do |stream| Open.sort_stream(stream, memory: sort_memory) end if sort begin lines =[] fields =[] sizes =[] key_fields =[] =[] empty =[] preambles =[] parser_types =[] type ||= :double streams = streams.collect do |stream| parser = TSV::Parser.new stream, type: type, sep: sep sfields = parser.fields if field_prefix index = streams.index stream prefix = field_prefix[index] sfields = sfields.collect{|f|[prefix, f]* ":"} end first_line = parser.first_line first_line = nil if first_line == "" lines << first_line key_fields << parser.key_field fields << sfields sizes << sfields.length if sfields << parser. preambles << parser.preamble if preamble and not parser.preamble.empty? parser_types << parser.type empty << stream if parser.first_line.nil? || parser.first_line.empty? stream end all_fields = fields.dup key_field = key_fields.compact.first if same_fields fields = fields.first else fields = fields.compact.flatten end = .first type ||= [:type] type ||= :list if type == :single type ||= :double if type == :flat preamble_txt = case preamble when TrueClass preambles * "\n" when String if preamble[0]== '+' preambles * "\n" + "\n" + preamble[1..-1] else preamble end else nil end empty_pos = empty.collect{|stream| streams.index stream} keys =[] parts =[] lines.each_with_index do |line,i| if line.nil? || line.empty? keys[i]= nil parts[i]= nil else vs = line.chomp.split(sep, -1) key, *p = vs keys[i]= key parts[i]= p end sizes[i] ||= parts[i].length unless parts[i].nil? end done_streams =[] fields = nil if fields && fields.empty? dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type dumper.init(preamble: preamble_txt || !!key_field) t = Thread.new do Thread.report_on_exception = false Thread.current["name"] = "Paste streams" last_min = nil while lines.reject{|line| line.nil?}.any? min = keys.compact.sort.first break if min.nil? new_values =[] skip = all_match && keys.uniq !=[min] keys.each_with_index do |key,i| case key when min new_values << parts[i] begin line = lines[i]= begin streams[i].gets rescue Log.exception $! nil end if line.nil? keys[i]= nil parts[i]= nil else k, *p = line.chomp.split(sep, -1) p = p.collect{|e| e.nil? ? "" : e } if k == keys[i] new_values = NamedArray.zip_fields(new_values).zip(p).collect{|p| [p.flatten * "|"] } raise TryAgain end keys[i]= k parts[i]= p end rescue TryAgain keys[i]= nil parts[i]= nil Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}" retry end else p = [nil] * sizes[i] new_values << p end end next if skip if same_fields new_values_same = [] new_values.each do |list| list.each_with_index do |l,i| new_values_same[i] ||= [] new_values_same[i] << l end end new_values = new_values_same else new_values = new_values.inject([]){|acc,l| acc.concat l } end dumper.add min, new_values end dumper.close streams.each do |stream| stream.close if stream.respond_to?(:close) && ! stream.closed? stream.join if stream.respond_to? 
:join end end rescue Aborted Log.error "Aborted pasting streams #{streams.inspect}: #{$!.}" streams.each do |stream| stream.abort if stream.respond_to? :abort end raise $! rescue Exception Log.error "Exception pasting streams #{streams.inspect}: #{$!.}" streams.each do |stream| stream.abort if stream.respond_to? :abort end raise $! end Thread.pass until t["name"] ConcurrentStream.setup(dumper.stream, threads: [t]) end |
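Example (a sketch that pastes two header-prefixed streams side by side on their shared key; the exact preamble emitted depends on the dumper options):

  require 'scout/tsv'
  require 'stringio'

  s1 = StringIO.new "#Gene\tScore\nTP53\t0.9\n"
  s2 = StringIO.new "#Gene\tPathway\nTP53\tApoptosis\n"

  pasted = TSV.paste_streams([s1, s2], type: :list)
  puts pasted.read
  # expected, roughly:
  #   #Gene  Score  Pathway
  #   TP53   0.9    Apoptosis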
.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 161 def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) kwargs = IndiferentHash.add_defaults kwargs, unnamed: true type, data_persist = IndiferentHash. kwargs, :type prefix = "PositionIndex[#{pos_field}]" prefix += select_prefix_str(kwargs[:select]) = IndiferentHash.pull_keys kwargs, :persist = IndiferentHash.add_defaults , :prefix => prefix, :type => :fwt, :persist => true = IndiferentHash.pull_keys kwargs, :data Persist.persist(tsv_file, [:type], .merge(other_options: kwargs.merge(pos_field: pos_field, key_field: key_field))) do |filename| tsv_file = TSV.open(tsv_file, *) if [:persist] && ! TSV === tsv_file log_msg = "PositionIndex #{Log.fingerprint tsv_file} #{pos_field}" Log.low log_msg = log_msg if TrueClass === max_key_size = 0 index_data = [] TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :flat, cast: :to_i, bar: , **kwargs do |key, pos| key_size = key.length max_key_size = key_size if key_size > max_key_size if Array === pos pos.each do |p| index_data << [key, p] end else index_data << [key, pos] end end filename = :memory if filename.nil? index = FixWidthTable.get(filename, max_key_size, false) index.add_point index_data index.read index end end |
.process_stream(stream, header_hash: "#", &block) ⇒ Object
# File 'lib/scout/tsv/open.rb', line 199

def self.process_stream(stream, header_hash: "#", &block)
  sout = Open.open_pipe do |sin|
    while line = stream.gets
      break unless line.start_with?(header_hash)
      sin.puts line
    end
    yield sin, line
  end
end
.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 117 def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) kwargs = IndiferentHash.add_defaults kwargs, unnamed: true type, data_persist = IndiferentHash. kwargs, :type, :data_persist prefix = "RangeIndex[#{start_field}-#{end_field}]" prefix += select_prefix_str(kwargs[:select]) = IndiferentHash.pull_keys kwargs, :persist = IndiferentHash.add_defaults , :prefix => prefix, :type => :fwt, :persist => true = IndiferentHash.pull_keys kwargs, :data Persist.persist(tsv_file, [:type], .merge(other_options: kwargs.merge(start_field: start_field, end_field: end_field, key_field: key_field))) do |filename| tsv_file = TSV.open(tsv_file, *) if [:persist] && ! TSV === tsv_file log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}" Log.low log_msg = log_msg if TrueClass === max_key_size = 0 index_data = [] TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: , unnamed: true, **kwargs do |key, values| key_size = key.length max_key_size = key_size if key_size > max_key_size start_pos, end_pos = values if Array === start_pos start_pos.zip(end_pos).each do |s,e| index_data << [key, [s.to_i, e.to_i]] end else index_data << [key, [start_pos.to_i, end_pos.to_i]] end end filename = :memory if filename.nil? index = FixWidthTable.get(filename, max_key_size, true) index.add_range index_data index.read index end end |
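Example (a sketch of a range lookup; the regions are invented, persistence is disabled to keep the index in memory, and point queries are assumed to return the keys whose ranges cover the position):

  require 'scout/tsv'

  ranges = TSV.setup({}, :key_field => "Region", :fields => ["Start", "End"], :type => :list)
  ranges["r1"] = ["10", "20"]
  ranges["r2"] = ["15", "30"]

  idx = ranges.range_index("Start", "End", persist: false)
  idx[12]   # expected: ["r1"]
  idx[18]   # expected: ["r1", "r2"]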
.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) ⇒ Object
# File 'lib/scout/tsv/util/select.rb', line 2 def self.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) return ! select(key, values, method, field: field, invert: false, type: type, sep: sep, &block) if invert return yield(key, values) if method.nil? && block_given if Hash === method if method.include?(:invert) method = method.dup invert = method.delete(:invert) return select(key, values, method, fields: fields, field: field, invert: invert, type: type, sep: sep, &block) end field = method.keys.first value = method[field] return select(key, values, value, fields: fields, field: field, invert: invert, type: type, sep: sep, &block) end if field field = NamedArray.identify_name(fields, field) if fields && String === field set = field == :key ? [key] : (type == :double ? values[field].split(sep) : values[field]) else set = [key, (type == :double ? values.collect{|v| v.split(sep) } : values)] end if Array === set set.flatten! else set = [set] end case method when Array (method & set).any? when Regexp set.select{|v| v =~ method }.any? when Symbol set.first.send(method) when Numeric set.size > method when String if block_given? field = method field = fields.index?(field) if fields && String === field case when block.arity == 1 if (method == key_field or method == :key) yield(key) else yield(values[method]) end when block.arity == 2 if (method == key_field or method == :key) yield(key, key) else yield(key, values[method]) end end elsif m = method.match(/^([<>]=?)(.*)/) set.select{|v| v.to_f.send($1, $2.to_f) }.any? else set.select{|v| v == method }.any? end when Proc set.select{|v| method.call(v) }.any? end end |
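Example (a sketch of the instance-level #select, which applies this class method per entry; the field => value form with a comparison string is assumed from the String branch above):

  require 'scout/tsv'

  tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Score"], :type => :list)
  tsv["TP53"] = ["0.9"]
  tsv["KRAS"] = ["0.2"]

  high = tsv.select("Score" => "> 0.5")
  high.keys   # expected: ["TP53"]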
.select_prefix_str(select) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 6 def self.select_prefix_str(select) str = begin case select when nil nil when Array case select.first when nil nil when Array select.collect{|p| p * "="}*"," else select.collect{|p| p.to_s }*"=" end when Hash if select.empty? nil else select.collect do |key,value| [key.to_s, value.to_s] * "=" end * "," end end rescue Log.warn "Error in select_prefix_str: #{Log.fingerprint(select)}: #{$!.}" str = nil end if str.nil? "" else "[select:#{str}]" end end |
.setup(obj, *rest, &block) ⇒ Object
# File 'lib/scout/tsv.rb', line 36 def setup(obj, *rest, &block) if rest.length == 1 && String === rest.first = TSV.(rest.first) if Array === obj default_value = case [:type] when :double, :flat, :list, nil [] when :single nil end obj = Misc.array2hash(obj, default_value) end original_setup(obj, , &block) else if Array === obj = rest.first if Hash === rest.first ||= {} default_value = case [:type] when :double, :flat, :list, nil [] when :single nil end obj = Misc.array2hash(obj, default_value) end original_setup(obj, *rest, &block) end obj.save_annotation_hash if obj.respond_to?(:save_annotation_hash) obj end |
.str2options(str) ⇒ Object
# File 'lib/scout/tsv.rb', line 21 def self.(str) ,_sep, rest = str.partition("#") key, fields_str = .split("~") fields = fields_str.nil? ? [] : fields_str.split(/,\s*/) rest = ":type=" << rest if rest =~ /^:?\w+$/ = rest.nil? ? {} : IndiferentHash.string2hash(rest) {:key_field => key, :fields => fields}.merge() end |
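Example (a sketch of the option-string form also accepted by TSV.setup; the trailing options are handled by IndiferentHash.string2hash, so the exact "#:type=:list" grammar is an assumption based on common usage):

  require 'scout/tsv'

  TSV.str2options("Gene~Name,Score#:type=:list")
  # expected: { :key_field => "Gene", :fields => ["Name", "Score"], :type => :list }

  tsv = TSV.setup({}, "Gene~Name,Score#:type=:list")
  tsv.key_field   # expected: "Gene"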
.str_setup(option_str, obj) ⇒ Object
# File 'lib/scout/tsv.rb', line 71

def self.str_setup(option_str, obj)
  options = TSV.str2options(option_str)
  setup(obj, **options)
end
.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) ⇒ Object
# File 'lib/scout/tsv/change_id/translate.rb', line 108 def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) identifiers ||= tsv.identifier_files index = translation_index([tsv, identifiers].flatten.compact, field, format, persist: persist_index) key_field, *fields = TSV.all_fields(tsv) if field == key_field new_key_field = format new_fields = fields else new_key_field = key_field new_fields = fields.collect{|f| f == field ? format : f } end field_pos = new_key_field == key_field ? new_fields.index(format) : :key transformer = TSV::Transformer.new tsv transformer.key_field = new_key_field transformer.fields = new_fields transformer.traverse one2one: one2one, unnamed: true do |k,v| if field_pos == :key [index[k], v] else v = v.dup if Array === v[field_pos] v[field_pos] = index.values_at(*v[field_pos]).compact else v[field_pos] = index[v[field_pos]] end [k, v] end end stream ? transformer : transformer.tsv(merge: merge, one2one: one2one) end |
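Example (a sketch that translates the key field of a table using an explicit identifiers table; the data is invented and index persistence is turned off for the example):

  require 'scout/tsv'

  identifiers = TSV.setup({}, :key_field => "Gene Name", :fields => ["Ensembl ID"], :type => :list)
  identifiers["TP53"] = ["ENSG00000141510"]

  tsv = TSV.setup({}, :key_field => "Gene Name", :fields => ["Score"], :type => :list)
  tsv["TP53"] = ["0.9"]

  translated = TSV.translate(tsv, "Gene Name", "Ensembl ID", identifiers: identifiers, persist_index: false)
  translated.keys   # expected: ["ENSG00000141510"]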
.translation_index(files, source, target, persist_options = {}) ⇒ Object
# File 'lib/scout/tsv/change_id/translate.rb', line 49 def self.translation_index(files, source, target, = {}) return nil if source == target = IndiferentHash.add_defaults .dup, :persist => true, :prefix => "Translation index" file_fields = {} files = [files] unless Array === files files.each do |file| next if Path === file && ! Open.exist?(file) file = file.find if Path === file file_fields[file] = all_fields(file) end begin path = translation_path(file_fields, source, target) rescue exception = $! begin path = translation_path(file_fields, source, target) rescue raise exception end end name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})" second_target = if path.length == 1 target else file1, file2 = path.values_at 0, 1 pos = NamedArray.identify_name(TSV.all_fields(file1), TSV.all_fields(file2)) TSV.all_fields(file1)[pos.compact.first] end Persist.persist(name, "HDB", ) do index = path.inject(nil) do |acc,file| if acc.nil? if source.nil? if TSV === file acc = file.index target: second_target else acc = TSV.index(file, target: second_target) end else if TSV === file acc = (file.key_field == source || source.nil?) ? file.annotate(file.dup) : file.reorder(source) else acc = TSV.open(file, key_field: source) end end else acc = acc.attach file, insitu: false end acc end index.slice([target]).to_single end end |
.translation_path(file_fields, source, target) ⇒ Object
# File 'lib/scout/tsv/change_id/translate.rb', line 18 def self.translation_path(file_fields, source, target) target_files = file_fields.select{|f,fields| identify_field_in_obj(fields, target) }.collect{|file,f| file } if source.nil? source_files = file_fields.keys else source_files = file_fields.select{|f,fields| identify_field_in_obj(fields, source) }.collect{|file,f| file } end if source && (one_step = target_files & source_files).any? [one_step.first] else source_fields = file_fields.values_at(*source_files).flatten target_fields = file_fields.values_at(*target_files).flatten if (common_fields = source_fields & target_fields).any? source_file = source_files.select{|file| fields = file_fields[file]; (fields & common_fields).any? }.collect{|file,f| file }.first target_file = target_files.select{|file| fields = file_fields[file]; (fields & common_fields).any? }.collect{|file,f| file }.first [source_file, target_file] else file_fields.select{|f,fields| (fields & source_fields).any? && (fields & target_fields).any? } middle_file, middle_fields = file_fields.select{|f,fields| (fields & source_fields).any? && (fields & target_fields).any? }.first if middle_file source_file = source_files.select{|file| fields = file_fields[file]; (fields & middle_fields).any? }.collect{|file,f| file }.first target_file = target_files.select{|file| fields = file_fields[file]; (fields & middle_fields).any? }.collect{|file,f| file }.first [source_file, middle_file, target_file] else raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target} in #{Log.fingerprint file_fields}" end end end end |
.traverse(*args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/open.rb', line 195

def self.traverse(*args, **kwargs, &block)
  Open.traverse(*args, **kwargs, &block)
end
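Example (a sketch of the instance-level #traverse listed in the summary above; with no arguments it simply iterates over the entries, yielding named values):

  require 'scout/tsv'

  tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Drug", "Score"], :type => :list)
  tsv["TP53"] = ["DrugA", "0.9"]

  tsv.traverse do |gene, values|
    puts [gene, values["Score"]] * ": "   # values is a NamedArray, so fields are addressable by name
  end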
.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) ⇒ Object
# File 'lib/scout/tsv/util/unzip.rb', line 3 def self.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) source = TSV::Parser.new source if String === source field_pos = source.identify_field(field) new_fields = source.fields.dup field_name = new_fields[field_pos] new_fields.delete_at(field_pos) if delete new_key_field = [source.key_field, field_name] * sep type = :double if merge stream = target == :stream target = case target when :stream TSV::Dumper.new(source..merge(sep: "\t")) when nil TSV.setup({}) else target end target.fields = new_fields target.key_field = new_key_field target.type = type transformer = TSV::Transformer.new source, target, unnamed: true = "Unzip #{new_key_field}" if TrueClass === transformer.traverse unnamed: true, one2one: one2one, bar: do |k,v| if source.type == :double if one2one res = NamedArray.zip_fields(v).collect do |_v| field_value = _v[field_pos] if delete new_values = _v.dup new_values.delete_at field_pos else new_values = _v end new_key = [k,field_value] * sep new_values = new_values.collect{|e| [e] } if transformer.type == :double [new_key, new_values] end else all_values = v.collect{|e| e.dup } all_values.delete_at field_pos if delete res = NamedArray.zip_fields(v).collect do |_v| field_value = _v[field_pos] new_key = [k,field_value] * sep new_values = all_values if transformer.type == :double [new_key, new_values] end end MultipleResult.setup(res) else field_value = v[field_pos] if delete new_values = v.dup new_values.delete_at field_pos else new_values = v end new_key = [k,field_value] * sep new_values = new_values.collect{|e| [e] } if transformer.type == :double [new_key, new_values] end end stream ? transformer : transformer.tsv(merge: merge) end |
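Example (a sketch that splits a :double table into one row per value of the unzipped field; the data is invented):

  require 'scout/tsv'

  tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Variant", "Effect"], :type => :double)
  tsv["TP53"] = [["v1", "v2"], ["missense", "nonsense"]]

  unzipped = TSV.unzip(tsv, "Variant", type: :list)
  unzipped.keys         # expected: ["TP53:v1", "TP53:v2"]
  unzipped["TP53:v1"]   # expected: ["missense"]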
Instance Method Details
#[](key, *rest) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 56

def [](key, *rest)
  v = super(key, *rest)
  NamedArray.setup(v, @fields, key) unless @unnamed || @type == :flat || ! (Array === v)
  v
end
#add_field(name = nil) ⇒ Object
# File 'lib/scout/tsv/util/process.rb', line 46 def add_field(name = nil) through do |key, values| new_values = yield(key, values) new_values = [new_values].compact if type == :double and not Array === new_values case when (values.nil? and (fields.nil? or fields.empty?)) values = [new_values] when values.nil? values = [nil] * fields.length + [new_values] when Array === values values += [new_values] else values << new_values end self[key] = values end if not fields.nil? and not name.nil? new_fields = self.fields + [name] self.fields = new_fields end self end |
#all_fields ⇒ Object
# File 'lib/scout/tsv/util.rb', line 147

def all_fields
  return [] if @fields.nil?
  [@key_field] + @fields
end
#attach(*args, **kwargs) ⇒ Object
# File 'lib/scout/tsv/attach.rb', line 225

def attach(*args, **kwargs)
  TSV.attach(self, *args, **kwargs)
end
#change_id(*args, **kwargs) ⇒ Object
# File 'lib/scout/tsv/change_id.rb', line 40

def change_id(*args, **kwargs)
  TSV.change_id(self, *args, **kwargs)
end
#change_key(*args, **kwargs) ⇒ Object
# File 'lib/scout/tsv/change_id.rb', line 26

def change_key(*args, **kwargs)
  TSV.change_key(self, *args, **kwargs)
end
#chunked_values_at(keys, max = 5000) ⇒ Object
# File 'lib/scout/tsv/util/select.rb', line 266

def chunked_values_at(keys, max = 5000)
  Misc.ordered_divide(keys, max).inject([]) do |acc,c|
    new = self.values_at(*c)
    new.annotate acc if new.respond_to? :annotate and acc.empty?
    acc.concat(new)
  end
end
#collapse_stream(*args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/open.rb', line 217

def collapse_stream(*args, **kwargs, &block)
  TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
end
#collect(*args, &block) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 96

def collect(*args, &block)
  if block_given?
    res = []
    each do |k,v|
      res << yield(k, v)
    end
    res
  else
    super(*args)
  end
end
#column(field, **kwargs) ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 47

def column(field, **kwargs)
  new_type = case type
             when :double, :flat
               :flat
             else
               :single
             end
  kwargs[:type] = new_type
  slice(field, **kwargs)
end
#digest_str ⇒ Object
# File 'lib/scout/tsv/util.rb', line 168

def digest_str
  "TSV:{" << Log.fingerprint(self.all_fields || []) << ";" << Log.fingerprint(self.keys) << ";" << Log.fingerprint(self.values) << "}"
end
#dumper_stream(options = {}) ⇒ Object Also known as: stream
# File 'lib/scout/tsv/dumper.rb', line 147 def dumper_stream( = {}) preamble, unmerge, keys, stream = IndiferentHash. , :preamble, :unmerge, :keys, :stream, :preamble => true, :unmerge => false unmerge = false unless @type === :double dumper = TSV::Dumper.new self.annotation_hash.merge() dump_entry = Proc.new do |k,value_list| if unmerge max = value_list.collect{|v| v.length}.max if unmerge == :expand and max > 1 value_list = value_list.collect do |values| if values.length == 1 [values.first] * max else values end end end NamedArray.zip_fields(value_list).each do |values| dumper.add k, values end else dumper.add k, value_list end end if stream.nil? t = Thread.new do begin Thread.current.report_on_exception = true Thread.current["name"] = "Dumper thread" dumper.init(preamble: preamble) if keys keys.each do |k| dump_entry.call k, self[k] end else self.each &dump_entry end dumper.close rescue dumper.abort($!) end end Thread.pass until t["name"] stream = dumper.stream ConcurrentStream.setup(stream, :threads => [t]) stream else dumper.set_stream stream begin dumper.init(preamble: preamble) if keys keys.each do |k| dump_entry.call k, self[k] end else self.each &dump_entry end dumper.close rescue dumper.abort($!) end stream end end |
#each(*args, &block) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 85

def each(*args, &block)
  if block_given?
    super(*args) do |k,v|
      NamedArray.setup(v, @fields) unless @unnamed || @type == :flat || ! (Array === v)
      block.call(k, v)
    end
  else
    super(*args)
  end
end
#filter(filter_dir = nil) ⇒ Object
# File 'lib/scout/tsv/util/filter.rb', line 285

def filter(filter_dir = nil)
  self.extend Filtered
  self.filter_dir = filter_dir
  self.filters = []
  self
end
#fingerprint ⇒ Object
# File 'lib/scout/tsv/util.rb', line 164

def fingerprint
  "TSV:{" << Log.fingerprint(self.all_fields || []) << ";" << Log.fingerprint(self.keys) << "}"
end
#identifier_files ⇒ Object
# File 'lib/scout/tsv/attach.rb', line 229

def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    case
    when (TSV === identifiers.first or identifiers.empty?)
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f)}
    end
  when identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers) ]
  when Path === filename
    path_files = filename.dirname.identifiers
    [path_files].flatten.compact.select{|f| f.exists?}
  when filename
    [Path.setup(filename.dup).dirname.identifiers]
  else
    []
  end
end
#identify_field(name, strict: nil) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 52

def identify_field(name, strict: nil)
  TSV.identify_field(@key_field, @fields, name, strict: strict)
end
#index(*args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 113

def index(*args, **kwargs, &block)
  TSV.index(self, *args, **kwargs, &block)
end
#inspect ⇒ Object
# File 'lib/scout/tsv/util.rb', line 172

def inspect
  fingerprint
end
#melt_columns(value_field, column_field) ⇒ Object
# File 'lib/scout/tsv/util/melt.rb', line 2

def melt_columns(value_field, column_field)
  target = TSV.setup({}, :key_field => "ID", :fields => [key_field, value_field, column_field], :type => :list, :cast => cast)
  each do |k,values|
    i = 0
    values.zip(fields).each do |v,f|
      target["#{k}:#{i}"] = [k,v,f]
      i+=1
    end
  end
  target
end
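An illustrative sketch of #melt_columns, which turns each cell into its own row keyed by "key:index"; the sample data is invented:

tsv = TSV.setup({}, :key_field => "Sample", :fields => ["GeneA", "GeneB"], :type => :list)
tsv["S1"] = ["1.0", "2.0"]

melted = tsv.melt_columns("Value", "Gene")
melted.fields    #=> ["Sample", "Value", "Gene"]
melted["S1:0"]   #=> ["S1", "1.0", "GeneA"]
melted["S1:1"]   #=> ["S1", "2.0", "GeneB"]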
#merge(other) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 176

def merge(other)
  self.annotate(super(other))
end
#options ⇒ Object
# File 'lib/scout/tsv/util.rb', line 62

def options
  annotation_hash
end
#page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object
# File 'lib/scout/tsv/util/sort.rb', line 148

def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
  pstart = psize * (pnum - 1)
  pend = psize * pnum - 1
  field = :key if field == "key"

  keys = sort_by(field || :key, true, &block)
  keys.reverse! if reverse

  if just_keys
    keys[pstart..pend]
  else
    select :key => keys[pstart..pend]
  end
end
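A small sketch of #page, which slices the keys (or rows) after sorting; the table and page size are illustrative:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Score"], :type => :list)
tsv["A"] = ["3"]; tsv["B"] = ["1"]; tsv["C"] = ["2"]

tsv.page(1, 2, "Score", true)   #=> ["B", "C"]  (first page of keys, ordered by Score)
tsv.page(2, 2, "Score", true)   #=> ["A"]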
#pos_index(*args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 208

def pos_index(*args, **kwargs, &block)
  TSV.pos_index(self, *args, **kwargs, &block)
end
#process(field, &block) ⇒ Object
# File 'lib/scout/tsv/util/process.rb', line 2

def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    case
    when type == :single
      field_values = values
    when type == :flat
      field_values = values
    else
      next if values.nil?
      field_values = values[field_pos]
    end

    new_values = case
                 when block.arity == 1
                   yield(field_values)
                 when block.arity == 2
                   yield(field_values, key)
                 when block.arity == 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    case
    when type == :single
      self[key] = new_values
    when type == :flat
      self[key] = new_values
    else
      if ! values[field_pos].frozen? && ((String === values[field_pos] && String === new_values) || (Array === values[field_pos] && Array === new_values))
        values[field_pos].replace new_values
      else
        values[field_pos] = new_values
      end
      self[key] = values
    end
  end

  self
end
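A usage sketch for #process, rewriting one column in place; field names and values are invented, and a one-argument block receives just the field value:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Score"], :type => :list)
tsv["TP53"] = ["0.9"]

# Rescale the Score column; the replacement happens row by row
tsv.process "Score" do |score|
  (score.to_f * 100).round.to_s
end

tsv["TP53"]   #=> ["90"]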
#range_index(*args, **kwargs, &block) ⇒ Object
# File 'lib/scout/tsv/index.rb', line 204

def range_index(*args, **kwargs, &block)
  TSV.range_index(self, *args, **kwargs, &block)
end
#remove_duplicates(pivot = 0) ⇒ Object
# File 'lib/scout/tsv/util/process.rb', line 73

def remove_duplicates(pivot = 0)
  new = self.annotate({})
  self.through do |k,values|
    new[k] = NamedArray.zip_fields(NamedArray.zip_fields(values).uniq)
  end
  new
end
#reorder(key_field = nil, fields = nil, merge: true, one2one: true, **kwargs) ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 4

def reorder(key_field = nil, fields = nil, merge: true, one2one: true, **kwargs)
  res = self.annotate({})
  res.type = kwargs[:type] if kwargs.include?(:type)
  kwargs[:one2one] = one2one

  key_field_name, field_names = with_unnamed do
    traverse key_field, fields, **kwargs do |k,v|
      if res.type == :double && merge && res.include?(k)
        current = res[k]
        if merge == :concat
          v.each_with_index do |new,i|
            next if new.empty?
            current[i].concat(new)
          end
        else
          merged = []
          v.each_with_index do |new,i|
            next if new.empty?
            merged[i] = current[i] + new
          end
          res[k] = merged
        end
      elsif res.type == :flat
        res[k] ||= []
        if merge == :concat
          res[k].concat v
        else
          res[k] += v
        end
      else
        res[k] = v
      end
    end
  end

  res.key_field = key_field_name
  res.fields = field_names
  res
end
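For instance, #reorder can re-key the table on another field, turning the old key into a regular column; the sample identifiers below are invented:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Protein", "Score"], :type => :list)
tsv["TP53"] = ["P04637", "0.9"]

by_protein = tsv.reorder("Protein")
by_protein.key_field   #=> "Protein"
by_protein.fields      #=> ["Gene", "Score"]
by_protein["P04637"]   #=> ["TP53", "0.9"]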
#reset_filters ⇒ Object
# File 'lib/scout/tsv/util/filter.rb', line 292

def reset_filters
  if @filter_dir.nil? or @filter_dir.empty?
    @filters.each do |filter|
      filter.reset
    end if Array === @filters
    return
  end

  Dir.glob(File.join(@filter_dir, '*.filter')).each do |f|
    FileUtils.rm f
  end
end
#select(method = nil, invert = false, &block) ⇒ Object
# File 'lib/scout/tsv/util/select.rb', line 68 def select(method = nil, invert = false, &block) new = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers) self.annotate(new) case when (method.nil? and block_given?) through do |key, values| new[key] = values if invert ^ (yield key, values) end when Array === method method = Set.new method with_unnamed do case type when :single through do |key, value| new[key] = value if invert ^ (method.include? key or method.include? value) end when :list, :flat through do |key, values| new[key] = values if invert ^ (method.include? key or (method & values).length > 0) end else through do |key, values| new[key] = values if invert ^ (method.include? key or (method & values.flatten).length > 0) end end end when Regexp === method with_unnamed do through do |key, values| new[key] = values if invert ^ ([key,values].flatten.select{|v| v =~ method}.any?) end end when ((String === method) || (Symbol === method)) if block_given? case when block.arity == 1 with_unnamed do case when (method == key_field or method == :key) through do |key, values| new[key] = values if invert ^ (yield(key)) end when (type == :single or type == :flat) through do |key, value| new[key] = value if invert ^ (yield(value)) end else pos = identify_field method raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil? through do |key, values| new[key] = values if invert ^ (yield(values[pos])) end end end when block.arity == 2 with_unnamed do case when (method == key_field or method == :key) through do |key, values| new[key] = values if invert ^ (yield(key, key)) end when (type == :single or type == :flat) through do |key, value| new[key] = value if invert ^ (yield(key, value)) end else pos = identify_field method through do |key, values| new[key] = values if invert ^ (yield(key, values[pos])) end end end end else with_unnamed do through do |key, values| new[key] = values if invert ^ ([key,values].flatten.select{|v| v == method}.any?) end end end when Hash === method key = method.keys.first method = method.values.first case when ((Array === method) and (key == :key or key_field == key)) with_unnamed do keys.each do |key| new[key] = self[key] if invert ^ (method.include? key) end end when Array === method with_unnamed do method = Set.new method unless Set === method case type when :single through :key, key do |key, value| new[key] = self[key] if invert ^ (method.include? value) end when :list through :key, key do |key, values| new[key] = self[key] if invert ^ (method.include? values.first) end when :flat #untested through :key, key do |key, values| new[key] = self[key] if invert ^ ((method & values.flatten).any?) end else through :key, key do |key, values| new[key] = self[key] if invert ^ ((method & values.flatten).any?) end end end when Regexp === method with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| v =~ method}.any?) end end when ((String === method) and (method =~ /name:(.*)/)) name = $1 old_unnamed = self.unnamed self.unnamed = false if name.strip =~ /^\/(.*)\/$/ regexp = Regexp.new $1 through :key, key do |key, values| case type when :single values = values.annotate([values]) when :double values = values[0] end new[key] = self[key] if invert ^ (values.select{|v| v.name =~ regexp}.any?) 
end else through :key, key do |key, values| case type when :single values = values.annotate([values]) when :double values = values[0] end new[key] = self[key] if invert ^ (values.select{|v| v.name == name}.any?) end end self.unnamed = old_unnamed when String === method if method =~ /^([<>]=?)(.*)/ with_unnamed do through :key, key do |key, values| value = Array === values ? values.flatten.first : values new[key] = self[key] if value.to_f.send($1, $2.to_f) end end else with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| v == method}.length > 0) end end end when Numeric === method with_unnamed do through :key, key do |key, values| new[key] = self[key] if invert ^ (values.flatten.length >= method) end end when Proc === method with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| method.call(v)}.length > 0) end end end end new end |
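Two common forms of #select, sketched with invented data: a hash of field => accepted values, and a block over key and row:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Family", "Score"], :type => :list)
tsv["TP53"] = ["tumor suppressor", "0.9"]
tsv["SRC"]  = ["kinase", "0.4"]

tsv.select("Family" => ["kinase"]).keys                          #=> ["SRC"]
tsv.select { |gene, values| values["Score"].to_f > 0.5 }.keys    #=> ["TP53"]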
#slice(fields, **kwargs) ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 43

def slice(fields, **kwargs)
  reorder :key, fields, **kwargs
end
#sort(field = nil, just_keys = false, &block) ⇒ Object
# File 'lib/scout/tsv/util/sort.rb', line 75 def sort(field = nil, just_keys = false, &block) field = :all if field.nil? if field == :all elems = collect else elems = [] case type when :single through :key, field do |key, field| elems << [key, field] end when :list, :flat through :key, field do |key, fields| elems << [key, fields.first] end when :double through :key, field do |key, fields| elems << [key, fields.first] end end end if not block_given? if fields == :all if just_keys keys = elems.sort_by{|key, value| key }.collect{|key, values| key} keys = prepare_entity(keys, key_field, .merge(:dup_array => true)) else elems.sort_by{|key, value| key } end else sorted = elems.sort do |a, b| a_value = a.last b_value = b.last a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?) b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?) case when (a_empty and b_empty) 0 when a_empty -1 when b_empty 1 when Array === a_value if a_value.length == 1 and b_value.length == 1 a_value.first <=> b_value.first else a_value.length <=> b_value.length end else a_value <=> b_value end end if just_keys keys = sorted.collect{|key, value| key} keys = prepare_entity(keys, key_field, .merge(:dup_array => true)) unless @unnamed keys else sorted.collect{|key, value| [key, self[key]]} end end else if just_keys keys = elems.sort(&block).collect{|key, value| key} keys = prepare_entity(keys, key_field, .merge(:dup_array => true)) unless @unnamed keys else elems.sort(&block).collect{|key, value| [key, self[key]]} end end end |
#sort_by(field = nil, just_keys = false, &block) ⇒ Object
# File 'lib/scout/tsv/util/sort.rb', line 2 def sort_by(field = nil, just_keys = false, &block) field = :all if field.nil? if field == :all elems = collect else elems = [] case type when :single through :key, field do |key, field| elems << [key, field] end when :list, :flat through :key, field do |key, fields| elems << [key, fields.first] end when :double through :key, field do |key, fields| elems << [key, fields.first] end end end if not block_given? if fields == :all if just_keys keys = elems.sort_by{|key, value| key }.collect{|key, values| key} keys = prepare_entity(keys, key_field, ( || {}).merge(:dup_array => true)) unless @unnamed else elems.sort_by{|key, value| key } end else sorted = elems.sort do |a, b| a_value = a.last b_value = b.last a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?) b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?) case when (a_empty and b_empty) 0 when a_empty -1 when b_empty 1 when Array === a_value if a_value.length == 1 and b_value.length == 1 a_value.first <=> b_value.first else a_value.length <=> b_value.length end else a_value <=> b_value end end if just_keys keys = sorted.collect{|key, value| key} keys = prepare_entity(keys, key_field, ( || {}).merge(:dup_array => true)) unless @unnamed keys else sorted.collect{|key, value| [key, self[key]]} end end else if just_keys keys = elems.sort_by(&block).collect{|key, value| key} keys = prepare_entity(keys, key_field, ( || {}).merge(:dup_array => true)) unless @unnamed keys else elems.sort_by(&block).collect{|key, value| [key, self[key]]} end end end |
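A sketch of #sort_by with invented data; with just_keys it returns only the ordered keys, otherwise [key, row] pairs:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Score"], :type => :list)
tsv["A"] = ["3"]; tsv["B"] = ["1"]; tsv["C"] = ["2"]

tsv.sort_by("Score", true)                          #=> ["B", "C", "A"]
tsv.sort_by { |gene, values| values.first.to_f }    #=> [["B", ["1"]], ["C", ["2"]], ["A", ["3"]]]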
#subset(keys) ⇒ Object
# File 'lib/scout/tsv/util/select.rb', line 256

def subset(keys)
  new = self.annotate({})
  self.with_unnamed do
    keys.each do |k|
      new[k] = self[k] if self.include?(k)
    end
  end
  new
end
#summary ⇒ Object
# File 'lib/scout/tsv/util.rb', line 118

def summary
  key = nil
  values = nil
  self.each do |k, v|
    key = k
    values = v
    break
  end

  filename = @filename
  filename = "No filename" if filename.nil? || String === filename && filename.empty?
  filename.find if Path === filename
  filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path

  with_unnamed do
    <<-EOF
Filename = #{filename}
Key field = #{key_field || "*No key field*"}
Fields = #{fields ? Log.fingerprint(fields) : "*No field info*"}
Type = #{type}
Size = #{size}
namespace = #{Log.fingerprint namespace}
identifiers = #{Log.fingerprint identifiers}
Example:
- #{key} -- #{Log.fingerprint values }
    EOF
  end
end
#to_double ⇒ Object
# File 'lib/scout/tsv/transformer.rb', line 135

def to_double
  return self if self.type == :double
  res = self.annotate({})
  self.with_unnamed do
    transformer = Transformer.new self, res
    transformer.type = :double
    transformer.traverse do |k,v|
      case self.type
      when :single
        [k, [[v]]]
      when :list
        [k, v.collect{|v| [v] }]
      when :flat
        [k, [v]]
      end
    end
  end
  res
end
#to_flat ⇒ Object
# File 'lib/scout/tsv/transformer.rb', line 168

def to_flat
  res = self.annotate({})
  transformer = Transformer.new self, res
  transformer.type = :flat
  transformer.traverse do |k,v|
    v = Array === v ? v.flatten : [v]
    [k, v]
  end
  res
end
#to_hash ⇒ Object
# File 'lib/scout/tsv.rb', line 128

def to_hash
  self.dup
end
#to_list ⇒ Object
# File 'lib/scout/tsv/transformer.rb', line 116

def to_list
  res = self.annotate({})
  self.with_unnamed do
    transformer = Transformer.new self, res
    transformer.type = :list
    transformer.traverse do |k,v|
      case self.type
      when :single
        [k, [v]]
      when :double
        [k, v.collect{|v| v.first }]
      when :flat
        [k, v.slice(0,1)]
      end
    end
  end
  res
end
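The conversion helpers sketched on a tiny :double table (values invented): #to_list keeps the first value of each field, while #to_flat flattens all values into a single array:

double = TSV.setup({}, :key_field => "Gene", :fields => ["Transcript"], :type => :double)
double["TP53"] = [["ENST-1", "ENST-2"]]

double.to_list["TP53"]   #=> ["ENST-1"]
double.to_flat["TP53"]   #=> ["ENST-1", "ENST-2"]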
#to_s(options = {}) ⇒ Object
# File 'lib/scout/tsv/dumper.rb', line 220

def to_s(options = {})
  dumper_stream({stream: ''}.merge(options))
end
#to_single ⇒ Object
# File 'lib/scout/tsv/transformer.rb', line 156

def to_single
  res = self.annotate({})
  transformer = Transformer.new self, res
  transformer.type = :single
  transformer.unnamed = true
  transformer.traverse do |k,v|
    v = v.first while Array === v
    [k, v]
  end
  res
end
#translate(*args, **kwargs) ⇒ Object
# File 'lib/scout/tsv/change_id/translate.rb', line 144

def translate(*args, **kwargs)
  TSV.translate(self, *args, **kwargs)
end
#transpose(key_field = "Unkown ID") ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 81

def transpose(key_field = "Unkown ID")
  case type
  when :single, :flat
    self.to_list.transpose_list key_field
  when :list
    transpose_list key_field
  when :double
    transpose_double key_field
  end
end
#transpose_double(key_field = "Unkown ID") ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 74

def transpose_double(key_field = "Unkown ID")
  sep = "-!SEP--#{rand 10000}!-"
  tmp = self.to_list{|v| v * sep}
  new = tmp.transpose_list(key_field)
  new.to_double{|v| v.split(sep)}
end
#transpose_list(key_field = "Unkown ID") ⇒ Object
# File 'lib/scout/tsv/util/reorder.rb', line 59

def transpose_list(key_field="Unkown ID")
  new_fields = keys.dup
  new = self.annotate({})

  TSV.setup(new, :key_field => key_field, :fields => new_fields, :type => type, :filename => filename, :identifiers => identifiers)

  m = Matrix.rows values
  new_rows = m.transpose.to_a

  fields.zip(new_rows) do |key,row|
    new[key] = row
  end

  new
end
#traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: false, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) ⇒ Object Also known as: through
# File 'lib/scout/tsv/traverse.rb', line 3 def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: false, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) key_field = key_field_pos if key_field.nil? fields = fields_pos.dup if fields.nil? type = @type if type.nil? key_pos = self.identify_field(key_field) fields = self.all_fields if fields == :all fields = [fields] unless fields.nil? || Array === fields positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields) positions = nil if fields == self.fields if key_pos == :key key_name = @key_field else key_name = @fields[key_pos] if positions.nil? positions = (0..@fields.length-1).to_a positions.delete_at key_pos positions.unshift :key end end fields = positions.collect{|p| p == :key ? self.key_field : self.fields[p] } if positions if positions.nil? && key_pos == :key field_names = @fields.dup elsif positions.nil? && key_pos != :key field_names = @fields.dup field_names.delete_at key_pos unless fields == :all elsif positions.include?(:key) field_names = positions.collect{|p| p == :key ? @key_field : @fields[p] } else field_names = @fields.values_at *positions end key_index = positions.index :key if positions positions.delete :key if positions = "Traverse #{Log.fingerprint self}" Log.debug = if TrueClass === type_swap_tag = [type.to_s, @type.to_s] * "_" Log::ProgressBar.(self, ) do || with_unnamed unnamed do each do |key,values| .tick if values = [values] if @type == :single if positions.nil? if key_pos != :key values = values.dup if @type == :flat key = values else key = values.delete_at(key_pos) end end else orig_key = key key = @type == :flat ? values : values[key_pos] if key_pos != :key values = values.values_at(*positions) NamedArray.setup(values, fields) if key_index if @type == :double values.insert key_index, [orig_key] else values.insert key_index, orig_key end end end values = TSV.cast_value(values, cast) if cast if Array === key key = key.uniq if uniq if @type == :double && one2one if one2one == :strict key.each_with_index do |key_i,i| if type == :double v_i = values.collect{|v| [v[i]] } else v_i = values.collect{|v| v[i] } end yield key_i, v_i end else key.each_with_index do |key_i,i| if type == :double v_i = values.collect{|v| [v[i] || v.first] } else v_i = values.collect{|v| v[i] || v.first } end yield key_i, v_i, @fields end end else key.each_with_index do |key_i, i| if type == :double yield key_i, values elsif type == :list yield key_i, values.collect{|v| v[i] } elsif type == :flat yield key_i, values.flatten elsif type == :single yield key_i, values.first end end end else if type == @type if type == :single yield key, values.first else yield key, values end else case type_swap_tag when "double_list" yield key, values.collect{|v| [v] } when "double_flat" yield key, [values] when "double_single" yield key, [values] when "list_double" yield key, values.collect{|v| v.first } when "list_flat" yield key, [values.first] when "list_single" yield key, values when "flat_double" yield key, values.flatten when "flat_list" yield key, values.flatten when "flat_single" yield key, values when "single_double" yield key, values.flatten.first when "single_list" yield key, values.first when "single_flat" yield key, values.first end end end end end end [key_name, field_names] end |
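A sketch of #traverse (aliased as #through) re-keying iteration on another field of a :double table; the data is invented, and the block receives each new key and the selected field values:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Protein", "Score"], :type => :double)
tsv["TP53"] = [["P04637"], ["0.9"]]

tsv.traverse "Protein", ["Score"] do |protein, values|
  # protein => "P04637", values => [["0.9"]]
end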
#unzip(*args, **kwargs) ⇒ Object
# File 'lib/scout/tsv/util/unzip.rb', line 83

def unzip(*args, **kwargs)
  TSV.unzip(self, *args, **kwargs)
end
#unzip_replicates ⇒ Object
# File 'lib/scout/tsv/util/unzip.rb', line 87

def unzip_replicates
  raise "Can only unzip replicates in :double TSVs" unless type == :double

  new = {}
  self.with_unnamed do
    through do |k,vs|
      NamedArray.zip_fields(vs).each_with_index do |v,i|
        new[k + "(#{i})"] = v
      end
    end
  end

  self.annotate(new)
  new.type = :list

  new
end
#with_filters(filters, &block) ⇒ Object
# File 'lib/scout/tsv/util/filter.rb', line 303

def with_filters(filters, &block)
  filter
  begin
    filters.each{|field,value| add_filter field, value }
  ensure
    reset_filters
  end
end
#with_unnamed(unnamed = true) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 108

def with_unnamed(unnamed = true)
  begin
    old_unnamed = @unnamed
    @unnamed = unnamed
    yield
  ensure
    @unnamed = old_unnamed
  end
end
#write_file(file) ⇒ Object
# File 'lib/scout/tsv/dumper.rb', line 226

def write_file(file)
  Open.open(file, mode: 'w') do |f|
    dumper_stream(stream: f)
  end
end
#zip(merge = false, field = "New Field", sep = ":") ⇒ Object
# File 'lib/scout/tsv/util/unzip.rb', line 105

def zip(merge = false, field = "New Field", sep = ":")
  new = {}
  self.annotate new
  new.type = :double if merge

  new.with_unnamed do
    if merge
      self.through do |key,values|
        new_key, new_value = key.split(sep)
        new_values = values + [[new_value] * values.first.length]
        if new.include? new_key
          current = new[new_key]
          current.each_with_index do |v,i|
            v.concat(new_values[i])
          end
        else
          new[new_key] = new_values
        end
      end
    else
      self.through do |key,values|
        new_key, new_value = key.split(sep)
        new_values = values + [new_value]
        new[new_key] = new_values
      end
    end
  end

  if self.key_field and self.fields
    new.key_field = self.key_field.partition(sep).first
    new.fields = new.fields + [field]
  end

  new
end
#zip_new(key, values, insitu: :lax) ⇒ Object
# File 'lib/scout/tsv/util.rb', line 66

def zip_new(key, values, insitu: :lax)
  values = values.collect{|v| Array === v ? v : [v] } unless Array === values.first
  if current_values = self[key]
    if insitu == :lax
      self[key] = NamedArray.add_zipped(current_values, values)
    elsif insitu
      NamedArray.add_zipped(current_values, values)
    else
      self[key] = NamedArray.add_zipped(current_values.dup, values)
    end
  else
    if insitu && insitu != :lax
      self[key] = values.dup
    else
      self[key] = values
    end
  end
end
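A sketch of #zip_new on a :double table (data invented, and the zipped result assumes NamedArray.add_zipped appends values per field): the first call stores the values, later calls zip new values onto the same fields:

tsv = TSV.setup({}, :key_field => "Gene", :fields => ["Transcript"], :type => :double)
tsv.zip_new "TP53", ["ENST-1"]
tsv.zip_new "TP53", ["ENST-2"]
tsv["TP53"]   #=> [["ENST-1", "ENST-2"]]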