Module: TSV

Defined in:
lib/rbbt/tsv.rb,
lib/rbbt/util/R.rb,
lib/rbbt/tsv/melt.rb,
lib/rbbt/tsv/util.rb,
lib/rbbt/tsv/excel.rb,
lib/rbbt/tsv/index.rb,
lib/rbbt/tsv/attach.rb,
lib/rbbt/tsv/dumper.rb,
lib/rbbt/tsv/filter.rb,
lib/rbbt/tsv/matrix.rb,
lib/rbbt/tsv/parser.rb,
lib/rbbt/tsv/stream.rb,
lib/rbbt/tsv/accessor.rb,
lib/rbbt/tsv/parallel.rb,
lib/rbbt/tsv/change_id.rb,
lib/rbbt/tsv/manipulate.rb,
lib/rbbt/util/excel2tsv.rb,
lib/rbbt/tsv/attach/util.rb,
lib/rbbt/tsv/field_index.rb,
lib/rbbt/tsv/serializers.rb,
lib/rbbt/tsv/parallel/through.rb,
lib/rbbt/tsv/parallel/traverse.rb

Defined Under Namespace

Classes: CleanSerializer, Dumper, FloatArraySerializer, FloatSerializer, IntegerArraySerializer, IntegerSerializer, Parser, StringArraySerializer, StringDoubleArraySerializer, StringSerializer, TSVMarshalSerializer, TSVSerializer, Traverser

Constant Summary collapse

TSV_SERIALIZER =
YAML
SERIALIZED_NIL =
TSV_SERIALIZER.dump nil
KEY_PREFIX =

{{{ TSV ENTRIES and ENTRY_KEYS

"__tsv_hash_"
ENTRIES =
[]
ENTRY_KEYS =
Set.new
NIL_VALUE =
"NIL_VALUE"
SERIALIZER_ALIAS =
{
  :integer => IntegerSerializer, 
  :float => FloatSerializer, 
  :integer_array => IntegerArraySerializer,
  :float_array => FloatArraySerializer,
  :marshal => Marshal,
  :single => StringSerializer,
  :string => StringSerializer,
  :list => StringArraySerializer,
  :flat => StringArraySerializer,
  :double => StringDoubleArraySerializer,
  :clean => CleanSerializer,
  :tsv => TSVSerializer,
  :marshal_tsv => TSVMarshalSerializer
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.field_index_dirObject

Returns the value of attribute field_index_dir.



4
5
6
# File 'lib/rbbt/tsv/field_index.rb', line 4

# Module-level reader for the +field_index_dir+ attribute: returns
# @field_index_dir, which is nil until something assigns it.
def field_index_dir
  @field_index_dir
end

.lock_dirObject

Returns the value of attribute lock_dir.



24
25
26
# File 'lib/rbbt/tsv.rb', line 24

# Module-level reader for the +lock_dir+ attribute: returns @lock_dir, which
# is nil until something assigns it.
def lock_dir
  @lock_dir
end

.unnamedObject

Returns the value of attribute unnamed.



24
25
26
# File 'lib/rbbt/tsv.rb', line 24

# Module-level reader for the +unnamed+ attribute: returns @unnamed, which is
# nil until something assigns it.
def unnamed
  @unnamed
end

Instance Attribute Details

#entity_optionsObject

Returns the value of attribute entity_options.



11
12
13
# File 'lib/rbbt/tsv/accessor.rb', line 11

# Instance reader for the +entity_options+ attribute: returns
# @entity_options, which is nil until something assigns it.
def entity_options
  @entity_options
end

#entity_templatesObject

Returns the value of attribute entity_templates.



11
12
13
# File 'lib/rbbt/tsv/accessor.rb', line 11

# Instance reader for the +entity_templates+ attribute: returns
# @entity_templates, which is nil until something assigns it.
def entity_templates
  @entity_templates
end

#field_indicesObject

Returns the value of attribute field_indices.



10
11
12
# File 'lib/rbbt/tsv/field_index.rb', line 10

# Instance reader for the +field_indices+ attribute: returns @field_indices,
# which is nil until something assigns it.
def field_indices
  @field_indices
end

#monitorObject

Returns the value of attribute monitor.



5
6
7
# File 'lib/rbbt/tsv/manipulate.rb', line 5

# Instance reader for the +monitor+ attribute: returns @monitor, which is nil
# until something assigns it.
def monitor
  @monitor
end

#serializer_moduleObject

Returns the value of attribute serializer_module.



11
12
13
# File 'lib/rbbt/tsv/accessor.rb', line 11

# Instance reader for the +serializer_module+ attribute: returns
# @serializer_module, which is nil until something assigns it.
def serializer_module
  @serializer_module
end

#unnamedObject

Returns the value of attribute unnamed.



11
12
13
# File 'lib/rbbt/tsv/accessor.rb', line 11

# Instance reader for the +unnamed+ attribute: returns @unnamed, which is nil
# until something assigns it.
def unnamed
  @unnamed
end

Class Method Details

._extended(data) ⇒ Object



128
129
130
131
132
133
134
135
# File 'lib/rbbt/tsv/accessor.rb', line 128

# Gives +data+ a +writable+ accessor on its own singleton class, unless it
# already responds to +write+ (in which case nothing is done).
def self._extended(data)
  return if data.respond_to? :write

  data.singleton_class.class_eval do
    attr_accessor :writable
  end
end

.build_traverse_index(files, options = {}) ⇒ Object



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/rbbt/tsv/attach/util.rb', line 291

# Looks for a chain of shared fields across +files+ (see find_path) and, when
# one exists, turns it into an index with index_for_traversal.
#
# Options default to :in_namespace => false and :persist_input => true; the
# whole option hash is forwarded to find_path and :persist_input is forwarded
# to index_for_traversal.
#
# Returns nil when find_path finds no path.
def self.build_traverse_index(files, options = {})
  options       = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  persist_input = options[:persist_input]

  path = find_path(files, options)
  return nil if path.nil?

  Log.debug "Found Traversal: #{path.map { |step| step.first }.join(" => ")}"

  index_for_traversal path, persist_input
end

.change_key(tsv, format, options = {}, &block) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/rbbt/tsv/change_id.rb', line 6

# Re-keys +tsv+ by +format+.
#
# When +format+ is already one of the fields this is just
# <tt>tsv.reorder(format)</tt>. Otherwise the values are copied, the copy's
# fields are temporarily prefixed with "TMP-", the +format+ field is attached
# from the identifiers, and the result is reordered around it and converted
# back to the original type (+block+ is passed to +to_list+ for :list).
#
# Options: :identifiers (defaults to +tsv.identifiers+; an Array is treated as
# [file to attach, identifiers for that attach]).
def self.change_key(tsv, format, options = {}, &block)
  options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers

  # NOTE(review): :persist_input is pulled out of the options but the attach
  # calls below always pass :persist_input => true -- confirm that is intended
  identifiers, _persist_input = Misc.process_options options, :identifiers, :persist_input

  return tsv.reorder(format) if tsv.fields.include? format

  copy = {}
  tsv.each { |key, values| copy[key] = values.dup }

  orig_fields = tsv.fields
  tsv = tsv.annotate copy
  copy.fields = copy.fields.collect { |f| "TMP-" << f }

  orig_type = tsv.type
  tsv = tsv.to_double unless orig_type == :double

  attach_options = {:fields => [format], :persist_input => true}
  tsv = if Array === identifiers
          tsv.attach identifiers.first, attach_options.merge(:identifiers => identifiers.last)
        else
          tsv.attach identifiers, attach_options
        end

  # The attached field is the last one: make it the key and keep the rest
  tsv = tsv.reorder(format, tsv.fields[0..-2])

  case orig_type
  when :flat then tsv = tsv.to_flat
  when :list then tsv = tsv.to_list(&block)
  end

  tsv.fields = orig_fields

  tsv
end

.collapse_stream(input, options = {}) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/rbbt/tsv/stream.rb', line 4

# Sorts the stream obtained from +input+ and returns a TSV::Dumper whose
# stream is the output of Misc.collapse_stream for it.
#
# +options+ (default :sep => "\t") are given, duplicated, to the TSV::Parser
# that reads the sorted stream; the header is rebuilt from that parser.
def self.collapse_stream(input, options = {})
  options = Misc.add_defaults options, :sep => "\t"

  sorted = Misc.sort_stream(TSV.get_stream(input))
  parser = TSV::Parser.new sorted, options.dup
  dumper = TSV::Dumper.new parser
  header = TSV.header_lines(parser.key_field, parser.fields, parser.options)

  # The dumper's own pipe is closed on both ends; its stream is replaced below
  dumper.close_in
  dumper.close_out
  dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header

  dumper
end

.entry(*entries) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/rbbt/tsv/accessor.rb', line 154

# Declares TSV entries. For each name (converted with +to_s+) this:
# * appends the name to ENTRIES and KEY_PREFIX + name to ENTRY_KEYS
# * defines a reader on TSV that returns the cached instance variable when it
#   is set, and otherwise loads the value stored under the prefixed key
#   (through +load_entry_value+) and caches it
# * defines a writer on TSV that caches the value and stores its dump
#   (through +dump_entry_value+) under the prefixed key; Path values are
#   stored as plain strings
#
# Returns the list of entry names as strings.
def self.entry(*entries)
  names = entries.collect(&:to_s)
  ENTRIES.concat names

  names.each do |name|
    key  = KEY_PREFIX + name
    ivar = "@#{name}".to_sym
    ENTRY_KEYS << key

    TSV.send(:define_method, name) do
      if instance_variable_defined?(ivar)
        instance_variable_get(ivar)
      else
        loaded = load_entry_value(self.send(:[], key, :entry_key))
        instance_variable_set(ivar, loaded)
        loaded
      end
    end

    TSV.send(:define_method, "#{name}=") do |value|
      instance_variable_set(ivar, value)
      value = value.to_s if Path === value
      self.send(:[]=, key, dump_entry_value(value), :entry_key)
      value
    end
  end
end

.excel(tsv, filename, options = {}) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/rbbt/tsv/excel.rb', line 3

# Writes +tsv+ to +filename+ as a workbook with a single worksheet: a header
# row followed by one row per entry (key first, then its values).
#
# Options (each removed from +options+ with Misc.process_options):
# * :name         - when set, keys and values that respond to +name+ are
#                   written as their name (or as themselves if the name is
#                   nil or false)
# * :sort_by      - passed to <tt>tsv.sort_by</tt> to order the rows
# * :sort_by_cast - method sent to the sort value (to its first element when
#                   it is an Array) before sorting
# * :fields       - header row; defaults to <tt>tsv.all_fields</tt>
#
# Array values are written into a single cell joined by ", ".
def self.excel(tsv, filename, options ={})
  name         = Misc.process_options options, :name
  sort_by      = Misc.process_options options, :sort_by
  sort_by_cast = Misc.process_options options, :sort_by_cast
  fields       = Misc.process_options(options, :fields) || tsv.all_fields

  # What gets written for a key or value, honoring the :name option
  display = lambda do |obj|
    (name and obj.respond_to?(:name)) ? (obj.name || obj) : obj
  end

  book  = Spreadsheet::Workbook.new
  sheet = book.create_worksheet
  sheet.row(0).concat fields

  rows = if not sort_by
           tsv
         elsif sort_by_cast
           tsv.sort_by(sort_by) do |_key, value|
             (Array === value ? value.first : value).send(sort_by_cast)
           end
         else
           tsv.sort_by sort_by
         end

  row_number = 1
  rows.each do |key, values|
    cells = [display.call(key)]

    values = [values] unless Array === values
    values.each do |value|
      shown = display.call(value)
      cells.push(Array === shown ? shown * ", " : shown)
    end

    sheet.row(row_number).concat cells
    row_number += 1
  end

  book.write filename
end

.excel2tsv(file, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rbbt/util/excel2tsv.rb', line 6

# Dumps one worksheet of the spreadsheet +file+ into a temporary tab-separated
# file and opens that file with TSV.open.
#
# Options (both are deleted from the caller's +options+ hash; whatever is left
# is passed to TSV.open):
# * :sheet  - worksheet to read, defaults to 0
# * :header - unless it is +false+, the first row is written as a "#" header
def self.excel2tsv(file, options = {})
  sheet_id   = options.delete(:sheet) || 0
  has_header = options.delete(:header) != false

  TmpFile.with_file do |filename|
    workbook  = Spreadsheet.open Open.open(file)
    worksheet = workbook.worksheet sheet_id

    rows = []
    worksheet.each { |row| rows << row.values_at(0...row.size) }

    File.open(filename, 'w') do |f|
      f.puts "#" + rows.shift * "\t" if has_header
      rows.each { |row| f.puts row * "\t" }
    end

    TSV.open(filename, options)
  end
end

.field_match_counts(file, values, options = {}) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/rbbt/tsv/util.rb', line 38

# Counts, per field of +file+, how many of +values+ appear in that field.
#
# A "value<TAB>field" listing of the whole TSV is persisted as a string
# (persist prefix defaults to "Field_Matches"); the counting itself is done by
# a grep/cut/sort/uniq shell pipeline over that listing.
#
# Returns a :single TSV cast with :to_i; if opening the pipeline output fails
# for any reason an empty TSV with the same settings is returned instead.
def self.field_match_counts(file, values, options = {})
  options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
  persist_options = Misc.pull_keys options, :persist

  from_tsv = TSV === file
  filename = from_tsv ? file.filename : file
  path = Persist.persist(filename, :string, persist_options.merge(:no_load => true)) do
    tsv = from_tsv ? file : TSV.open(file)
    tsv.unnamed = true
    fields = tsv.fields

    text = ""
    tsv.through do |gene, names|
      names.zip(fields).each do |list, format|
        # Note that empty names are removed from the list in place
        list.delete_if { |name| name.empty? }
        next if list.empty?
        text << list.collect { |name| [name, format].join("\t") }.join("\n") << "\n"
      end
    end
    text
  end

  TmpFile.with_file(values.uniq * "\n") do |value_file|
    cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' |cut -f 2 | sed 's/HEADERNOMATCH//' | sort|uniq -c|sed 's/^ *//;s/ /\t/'"
    begin
      TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
    rescue
      TSV.setup({}, :type => :single, :cast => :to_i)
    end
  end
end

.find_path(files, options = {}) ⇒ Object

May make an extra index!



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/rbbt/tsv/attach/util.rb', line 211

# Finds the fields that link each file in +files+ to the next one.
#
# For every pair of consecutive files the first field of the earlier file that
# also appears in the later one is taken. With the :in_namespace option the
# candidates for the first file are restricted to that field (when it is one
# of its fields) or to its +all_namespace_fields+ for that namespace.
#
# Returns nil as soon as two consecutive files share no field. Otherwise
# returns [field, file] pairs; when the last link is not the first field of
# the last file, that field is appended so the last file gets a pair as well.
def self.find_path(files, options = {})
  options      = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  ids = if in_namespace
          head = files.first
          first_ids = head.all_fields.include?(in_namespace) ? [in_namespace] : head.all_namespace_fields(in_namespace)
          [first_ids] + files[1..-1].collect { |f| f.all_fields }
        else
          files.collect { |f| f.all_fields }
        end

  id_list = []
  ids.each_cons(2) do |current, following|
    shared = current.find { |field| following.select { |f| field == f }.any? }
    return nil if shared.nil?
    id_list << shared
  end

  last_key = files.last.all_fields.first
  id_list << last_key unless id_list.last == last_key

  # zip stops at the length of id_list, so a trailing file may go unpaired
  id_list.zip(files)
end

.find_traversal(tsv1, tsv2, options = {}) ⇒ Object



308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/rbbt/tsv/attach/util.rb', line 308

# Searches for an index that leads from +tsv1+ to +tsv2+, possibly through
# their identifier files (and options[:identifiers] on the +tsv1+ side).
#
# Candidate chains are tried with build_traverse_index, growing the list of
# files on the +tsv1+ side one at a time and, for each, trying every prefix of
# the +tsv2+ side. The first index found is returned; nil if none is.
#
# Note that the arrays returned by +identifier_files+ are modified in place
# (the tsv itself is unshifted onto them), except for +tsv1+ when
# options[:identifiers] is given, since a new array is built in that case.
def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false

  identifiers1 = tsv1.identifier_files || []
  identifiers1 += [options[:identifiers]].flatten if options[:identifiers]
  identifiers2 = tsv2.identifier_files || []

  identifiers1.unshift tsv1
  identifiers2.unshift tsv2

  files1 = []
  while identifiers1.any?
    files1.push identifiers1.shift

    (1..identifiers2.length).each do |count|
      candidates = files1 + identifiers2.first(count).reverse
      index = build_traverse_index(candidates, options)
      return index unless index.nil?
    end
  end

  nil
end

.get_filename(file) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/rbbt/tsv/util.rb', line 68

# Works out a name for +file+:
# * a Step (when that class is loaded) gives its path
# * a Path, or a String naming something that exists or is remote according
#   to Open, is returned as is
# * any other String is treated as content and named "String-<digest>"
# * objects responding to +filename+ give that
# * other objects responding to +gets+ have no name (nil)
#
# Raises for anything else.
def self.get_filename(file)
  if defined?(Step) && Step === file
    file.path
  elsif Path === file
    file
  elsif String === file
    (Open.exists?(file) || Open.remote?(file)) ? file : "String-#{Misc.digest file}"
  elsif file.respond_to?(:filename)
    file.filename
  elsif file.respond_to?(:gets)
    nil
  else
    raise "Cannot get filename from: #{file.inspect}"
  end
end

.get_stream(file, open_options = {}) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/rbbt/tsv/util.rb', line 87

# Turns +file+ into something that can be read as a stream:
# * Zlib::GzipReader, Bgzf (when loaded) and TSV objects are returned as is
# * TSV::Dumper and TSV::Parser give their +stream+
# * a Path is opened with +open_options+
# * Tempfile, IO, StringIO and File objects are returned, rewound first when
#   they are at EOF
# * a String that is remote (according to Open) or names an existing file is
#   opened with Open.open; any other String is wrapped in a StringIO
# * a Step (when loaded) gives its live stream, or is joined and its path
#   opened when it has none
# * an Array is written line by line into a pipe
#
# Raises for anything else.
def self.get_stream(file, open_options = {})
  case file
  when Zlib::GzipReader
    file
  when (defined? Bgzf and Bgzf)
    file
  when TSV
    file
  when TSV::Dumper
    file.stream
  when TSV::Parser
    file.stream
  when Path
    file.open(open_options)
  when (defined? Tempfile and Tempfile)
    # Rewinding is best effort: failures are ignored on purpose
    begin
      file.rewind if file.respond_to?(:rewind) and file.eof?
    rescue
    end
    file
  when IO, StringIO, File
    # Rewinding is best effort: failures are ignored on purpose
    begin
      file.rewind if file.respond_to?(:rewind) and file.eof?
    rescue
    end
    file
  when String
    # File.exist? rather than File.exists?, which Ruby 3.2 removed
    if Open.remote?(file) or File.exist? file
      Open.open(file, open_options)
    else
      StringIO.new file
    end
  when (defined? Step and Step)
    file.grace
    stream = file.get_stream
    if stream
      stream
    else
      file.join
      get_stream(file.path)
    end
  when Array
    Misc.open_pipe do |sin|
      file.each do |l|
        sin.puts l
      end
    end
  else
    raise "Cannot get stream from: #{file.inspect}"
  end
end

.get_streams_to_close(obj) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 422

# Collects the streams behind +obj+ that should be closed:
# * an IO or File is itself the stream
# * a TSV::Parser contributes nothing
# * a TSV::Dumper contributes its +in_stream+
# * a Step (when loaded) contributes its result (an IO, or the +in_stream+ of
#   a TSV::Dumper result) preceded by the streams of its inputs and
#   dependencies, gathered recursively
#
# Returns an Array, empty for any other kind of object.
def self.get_streams_to_close(obj)
  close_streams = []
  case obj
  when IO, File
    close_streams << obj
  when TSV::Parser
  when TSV::Dumper
    # +result+ is what the Step branch below reads; a dumper carries its
    # in_stream itself, so only go through +result+ if the object has one
    dumper = obj.respond_to?(:result) ? obj.result : obj
    close_streams << dumper.in_stream
  when (defined? Step and Step)
    obj.mutex.synchronize do
      case obj.result
      when IO
        close_streams << obj.result
      when TSV::Dumper
        close_streams << obj.result.in_stream
      end
    end
    obj.inputs.each do |input|
      close_streams = get_streams_to_close(input) + close_streams
    end
    obj.dependencies.each do |dependency|
      close_streams = get_streams_to_close(dependency) + close_streams
    end
  end 
  close_streams
end

.guess_max(obj) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 18

# Best-effort guess of how many entries +obj+ holds:
# * a finished Step (when loaded): line count of its path
# * a TSV: its length; an Array or Hash: its size
# * a File, Path or String naming an existing file: line count from `wc -l`,
#   unless Misc reports it as gzip or bgzip
#
# Returns nil when no guess is possible, and also when anything at all goes
# wrong while guessing.
def self.guess_max(obj)
  begin
    case obj
    when (defined? Step and Step)
      if obj.done?
        CMD.cmd("wc -l '#{obj.path.find}'").read.to_i
      else
        nil
      end
    when TSV
      obj.length
    when Array, Hash
      obj.size
    when File
      # A plain File has no +filename+; fall back to its path
      filename = obj.respond_to?(:filename) ? obj.filename : obj.path
      return nil if Misc.gzip?(filename) or Misc.bgzip?(filename)
      CMD.cmd("wc -l '#{filename}'").read.to_i
    when Path
      return nil if Misc.gzip?(obj) or Misc.bgzip?(obj)
      CMD.cmd("wc -l '#{obj.find}'").read.to_i
    when String
      # File.exist? rather than File.exists?, which Ruby 3.2 removed
      if File.exist? obj
        return nil if Misc.gzip?(obj) or Misc.bgzip?(obj)
        CMD.cmd("wc -l '#{obj}'").read.to_i
      else
        nil
      end
    end
  rescue Exception
    # NOTE(review): this also swallows Interrupt and SystemExit -- confirm
    # whether StandardError would be enough here
    nil
  end
end

.header_lines(key_field, fields, entry_hash = nil) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/rbbt/tsv/util.rb', line 163

# Builds the header text of a TSV: an optional preamble followed, when
# +fields+ is given, by a "#<key_field><sep><field><sep>..." line (the key
# field is written as "ID" when +key_field+ is nil).
#
# +entry_hash+ may provide :sep (default "\t") and :preamble. Without a
# :preamble, one is generated with Misc.hash2string from the hash itself when
# it has any non-nil value.
#
# Returns the header as a String, empty when there is nothing to write.
def self.header_lines(key_field, fields, entry_hash = nil)
  # The separator must have a value even when no entry_hash is given,
  # otherwise building the fields line fails
  sep = "\t"
  preamble = nil

  if Hash === entry_hash
    sep = entry_hash[:sep] if entry_hash[:sep]
    preamble = entry_hash[:preamble]
  end

  if preamble.nil? and entry_hash and entry_hash.values.compact.any?
    preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n"
  end

  str = ""
  str << preamble.strip << "\n" if preamble and not preamble.empty?
  str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n" if fields

  str
end

.identify_field(key_field, fields, field) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/rbbt/tsv/util.rb', line 139

# Resolves +field+ to :key or to a position in +fields+.
#
# * nil and :key mean the key
# * other Symbols are resolved as Strings
# * Integers are returned untouched
# * a String is matched against +key_field+, then against +fields+, and
#   finally, when it is all digits, read as a position
# * when +fields+ is nil a String can only be a position
#
# Raises when the field cannot be resolved.
def self.identify_field(key_field, fields, field)
  return :key  if field.nil?
  return field if Integer === field

  if Symbol === field
    return field if field == :key
    return identify_field(key_field, fields, field.to_s)
  end

  unless String === field
    raise "Field #{ field } was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
  end

  if fields.nil?
    raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
    return identify_field(key_field, fields, field.to_i)
  end

  return :key if key_field == field

  position = fields.index field
  return position unless position.nil?
  return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/

  raise "Field #{ field } was not found. Options: #{fields * ", "}"
end

.index(file, options = {}) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/rbbt/tsv/index.rb', line 124

# Builds, through Persist.persist_tsv, an index for +file+.
#
# The persist keys are pulled out of +options+ (the prefix defaults to
# "StaticIndex[<target>]", with :key as target when options[:target] is not
# set). Inside the persistence block the data keys are pulled out as well and
# used to open +file+; the remaining options, plus :persist_data and :persist,
# go to the +index+ method of the opened TSV.
def self.index(file, options = {})
  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"

  Log.debug "Static Index: #{ file } - #{options.inspect}"

  Persist.persist_tsv(nil, file, options, persist_options) do |data|
    data_options = Misc.pull_keys options, :data
    source = TSV.open(file, data_options)

    source.with_monitor(:desc => "Creating Index for #{ file }") do
      source.index(options.merge(:persist_data => data, :persist => persist_options[:persist]))
    end
  end
end

.index_for_traversal(path, persist_input = false) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/rbbt/tsv/attach/util.rb', line 245

# Chains the steps of +path+ into a single index.
#
# +path+ is a list of [key, file] pairs and is consumed (shifted) as it is
# processed. The first pair is the data file: an index from its key field to
# the first key is built unless that key already is its key field. Every
# following pair contributes an index from the current key to the next key,
# which is either taken as the starting index or composed onto the one built
# so far.
#
# +persist_input+ is passed as :persist to the indices built for the steps
# after the first; the data file index is never persisted.
#
# Returns the resulting index; nil when +path+ only holds the data file and
# no index was required for it.
def self.index_for_traversal(path, persist_input = false)
  data_key, data_file = path.shift
  data_index = if data_key == data_file.key_field
                 Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
                 nil
               else
                 Log.debug "Data index required"
                 data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false, :type => (data_file.type == :single ? :single : :flat)
               end

  current_index = data_index
  current_key   = data_key
  while not path.empty?
    next_key, next_file = path.shift

    if current_index.nil?
      # No index so far: start from the index of this step, restricted to the
      # keys of the data file
      current_index = next_file.index(:target => next_key, :fields => [current_key], :persist => persist_input)
      current_index = current_index.select :key => data_file.keys
    else
      next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input

      # Compose: replace each value of the current index by what the next
      # index holds for it (modifies current_index in place)
      next_index.with_unnamed do
        current_index.with_unnamed do
          current_index.process current_index.fields.first do |values|
            if values.nil?
              nil
            else
              new_values = next_index.values_at(*values).flatten
              if current_index.type == :single
                new_values.first
              else
                new_values
              end
            end
          end
          current_index.fields = [next_key]
        end
      end
    end
    current_key = next_key
  end

  current_index

end

.melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/tsv/melt.rb', line 2

# Traverses +tsv+ into a dumper, turning each entry into one result per field.
#
# For every entry the +fields+ are paired with the entry's values and each
# pair gives [field, [field, value]]. When a block is given it is called with
# the value and must return the list of info values to use instead (the field
# is prepended to that list in place); a nil from the block leaves a nil in
# the results for that field.
#
# The dumper gets +key_field+ as key field and +header_field+ followed by
# +info_fields+ as fields. Each list of results is extended with
# MultipleResult.
def self.melt(tsv, key_field, header_field, fields, *info_fields, &block)
  info_fields.unshift header_field

  TSV.traverse tsv, :into => :dumper, :key_field => key_field, :fields => info_fields do |k,values|
    results = fields.zip(values).collect do |field, value|
      info_values = block ? block.call(value) : [value]
      next if info_values.nil?

      info_values.unshift field
      [field, info_values]
    end

    results.extend MultipleResult
  end
end

.merge_different_fields(file1, file2, output, options = {}) ⇒ Object

Merge two files with the same keys and different fields



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/rbbt/tsv/attach.rb', line 65

# Merge two files with the same keys and different fields
#
# +file1+ and +file2+ may each be the name of an existing file, a String or
# StringIO with the content, or a TSV; in those cases they are sorted by key
# with `sort`, dropping lines that start with the separator. Anything else is
# read as is with +gets+ and +eof?+ and must already be sorted.
#
# The two sorted streams are merged key by key into +output+ (a file name or
# an object responding to +puts+ and +close+): each line holds the key, the
# values from +file1+ and then the values from +file2+, with empty cells for a
# side that lacks the key and repeated keys joined with "|". A header is
# written when both inputs have a "#" header line.
#
# Options: :sep (default "\t") and :monitor (progress bar). :key_field and
# :fields are removed from the options; the rest ends up in the header.
def self.merge_different_fields(file1, file2, output, options = {})
  options = Misc.add_defaults options, :sep => "\t"
  monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
  sep = options[:sep] || "\t"

  # File names are quoted for the shell, like in the `wc` call, so that paths
  # with spaces work. File.exist? is used because Ruby 3.2 removed
  # File.exists?
  case
  when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
    size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' '#{ file1 }' | grep -v '^#{sep}' ", :pipe => true)
  when (String === file1 or StringIO === file1)
    size = file1.length if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
  when TSV === file1
    size = file1.size if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
  end

  case
  when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' '#{ file2 }' | grep -v '^#{sep}' ", :pipe => true)
  when (String === file2 or StringIO === file2)
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
  when TSV === file2
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
  end

  begin
    output = File.open(output, 'w') if String === output

    cols1 = nil
    cols2 = nil

    done1 = false
    done2 = false

    # Read up to the first data line of each stream; the last "#" line seen
    # before it provides the key field and field names
    key1 = key2 = nil
    while key1.nil?
      while (line1 = file1.gets) =~ /^#/
        key_field1, *fields1 = line1.strip.sub('#','').split(sep)
      end
      key1, *parts1 = line1.sub("\n",'').split(sep, -1)
      cols1 = parts1.length
    end

    while key2.nil?
      while (line2 = file2.gets) =~ /^#/
        key_field2, *fields2 = line2.strip.sub('#','').split(sep)
      end
      key2, *parts2 = line2.sub("\n",'').split(sep, -1)
      cols2 = parts2.length
    end

    #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
    progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor

    entry_hash = options
    entry_hash.delete :sep if entry_hash[:sep] == "\t"
    output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2

    # Merge: always emit the smallest pending key, consuming every line with
    # that key from both streams
    key = key1 < key2 ? key1 : key2
    parts = [""] * (cols1 + cols2)
    while not (done1 and done2)
      while (not done1 and key1 == key)
        parts1.each_with_index do |part, i|
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key1 = nil
        while key1.nil? and not done1
          if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
        end
        progress_monitor.tick if monitor
      end
      while (not done2 and key2 == key)
        parts2.each_with_index do |part, i|
          # Values of the second file go after the columns of the first
          i += cols1
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key2 = nil
        while key2.nil? and not done2
          if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
        end
      end

      output.puts [key, parts].flatten * sep
      parts = [""] * (cols1 + cols2)

      case
      when done1
        key = key2
      when done2
        key = key1
      else
        key = key1 < key2 ? key1 : key2
      end
    end

    output.close
    file1.join if file1.respond_to? :join
    file2.join if file2.respond_to? :join
  rescue
    # NOTE(review): the error is not re-raised and the output is not closed
    # here, so a failed merge goes unnoticed by the caller -- confirm that is
    # intended
    file1.abort if file1.respond_to? :abort
    file2.abort if file2.respond_to? :abort
    file1.join if file1.respond_to? :join
    file2.join if file2.respond_to? :join
  end
end

.merge_paste(files, delim = "$") ⇒ Object

Merge columns from different files



173
174
175
# File 'lib/rbbt/tsv/attach.rb', line 173

# Merge columns from different files
#
# Runs `paste` over +files+ (each name single-quoted) using +delim+ as the
# delimiter, piped through a `sed` that removes +delim+ together with the
# text that follows it. The command is run with CMD.cmd as a pipe, and
# whatever that returns is returned.
def self.merge_paste(files, delim = "$")
  quoted = files.collect { |f| "'#{f}'" }.join(" ")
  CMD.cmd("paste #{quoted} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
end

.merge_row_fields(input, output, options = {}) ⇒ Object

Merge columns from different rows of a file



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/rbbt/tsv/attach.rb', line 6

# Merge columns from different rows of a file
#
# +input+ may be the name of an existing file or a String/StringIO with the
# content; both are sorted by key with `sort`, dropping lines that start with
# the separator. Anything else is read as is with +gets+ and must already be
# sorted. Consecutive lines with the same key are collapsed into one, joining
# the values of each column with "|", and written to +output+ after a header.
#
# Options: :sep (default "\t"), :key_field and :fields. When :key_field or
# :fields is missing, a TSV::Parser reads them from the stream; otherwise the
# stream is assumed to start directly with data.
def self.merge_row_fields(input, output, options = {})
  options = Misc.add_defaults options, :sep => "\t"
  key_field, fields = Misc.process_options options, :key_field, :fields
  sep = options[:sep]

  # The file name is quoted for the shell so that paths with spaces work.
  # File.exist? is used because Ruby 3.2 removed File.exists?
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' '#{ input }' | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  if key_field.nil? or fields.nil?
    parser = TSV::Parser.new(is, options.dup)
    fields ||= parser.fields
    key_field ||= parser.key_field
    line = parser.first_line
  else
    line = is.gets
  end
 
  current_key  = nil
  current_parts = []

  Open.write(output) do |os|
    options.delete :sep if options[:sep] == "\t"
    os.puts TSV.header_lines(key_field, fields, options) 

    while line
      key, *parts = line.sub("\n",'').split(sep, -1)
      current_key ||= key
      case
      when key.nil?
        # Empty line: nothing to merge
      when current_key == key
        parts.each_with_index do |part,i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      when current_key != key
        # A new key starts: flush what was accumulated for the previous one
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end

      line = is.gets
    end

    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?

  end
end

.obj_stream(obj) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 3

# Returns the stream behind +obj+:
# * nil for nil
# * the +result+ of a Step (when that class is loaded)
# * IO, File, Zlib::GzipReader and Bgzf (when loaded) objects themselves
# * the +stream+ of a TSV::Dumper or TSV::Parser
#
# Any other object gives nil.
def self.obj_stream(obj)
  case obj
  when nil
    nil
  when (defined? Step and Step)
    obj.result
  when IO, File, Zlib::GzipReader
    obj
  when (defined? Bgzf and Bgzf)
    # Guarded like in get_stream: Bgzf may not be loaded
    obj
  when TSV::Dumper
    obj.stream
  when TSV::Parser
    obj.stream
  end
end

.open(source, type = nil, options = nil) ⇒ Object

The arguments shift when no type is given: a Hash passed as +type+ (with +options+ omitted) is used as the options.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rbbt/tsv.rb', line 49

# Opens +source+ as a TSV, going through Persist.persist_tsv.
#
# +type+ may be omitted: when +options+ is nil and +type+ is a Hash, that Hash
# is taken as the options. A non-nil +type+ fills in options[:type] unless it
# is already set.
#
# The :serializer, :unnamed and :entity_options options are removed from
# +options+ and applied here, as are the keys that Misc.pull_keys extracts for
# :persist and :open. What is left is handed to Persist.persist_tsv and to
# TSV.parse.
#
# Raises if +source+ is nil.
def self.open(source, type = nil, options = nil)
  type, options = nil, type if options.nil? and Hash === type
  options ||= {}
  options[:type] ||= type unless type.nil?

  persist_options = Misc.pull_keys options, :persist

  raise "TSV source is nil" if source.nil?

  filename = get_filename source
  serializer = Misc.process_options options, :serializer
  unnamed = Misc.process_options options, :unnamed
  entity_options = Misc.process_options options, :entity_options

  Log.debug "TSV open: #{ filename } - #{Misc.fingerprint options }.#{unnamed ? " [unnamed]" : "[not unnamed]"}"

  data = nil

  # The lock file is derived from the file name; sources without a name
  # (get_filename returned nil) get a nil lock file
  lock_filename = filename.nil? ? nil : Persist.persistence_path(filename + '.open', {:dir => TSV.lock_dir})
  Misc.lock lock_filename  do
    data = Persist.persist_tsv source, filename, options, persist_options do |data|
      if serializer
        data.extend TSV unless TSV === data
        data.serializer = serializer
      end

      open_options = Misc.pull_keys options, :open

      stream = get_stream source, options.merge(open_options)
      parse stream, data, options

      data.filename = filename.to_s unless filename.nil?

      # Pick up the identifier file that goes with a Path, when there is one
      if data.identifiers.nil? and Path === filename and filename.identifier_file_path
        data.identifiers = filename.identifier_file_path.find if filename.identifier_file_path.exists?
      end

      data
    end
  end


  data.unnamed = unnamed unless unnamed.nil?

  data.entity_options = entity_options

  if Path === source and data.identifiers
    data.identifiers = Path.setup(data.identifiers, source.pkgdir, source.resource)
  end

  # Data with a persistence_path is returned directly; otherwise a duplicate
  # is made, data is emptied and the duplicate is passed to data.annotate
  if data.respond_to? :persistence_path
    data
  else
    h = data.dup
    data.clear
    data.annotate h
  end
end

.parse(stream, data, options = {}) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/rbbt/tsv.rb', line 125

# Loads the content of +stream+ into +data+ and sets +data+ up as a TSV.
#
# A parser can be supplied with the :parser option (it is removed from
# +options+); otherwise a TSV::Parser is created for +stream+ with +options+.
# When the serializer of +data+ is :type, one is chosen from the cast and type
# of the parser. +data+ is kept unnamed while the entries are added and is
# named again before it is returned.
#
# Returns +data+.
def self.parse(stream, data, options = {})

  parser = Misc.process_options options, :parser
  parser = TSV::Parser.new stream, options if parser.nil?

  # dump with tchmgr
  # NOTE(review): the condition ends in a trailing `and`, so `data.close` on
  # the next line is evaluated as part of it; the import below only runs if
  # close returns a true value -- confirm this is intended
  if defined? TokyoCabinet and TokyoCabinet::HDB === data and parser.straight and
    data.close
    begin
      bin = 'tchmgr'
      CMD.cmd("#{bin} version", :log => false)
      FileUtils.mkdir_p File.dirname(data.persistence_path)
      CMD.cmd("#{bin} importtsv '#{data.persistence_path}'", :in => stream, :log => false, :dont_close_in => true)
    rescue
      # Failures are only logged; the regular traversal below still runs
      Log.debug("tchmgr importtsv failed for: #{data.persistence_path}")
    end
    data.write
  end

  # make TSV
  data.extend TSV unless TSV === data
  data.unnamed = true

  # choose serializer
  # Each branch assigns data.serializer itself and the value of the case is
  # assigned again; when no branch matches the case gives nil
  if data.serializer == :type
    data.serializer = case
                      when parser.cast.nil?
                        data.serializer = parser.type
                      when (parser.cast == :to_i and (parser.type == :list or parser.type == :flat))
                        data.serializer = :integer_array
                      when (parser.cast == :to_i and parser.type == :single)
                        data.serializer = :integer
                      when (parser.cast == :to_f and parser.type == :single)
                        data.serializer = :float
                      when (parser.cast == :to_f and (parser.type == :list or parser.type == :flat))
                        data.serializer = :float_array
                      end
  end

  parser.traverse(options) do |key,values|
    parser.add_to_data data, key, values
  end

  # setup the TSV
  parser.setup data

  data.unnamed = false

  data
end

.parse_header(stream, options = {}) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/rbbt/tsv.rb', line 108

# Build a Parser for +stream+ so that its header can be inspected.
#
# stream::  a Path, a short String naming an existing or remote file, or
#           anything else, which is handed to the Parser as it is
# options:: forwarded untouched to Parser.new
#
# Returns the Parser.
def self.parse_header(stream, options = {})
  # Path objects know how to open themselves
  if Path === stream
    return stream.open { |io| Parser.new io, options }
  end

  # Short strings may be file names or URLs; longer ones are never treated as such
  if String === stream and stream.length < 300 and (Open.exists?(stream) or Open.remote?(stream))
    return Open.open(stream) { |io| Parser.new io, options }
  end

  # Anything else is assumed to be readable already
  filename = if stream.respond_to?(:filename)
               stream.filename
             else
               Misc.fingerprint(stream)
             end
  Log.debug("Parsing header of open stream: #{filename}")
  Parser.new stream, options
end

.paste_streams(streams, options = {}) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/rbbt/tsv/stream.rb', line 19

# Merge several TSV streams side by side, by key, into a single TSV stream.
#
# streams:: the inputs; Step, Path and TSV::Dumper objects are converted to
#           streams first, anything else is used as given
# options:: :sep (default "\t"), :sort (default true) and :preamble are
#           consumed here; the rest is passed to the parsers and to
#           TSV.header_lines
#
# All output is written to the pipe opened with Misc.open_pipe, whose result
# is returned.
def self.paste_streams(streams, options = {})
  options = Misc.add_defaults options, :sep => "\t", :sort => true
  sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble

  out = Misc.open_pipe do |sin|

    # Turn every input into something that can be read from; nils are dropped
    streams = streams.collect do |stream|
      case stream
      when (defined? Step and Step) 
        stream.grace
        stream.get_stream || stream.join.path.open
      when Path
        stream.open
      when TSV::Dumper
        stream.stream
      else
        stream
      end
    end.compact

    # NOTE(review): num_streams is assigned but not read again in this method
    num_streams = streams.length

    # The merge below always advances the smallest current key, so every
    # stream is sorted first unless the caller disabled :sort
    streams = streams.collect do |stream|
      sorted = Misc.sort_stream(stream)
      stream.annotate sorted if stream.respond_to? :annotate
      sorted
    end if sort

    lines         = []
    fields        = []
    sizes         = []
    key_fields    = []
    input_options = []
    empty         = []
    preambles     = []

    # Parse each header, remembering the first data line and the header
    # details; from here on each stream is the one exposed by its parser
    streams = streams.collect do |stream|
      parser = TSV::Parser.new stream, options

      lines         << parser.first_line
      empty         << stream               if parser.first_line.nil?
      key_fields    << parser.key_field
      fields        << parser.fields
      sizes         << parser.fields.length if parser.fields
      input_options << parser.options
      preambles     << parser.preamble      if TrueClass === preamble and 
                                               not parser.preamble.empty?

      parser.stream
    end

    # Output header: first available key field, all fields concatenated, and
    # the options of the first input layered over the ones given
    key_field = key_fields.compact.first
    fields = fields.compact.flatten
    options = options.merge(input_options.first)
    options[:type] = :list if options[:type] == :single

    # true => join the preambles collected above; a String is used verbatim
    preamble_txt = case preamble
                   when TrueClass
                     preambles * "\n"
                   when String
                     preamble
                   else
                     nil
                   end

    header = TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
    sin.puts header

    # Drop the bookkeeping entries of inputs that had no data lines
    empty_pos = empty.collect{|stream| streams.index stream }
    empty_pos.sort.reverse.each do |i|
      key_fields.delete_at i
      input_options.delete_at i
    end

    begin
      done_streams = []

      # Split the first line of every stream into its key and its values
      keys = []
      parts = []
      lines.each_with_index do |line,i|
        if line.nil?
          keys[i] = nil
          parts[i] = nil
        else
          vs = line.chomp.split(sep, -1) 
          key, *p = vs
          keys[i] = key
          parts[i] = p
        end
        # Only used when the sizes entry for this position is still unset
        sizes[i] ||= parts[i].length-1 unless parts[i].nil?
      end

      last_min = nil
      # Each pass emits one output line for the smallest pending key
      while lines.compact.any?
        min = keys.compact.sort.first
        break if min.nil?
        str = []
        keys.each_with_index do |key,i|
          case key
          when min
            # This stream has the key: emit its values and read its next line
            str << parts[i] * sep

            begin
              line = lines[i] = begin
                                  streams[i].gets
                                rescue
                                  Log.exception $!
                                  nil
                                end
              if line.nil?
                stream = streams[i]
                keys[i] = nil
                parts[i] = nil
              else
                k, *p = line.chomp.split(sep, -1)
                # Same key again: warn and read another line
                raise TryAgain if k == keys[i]
                keys[i] = k
                parts[i] = p.collect{|e| e.nil? ? "" : e }
              end
            rescue TryAgain
              Log.warn "Skipping repeated key in stream #{i}: #{keys[i]}"
              retry
            end
          else
            # This stream lacks the key: pad with empty values
            if sizes[i] and sizes[i] > 0
              p = sep * (sizes[i]-1)
              str << p
            end
          end
        end

        # Join the per-stream chunks with sep into the value part of the line
        values = str.inject(nil) do |acc,part| 
          if acc.nil?
            acc = part.dup
          else
            acc << sep << part
          end
          acc
        end
        text = [min, values] * sep
        sin.puts text
      end

      streams.each do |stream|
        stream.join if stream.respond_to? :join
      end
    rescue Aborted
      # Abort every input that supports it, then re-raise
      Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
      streams.each do |stream|
        stream.abort if stream.respond_to? :abort
      end
      raise $!
    rescue Exception
      Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
      streams.each do |stream|
        stream.abort if stream.respond_to? :abort
      end
      raise $!
    end
  end

  out
end

.pos_index(file, pos_field = nil, options = {}) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/rbbt/tsv/index.rb', line 175

# Build, through Persist.persist, a position index over +file+.
#
# file::      a String or Path, an object with a +filename+, or anything else
#             (in which case its object_id names the persisted index)
# pos_field:: the field holding the positions; "Position" when not given
# options::   entries pulled with the :data prefix go to TSV.open, those
#             pulled with the :persist prefix go to Persist.persist, :filters
#             is a list of match/value pairs applied before indexing, and
#             whatever is left is passed to the TSV's own pos_index
def self.pos_index(file, pos_field = nil, options = {})
  pos_field = "Position" unless pos_field

  data_options = Misc.pull_keys options, :data

  filename = if String === file or Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] = "StaticPosIndex[#{pos_field}]" unless persist_options[:prefix]

  filters = Misc.process_options options, :filters

  # Filtered indices are persisted under a name that includes the filters
  if filters
    filter_desc = filters.collect{|f| f * "="} * ", "
    filename = filename + ":Filtered[#{filter_desc}]"
  end

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)
    if filters
      tsv.filter
      filters.each{|match, value| tsv.add_filter match, value }
    end
    tsv.pos_index(pos_field, options)
  end
end

.range_index(file, start_field = nil, end_field = nil, options = {}) ⇒ Object



246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/rbbt/tsv/index.rb', line 246

# Build, through Persist.persist, a range index over +file+.
#
# file::        a String or Path, an object with a +filename+, or anything
#               else (in which case its object_id names the persisted index)
# start_field:: the field with the range starts; "Start" when not given
# end_field::   the field with the range ends; "End" when not given
# options::     entries pulled with the :data prefix go to TSV.open, those
#               pulled with the :persist prefix go to Persist.persist,
#               :filters is a list of match/value pairs applied before
#               indexing, and whatever is left is passed to the TSV's own
#               range_index
def self.range_index(file, start_field = nil, end_field = nil, options = {})
  start_field = "Start" unless start_field
  end_field = "End" unless end_field

  data_options = Misc.pull_keys options, :data

  filename = if String === file or Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] = "StaticRangeIndex[#{start_field}-#{end_field}]" unless persist_options[:prefix]

  filters = Misc.process_options options, :filters

  # Filtered indices are persisted under a name that includes the filters
  if filters
    filter_desc = filters.collect{|f| f * "="} * ", "
    filename = filename + ":Filtered[#{filter_desc}]"
  end

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)
    if filters
      tsv.filter
      filters.each{|match, value| tsv.add_filter match, value }
    end
    tsv.range_index(start_field, end_field, options)
  end
end

.read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/rbbt/tsv/matrix.rb', line 4

# Re-shape a matrix-like TSV into a :double TSV with two fields: one holding
# the original field names and one holding the corresponding values.
#
# tsv::          a TSV, or something TSV.open can open
# field_format:: name of the output field that receives the field names
# value_format:: name of the output field that receives the values
# others::       fields kept out of the re-shaping; they are sliced off first
#                and attached back, as a :double TSV, to the result
def self.read_matrix(tsv, field_format = "ID", value_format = "Value", *others)
  tsv = TSV.open(tsv) unless TSV === tsv

  if others.any?
    other_tsv = tsv.slice(others)
    tsv = tsv.slice(tsv.fields - others)
  end

  key_field, *fields = tsv.all_fields

  options = tsv.options.merge(
    :key_field => key_field,
    :fields    => [field_format, value_format],
    :type      => :double,
    :cast      => nil
  )
  options[:filename] = tsv.filename unless options[:filename]
  options[:identifiers] = tsv.identifier_files.first unless options[:identifiers]

  dumper = TSV::Dumper.new(options)
  dumper.init

  # Every entry becomes: key => [field names, values]
  TSV.traverse(tsv, :into => dumper) { |key, values| [key, [fields, values]] }

  res = TSV.open(dumper.stream, options)
  return res unless others.any?

  res.attach other_tsv.to_double, :one2one => true
end

.reorder_stream(stream, positions, sep = "\t") ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/rbbt/tsv/util.rb', line 4

# Copy +stream+ into a new pipe with the columns of every line re-ordered.
#
# stream::    anything responding to +gets+
# positions:: an Array of column indices in output order, or a Hash of
#             index pairs to swap; a Hash is expanded into a full Array the
#             first time a header or data line is processed
# sep::       the column separator
#
# Lines starting with "#:" are copied verbatim; other "#" lines are treated
# as headers (columns missing from them are dropped); the rest is data.
# Returns whatever Misc.open_pipe returns.
def self.reorder_stream(stream, positions, sep = "\t")
  Misc.open_pipe do |sin|
    # Expand a swap table into a positional array sized after the given line
    expand_swaps = lambda do |text|
      order = (0..text.split(sep).length-1).to_a
      positions.each do |from, to|
        order[from] = to
        order[to] = from
      end
      order
    end

    line = stream.gets

    # Option lines go through untouched
    while line =~ /^#\:/
      sin.puts line
      line = stream.gets
    end

    # Header lines
    while line =~ /^#/
      positions = expand_swaps.call(line) if Hash === positions
      header = line.sub!(/^#/,'').strip.split(sep).values_at(*positions).compact
      sin.puts "#" + header * sep
      line = stream.gets
    end

    # Data lines
    until line.nil?
      positions = expand_swaps.call(line) if Hash === positions
      cells = line.strip.split(sep).values_at(*positions)
      sin.puts cells * sep
      line = stream.gets
    end
  end
end

.report(msg, obj, into) ⇒ Object



58
59
60
61
62
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 58

# Log, at medium level, +msg+ followed by the stream names of the source
# +obj+ and of the destination.
#
# into:: the destination itself, or an options Hash holding it under :into
def self.report(msg, obj, into)
  target = (Hash === into and into.include? :into) ? into[:into] : into

  Log.medium do
    "#{ msg } #{stream_name(obj)} -> #{stream_name(target)}"
  end
end

.setup(hash, options = {}) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/tsv.rb', line 31

# Turn +hash+ into a TSV and configure it from +options+.
#
# hash::    a Hash, or an Array of keys that is first converted with
#           Misc.array2hash using the :default_value option (default [])
# options:: any of the names listed in ENTRIES, given as String or Symbol
#           keys, plus :unnamed (defaults to TSV.unnamed)
#
# Returns the extended hash.
def self.setup(hash, options = {})
  options = Misc.add_defaults options, :default_value => [], :unnamed => TSV.unnamed
  default_value = Misc.process_options options, :default_value

  hash = Misc.array2hash(hash, default_value) if Array === hash
  hash.extend TSV

  IndiferentHash.setup(options)

  # Each entry is looked up under its String form first, then its Symbol form
  ENTRIES.each do |entry|
    setter = "#{ entry }="
    [entry.to_s, entry.to_sym].each do |option_key|
      hash.send(setter, options[option_key]) if options.include? option_key
    end
  end

  hash.unnamed = options[:unnamed]

  hash
end

.store_into(store, value) ⇒ Object



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 369

# Store +value+, the result of processing one entry, into +store+.
#
# store:: where the value goes; handled according to its class:
#         * TSV of type :double or :flat: TSV/Hash values are merged with
#           merge_zip, anything else is splatted into zip_new
#         * other TSV, or Hash holding a non-Hash value: the value is
#           taken as a [key, value] pair
#         * Hash holding a TSV/Hash value: merged with merge!
#         * TSV::Dumper: the value is splatted into add
#         * IO: the stripped value is written with puts
#         * anything else: the value is appended with <<
# value:: the value to store; a MultipleResult is stored element by element
#
# Returns true when the value was stored, false when it was nil, and nil for
# a MultipleResult or when storing was aborted. On Aborted or Interrupt the
# stream behind +store+ is aborted and the error is not propagated; any
# other exception aborts the stream and is re-raised.
def self.store_into(store, value)
  if MultipleResult === value
    value.each do |v|
      store_into store, v
    end
    return
  end
  begin
    # Checked once here; no branch below ever sees a nil value
    return false if value.nil?
    case store
    when TSV
      if store.type == :double or store.type == :flat
        case value
        when TSV, Hash
          store.merge_zip value
        else
          store.zip_new(*value)
        end
      else
        k,v = value
        store[k] = v
      end
    when Hash
      case value
      when TSV, Hash
        store.merge! value 
      else
        k,v = value
        store[k] = v
      end
    when TSV::Dumper
      store.add(*value)
    when IO
      # Write a stripped copy: the caller's string is left untouched, so
      # frozen strings can be stored as well
      store.puts value.strip
    else
      store << value
    end 
    true
  rescue Aborted, Interrupt
    Log.medium "Aborted storing into #{Misc.fingerprint store}"
    stream = obj_stream(store)
    stream.abort if stream.respond_to? :abort
  rescue Exception
    Log.medium "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
    stream = obj_stream(store)
    stream.abort if stream.respond_to? :abort
    raise $!
  end
end

.stream_flat2double(stream, options = {}) ⇒ Object



183
184
185
186
187
188
189
190
191
192
# File 'lib/rbbt/tsv/stream.rb', line 183

# Re-emit the entries read from +stream+ as a :double TSV, wrapping the
# values of each entry in a one-element Array.
#
# options:: merged over the options of the parsed stream; :type is always
#           forced to :double
#
# Returns the TSV::Dumper producing the new stream.
def self.stream_flat2double(stream, options = {})
  flat_parser = TSV::Parser.new(TSV.get_stream(stream))

  double_dumper = TSV::Dumper.new(flat_parser.options.merge(options).merge(:type => :double))
  double_dumper.init

  TSV.traverse(flat_parser, :into => double_dumper) { |key, values| [key, [values]] }

  double_dumper
end

.stream_name(obj) ⇒ Object



50
51
52
53
54
55
56
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 50

# Short description of +obj+ for log messages: its class name, a dash, and
# the fingerprint of its underlying stream (or of +obj+ itself when
# obj_stream finds none). Returns "nil" for nil.
def self.stream_name(obj)
  return "nil" if obj.nil?

  # NOTE(review): the filename/path is looked up but takes no part in the
  # result
  filename_obj = obj.filename if obj.respond_to?(:filename)
  filename_obj ||= (obj.respond_to?(:path) ? obj.path : nil)

  stream_obj = obj_stream(obj) || obj

  name = obj.class.to_s
  name << "-"
  name << Misc.fingerprint(stream_obj)
  name
end

.swap_id(tsv, field, format, options = {}, &block) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/rbbt/tsv/change_id.rb', line 48

# Replace the identifiers found in +field+ of +tsv+ by their translation
# into +format+, renaming the field accordingly.
#
# options:: :identifiers (defaults to tsv.identifiers) supplies the
#           translation index, :persist (default false) is passed on to it,
#           and :compact (default true) drops untranslated values in
#           multi-valued entries
# block::   forwarded to to_list when the original TSV was of type :list
#
# The TSV is processed as :double and converted back when the original type
# was :flat or :list. Returns the resulting TSV.
def self.swap_id(tsv, field, format, options = {}, &block)
  options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers, :compact => true

  identifiers, persist_input, compact = Misc.process_options options, :identifiers, :persist, :compact

  # Restrict the index to the source field when the identifiers have it
  fields = identifiers.all_fields.include?(field) ? [field] : nil
  index = identifiers.index :target => format, :fields => fields, :persist => persist_input

  orig_type = tsv.type
  tsv = tsv.to_double unless orig_type == :double

  pos = tsv.fields.index field

  tsv.with_unnamed do
    single_valued = (tsv.type == :list or tsv.type == :single)

    if single_valued
      tsv.through do |key, values|
        values[pos] = index[values[pos]]
        tsv[key] = values
      end
    else
      tsv.through do |key, values|
        translated = index.values_at(*values[pos])
        translated.compact! if compact
        values[pos] = translated
        tsv[key] = values
      end
    end

    tsv.fields = tsv.fields.collect{|f| f == field ? format : f}
  end

  # Give the result the type the caller started with
  case orig_type
  when :flat
    tsv = tsv.to_flat
  when :list
    tsv = tsv.to_list(&block)
  end

  tsv
end

.translate(tsv, *args) ⇒ Object



144
145
146
147
148
# File 'lib/rbbt/tsv/change_id.rb', line 144

# Open the stream produced by translate_stream as a TSV that keeps the
# identifiers of the original +tsv+.
#
# args:: passed on to translate_stream after +tsv+
def self.translate(tsv, *args)
  translated = TSV.open(translate_stream(tsv, *args))
  translated.identifiers = tsv.identifiers
  translated
end

.translate_stream(tsv, field, format, options = {}, &block) ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/rbbt/tsv/change_id.rb', line 150

# Translate the identifiers in +field+ of +tsv+ into +format+ and return the
# result as a stream, produced by a TSV::Dumper.
#
# tsv::     the TSV to translate
# field::   the field (or key field) holding the identifiers to translate
# format::  the target identifier format; it replaces +field+ in the header
# options:: :identifier_files (defaults to tsv.identifier_files),
#           :identifiers, :persist (default false) and :compact (default
#           true) are removed here; what remains is passed to
#           translation_index
#
# Raises a RuntimeError when no translation index can be built.
def self.translate_stream(tsv, field, format, options = {}, &block)
  options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true

  # NOTE(review): persist_input and compact are extracted here but not read
  # again in this method; the block argument is not used either
  identifier_files, identifiers, persist_input, compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
  # Without identifier files, fall back to the TSV itself plus :identifiers
  identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?

  identifier_files.uniq!

  index = translation_index identifier_files, format, field, options.dup
  raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?

  orig_type = tsv.type 
  tsv = tsv.to_double if orig_type != :double

  pos = tsv.identify_field field

  new_options = tsv.options
  new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers

  case pos
  when :key
    # The identifiers are the keys: only the keys are translated
    new_options[:key_field] = format if tsv.key_field == field
    dumper = TSV::Dumper.new new_options
    dumper.init
    TSV.traverse tsv, :into => dumper do |key,values|
      new_key = index[key]
      [new_key, values]
    end
  else
    # The identifiers are in a value field: rename it and translate it
    new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
    dumper = TSV::Dumper.new new_options
    dumper.init

    case tsv.type
    when :double
      # Several values per field: translate each of them
      TSV.traverse tsv, :into => dumper do |key,values|
        original = values[pos]
        new = index.values_at *original
        values[pos] = new
        [key, values]
      end
    when :list
      # One value per field
      TSV.traverse tsv, :into => dumper do |key,values|
        original = values[pos]
        new = index[original]
        values[pos] = new
        [key, values]
      end
    when :flat
      # A flat list of values: all of them are translated
      TSV.traverse tsv, :into => dumper do |key,values|
        new = index.values_at *values
        [key, new]
      end
    when :single
      # A single value per key
      TSV.traverse tsv, :into => dumper do |key,original|
        new = index[original]
        [key, new]
      end
    end
  end

  dumper.stream
end

.translation_index(files, target = nil, source = nil, options = {}) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/rbbt/tsv/change_id.rb', line 89

# Find, among +files+, a way to translate +source+ identifiers into +target+
# identifiers and return the corresponding index.
#
# files::   TSV objects and/or files whose header TSV.parse_header can read
# target::  the field to translate into; when nil, the first pass picks one
#           from the file it is inspecting (first field of a TSV, second
#           header field of a file)
# source::  the field to translate from; nil or empty means any field
# options:: passed to the index calls; :persist defaults to true
#
# Returns nil when source and target are the same or when no file, or pair
# of files, connects them.
def self.translation_index(files, target = nil, source = nil, options = {})
  return nil if source == target
  options = Misc.add_defaults options.dup, :persist => true
  fields = (source and not source.empty?) ? [source] : nil
  # First pass: a single file that holds both source and target
  files.each do |file|
    if TSV === file
      all_fields = file.all_fields
      target = file.fields.first if target.nil?
      if (source.nil? or all_fields.include? source) and all_fields.include? target
        return file.index(options.merge(:target => target, :fields => fields, :order => true)) 
      end
    else
      all_fields = TSV.parse_header(file).all_fields
      target = all_fields[1] if target.nil?
      if (source.nil? or all_fields.include? source) and all_fields.include? target
        index = TSV.index(file, options.merge(:target => target, :fields => fields, :order => true)) 
        return index
      end
    end
  end

  # Second pass: two files linked by a field they have in common, the first
  # with the source and the second with the target
  files.each do |file|
    all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields 

    files.each do |other_file|
      next if file == other_file

      other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields 

      common_field = (all_fields & other_all_fields).first

      if common_field and (source.nil? or source.empty? or all_fields.include? source) and other_all_fields.include? target 

        index = Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|

          # source -> common field
          index = TSV === file ? 
            file.index(options.merge(:target => common_field, :fields => fields)) :
            TSV.index(file, options.merge(:target => common_field, :fields => fields))

          # common field -> target
          other_index = TSV === other_file ? 
            other_file.index(options.merge(:target => target, :fields => [common_field])) :
            TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))

          data.serializer = :clean
          
          # ToDo: remove the need to to the `to_list` transformation
          # Chain both indices and keep only the target column
          data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single
        end
        return index
      end
    end
  end
  return nil
end

.traverse(obj, options = {}, &block) ⇒ Object



495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 495

# Traverse +obj+, yielding each of its entries to the block and, when :into
# is given, storing what the block returns.
#
# obj::     the object to traverse; it is handed to traverse_run (or to
#           traverse_stream when the destination is a TSV::Dumper or an IO)
# options:: consumed here:
#           :into::    destination for the results; :stream returns the read
#                      end of a new pipe, :dumper returns a new TSV::Dumper,
#                      any other object is filled through store_into
#           :threads:: number of threads; nil, or a value of 1 or less, means
#                      none
#           :cpus::    number of processes; same convention as :threads
#           :keys::    true to traverse just the keys, or a String naming the
#                      field to use as key
#           :bar or :progress:: a description String, true, an Integer
#                      maximum, an options Hash, or a ready-made progress bar
#           Everything else is passed on untouched.
#
# Returns the destination when :into is given, otherwise the result of
# traverse_run. Note that +options+ is modified in place.
def self.traverse(obj, options = {}, &block)
  into = options[:into]

  case into
  when :stream
    sout = Misc.open_pipe false, false do |sin|
      begin
        traverse(obj, options.merge(:into => sin), &block)
      rescue Exception
        sout.abort if sout.respond_to? :abort
        sout.join if sout.respond_to? :join
      end
    end
    return sout
  when :dumper
    obj_options = obj.respond_to?(:options) ? obj.options : {}
    dumper = TSV::Dumper.new obj_options.merge(options)
    dumper.init
    traverse(obj, obj_options.merge(:into => dumper), &block)
    return dumper
  end

  threads = Misc.process_options options, :threads
  cpus = Misc.process_options options, :cpus
  threads = nil if threads and threads.to_i <= 1
  cpus = nil if cpus and cpus.to_i <= 1

  if options[:keys]
    case options[:keys]
    when TrueClass
      options[:type] = :keys
    when String
      options[:type] = :keys
      options[:key_field] = options[:keys]
      options[:fields] = []
    end
  end

  bar = Misc.process_options options, :bar
  bar ||= Misc.process_options options, :progress
  options[:bar] = case bar
                  when String
                    max = guess_max(obj)
                    Log::ProgressBar.new_bar(max, {:desc => bar})
                  when TrueClass
                    max = guess_max(obj)
                    Log::ProgressBar.new_bar(max, nil)
                  when Integer
                    # Integer rather than Fixnum: the Fixnum constant no
                    # longer exists in current Ruby, and merely reaching this
                    # clause would raise NameError. The number is the maximum.
                    Log::ProgressBar.new_bar(bar)
                  when Hash
                    # NOTE(review): `max` has no value at this point, so the
                    # fallback is nil; guess_max(obj) may have been intended
                    max = Misc.process_options(bar, :max) || max
                    Log::ProgressBar.new_bar(max, bar)
                  else
                    bar
                  end

  if into
    # The bar is ticked from the callback below, so it is taken back out of
    # the options passed on to the traversal
    bar = Misc.process_options options, :bar

    options[:join] = Proc.new do
      Log::ProgressBar.remove_bar(bar)
    end if bar

    options[:callback] = Proc.new do |e|
      begin
        store_into into, e
      rescue Aborted
        Log.medium "Aborted callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
        stream = nil
        stream = get_stream obj
        stream.abort if stream.respond_to? :abort
        raise $!
      rescue Exception
        Log.medium "Exception callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
        stream = nil
        stream = get_stream obj
        stream.abort if stream.respond_to? :abort
        raise $!
      ensure
        bar.tick if bar
      end
    end

    case into
    when TSV::Dumper, IO
      traverse_stream(obj, threads, cpus, options, &block)
    else
      traverse_run(obj, threads, cpus, options, &block)
      # Close the destination unless it reports being closed already
      # (`closed?`, the same check traverse_stream makes)
      into.close if into.respond_to?(:close) and not (into.respond_to? :closed? and into.closed?)
    end

    into
  else
    traverse_run(obj, threads, cpus, options, &block)
  end
end

.traverse_array(array, options = {}, &block) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 114

# Yield every element of +array+ to the block.
#
# options:: :callback receives each value returned by the block, :bar is
#           ticked once per element (also when the block raises) and removed
#           at the end, :join is called once everything is done
#
# Returns the result of the :join call, or nil without one.
def self.traverse_array(array, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  array.each do |elem|
    begin
      outcome = yield(elem)
      callback.call outcome if callback
    ensure
      bar.tick if bar
    end
  end

  Log::ProgressBar.remove_bar(bar) if bar
  join.call if join
end

.traverse_cpus(num, obj, options, &block) ⇒ Object



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 335

# Traverse +obj+ handing every entry to a RbbtProcessQueue of +num+
# processes, each of which runs the block.
#
# options:: :callback (called with each result), :cleanup, :join and
#           :respawn are removed and given to the queue; the rest is passed
#           to traverse_obj
#
# Returns the result of joining the queue. On Interrupt or Aborted the queue
# and the streams behind +obj+ and options[:into] are aborted and a
# RuntimeError ("Traversal aborted") is raised; any other exception aborts
# them as well and is re-raised. The queue is always cleaned.
def self.traverse_cpus(num, obj, options, &block)
  # Declared up front and checked before use in the handlers below: when the
  # queue could not even be created there is nothing to abort or clean, and
  # the original error must reach the caller
  q = nil
  begin
    callback, cleanup, join, respawn = Misc.process_options options, :callback, :cleanup, :join, :respawn

    q = RbbtProcessQueue.new num, cleanup, join, respawn
    q.callback(&callback)
    q.init(&block)

    traverse_obj(obj, options) do |*p|
      q.process(*p)
    end

    q.join
  rescue Interrupt, Aborted
    q.abort if q
    Log.medium{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
    stream = obj_stream(obj)
    stream.abort if stream.respond_to? :abort
    stream = obj_stream(options[:into])
    stream.abort if stream.respond_to? :abort
    raise "Traversal aborted"
  rescue Exception
    q.abort if q
    Log.medium "Exception during traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.message}"
    stream = obj_stream(obj)
    stream.abort if stream.respond_to? :abort
    stream = obj_stream(options[:into])
    stream.abort if stream.respond_to? :abort
    raise $!
  ensure
    q.clean if q
  end
end

.traverse_hash(hash, options = {}, &block) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 90

# Yield every key and value of +hash+ to the block.
#
# options:: :callback receives each value returned by the block, :bar is
#           ticked once per entry (also when the block raises) and removed
#           at the end, :join is called once everything is done
#
# Returns the result of the :join call, or nil without one.
def self.traverse_hash(hash, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  hash.each do |key, value|
    begin
      outcome = yield(key, value)
      callback.call outcome if callback
    ensure
      bar.tick if bar
    end
  end

  Log::ProgressBar.remove_bar(bar) if bar
  join.call if join
end

.traverse_io(io, options = {}, &block) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 177

# Traverse the TSV entries read from +io+ with TSV::Parser.traverse,
# yielding key and values to the block.
#
# options:: :callback receives each value returned by the block, :bar is
#           ticked per entry when there is a callback and handed to the
#           parser as :monitor otherwise, :join is called at the end; the
#           rest goes to the parser
#
# A File that is closed is reopened from its +filename+ first; a
# RuntimeError is raised when that fails.
def self.traverse_io(io, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  if File === io and io.closed?
    begin
      Log.medium{"Rewinding stream #{stream_name(io)}"}
      io.reopen io.filename, "r"
    rescue
      Log.exception $!
      raise "File closed and could not reopen #{stream_name(io)}"
    end
  end

  if callback
    # Run the block, pass its result on, and tick no matter what
    handler = proc do |k, v|
      begin
        callback.call(yield(k, v))
      ensure
        bar.tick if bar
      end
    end
    TSV::Parser.traverse(io, options, &handler)
  else
    # Without a callback the parser itself keeps the bar up to date
    options[:monitor] = bar
    TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
  end

  Log::ProgressBar.remove_bar(bar) if bar
  join.call if join
end

.traverse_io_array(io, options = {}, &block) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 138

# Yield every line of +io+, stripped, to the block.
#
# options:: :callback receives each value returned by the block, :bar is
#           ticked once per line and removed at the end, :join is called
#           once everything is done
#
# A File that is closed is reopened from its +filename+ first; a
# RuntimeError is raised when that fails.
def self.traverse_io_array(io, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  if File === io and io.closed?
    begin
      Log.medium{"Rewinding stream #{stream_name(io)}"}
      io.reopen io.filename, "r"
    rescue
      Log.exception $!
      raise "File closed and could not reopen #{stream_name(io)}"
    end
  end

  while line = io.gets
    # Only when there is a callback: a line that came back without its
    # newline is completed character by character
    if callback and line[-1] != "\n"
      while c = io.getc
        line << c
        break if c=="\n"
      end
    end

    begin
      outcome = yield line.strip
      callback.call outcome if callback
    ensure
      bar.tick if bar
    end
  end

  Log::ProgressBar.remove_bar(bar) if bar
  join.call if join
end

.traverse_obj(obj, options = {}, &block) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 205

# Traverse +obj+ in the current process, choosing how according to its
# class, and yield its entries to the block.
#
# obj::     a TSV, Hash, TSV::Parser, IO-like stream, Path, TSV::Dumper,
#           Step, Array, or a String naming a file or a remote location
# options:: passed to the specific traversal method; a :type of :keys is
#           rewritten as :type => :single with no fields
#
# Raises a RuntimeError for nil, for Strings that cannot be opened and for
# objects of any other class. When something goes wrong the streams behind
# +obj+ and options[:into] are aborted; the error is then re-raised, except
# for Aborted, which is only logged.
def self.traverse_obj(obj, options = {}, &block)
  # Traversing only the keys is done as a :single traversal with no fields
  if options[:type] == :keys
    options[:fields] = []
    options[:type] = :single
  end

  Log.medium{"Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
  begin
    case obj
    when TSV
      traverse_tsv(obj, options, &block)
    when Hash
      traverse_hash(obj, options, &block)
    when TSV::Parser
      # The parser traverses itself; the callback, if any, gets each result
      callback = Misc.process_options options, :callback
      if callback
        obj.traverse(options) do |k,v|
          callback.call yield k, v
        end
      else
        obj.traverse(options, &block)
      end
    when IO, File, Zlib::GzipReader, Bgzf, StringIO
      begin
        # :array reads plain lines; anything else is parsed as a TSV
        if options[:type] == :array
          traverse_io_array(obj, options, &block)
        else
          traverse_io(obj, options, &block)
        end
      rescue Aborted
        # An aborted read aborts the stream and is not propagated
        obj.abort if obj.respond_to? :abort
      rescue Exception
        obj.abort if obj.respond_to? :abort
        raise $!
      ensure
        # The stream is always closed, and joined when it supports it
        obj.close if obj.respond_to? :close and not obj.closed?
        obj.join if obj.respond_to? :join
      end
    when Path
      obj.open do |stream|
        traverse_obj(stream, options, &block)
      end
    when TSV::Dumper
      traverse_obj(obj.stream, options, &block)
    when (defined? Step and Step)

      # Use the stream of the step when it has one; otherwise wait for the
      # step and read its result file
      stream = obj.get_stream

      if stream
        traverse_obj(stream, options, &block)
      else
        obj.join
        traverse_obj(obj.path, options, &block)
      end
    when Array
      traverse_array(obj, options, &block)
    when String
      # Strings are only accepted as file names or remote locations
      if Open.remote? obj or Misc.is_filename? obj
        Open.open(obj) do |s|
          traverse_obj(s, options, &block)
        end
      else
        raise "Can not open obj for traversal #{Misc.fingerprint obj}"
      end
    when nil
      raise "Can not traverse nil object into #{stream_name(options[:into])}"
    else
      raise "Unknown object for traversal: #{Misc.fingerprint obj }"
    end
  rescue IOError
    Log.medium{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
    stream = obj_stream(obj)
    stream.abort if stream and stream.respond_to? :abort
    stream = obj_stream(options[:into])
    stream.abort if stream.respond_to? :abort
    raise $!
  rescue Errno::EPIPE
    Log.medium{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
    stream = obj_stream(obj)
    stream.abort if stream and stream.respond_to? :abort
    stream = obj_stream(options[:into])
    stream.abort if stream.respond_to? :abort
    raise $!
  rescue Aborted
    # Unlike the other handlers, this one does not re-raise
    Log.medium{"Aborted traversing #{stream_name(obj)}"}
    stream = obj_stream(obj)
    stream.abort if stream and stream.respond_to? :abort
    stream = obj_stream(options[:into])
    stream.abort if stream.respond_to? :abort
    Log.medium{"Aborted traversing 2 #{stream_name(obj)}"}
  rescue Exception
    Log.medium{"Exception traversing #{stream_name(obj)}"}
    begin
      stream = obj_stream(obj)
      stream.abort if stream and stream.respond_to? :abort
      stream = obj_stream(options[:into])
      stream.abort if stream.respond_to? :abort
    rescue Exception
      # Failures while aborting are ignored
    ensure
      raise $!
    end
  end
end

.traverse_run(obj, threads, cpus, options = {}, &block) ⇒ Object



449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 449

# Dispatches the traversal of +obj+ to the serial, threaded or multi-process
# implementation.
#
# A +threads+ or +cpus+ count of 1 is treated as "not requested". The serial
# path (traverse_obj) is used when neither is requested or when the
# RBBT_NO_MAP_REDUCE environment variable is "true"; +threads+ takes
# precedence over +cpus+.
#
# On the cpus path the :close_streams option is normalised to an Array,
# extended with get_streams_to_close(obj) and written back; when any stream is
# listed, options[:cleanup] is set to a Proc that closes the ones still open.
def self.traverse_run(obj, threads, cpus, options = {}, &block)
  threads = nil if threads == 1
  cpus = nil if cpus == 1

  serial = ENV["RBBT_NO_MAP_REDUCE"] == "true" || (threads.nil? && cpus.nil?)
  return traverse_obj(obj, options, &block) if serial
  return traverse_threads(threads, obj, options, &block) if threads

  close_streams = Misc.process_options(options, :close_streams) || []
  close_streams = [close_streams] unless Array === close_streams
  close_streams.concat(get_streams_to_close(obj))
  options[:close_streams] = close_streams

  if close_streams.any?
    options[:cleanup] = Proc.new do
      close_streams.uniq.each { |s| s.close unless s.closed? }
    end
  end

  traverse_cpus cpus, obj, options, &block
end

.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block) ⇒ Object



477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 477

# Runs traverse_run in a background thread and returns the stream behind
# options[:into], set up as a ConcurrentStream that tracks that thread.
#
# When the traversal finishes, +into+ is closed if it responds to :close and
# does not already report itself closed. On any exception the streams behind
# +obj+ and +into+ are aborted (when they support it), and the exception is
# raised both in the calling thread and in the worker thread.
def self.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block)
  into = options[:into]

  worker = Thread.new(Thread.current) do |parent|
    begin
      traverse_run(obj, threads, cpus, options, &block)
      if into.respond_to?(:close)
        into.close unless into.respond_to?(:closed?) && into.closed?
      end
    rescue Exception => error
      [obj, into].each do |endpoint|
        stream = obj_stream(endpoint)
        stream.abort if stream and stream.respond_to? :abort
      end
      parent.raise error
      raise error
    end
  end

  ConcurrentStream.setup(obj_stream(into), :threads => worker)
end

.traverse_threads(num, obj, options, &block) ⇒ Object



309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 309

# Traverses +obj+ and hands every traversed element to an RbbtThreadQueue
# created with +num+, which is initialised with +block+.
#
# options[:callback], when present, is removed from +options+; the block is
# then wrapped so that each result is passed to the callback inside
# mutex.synchronize.
#
# Always returns nil.
def self.traverse_threads(num, obj, options, &block)
  callback = Misc.process_options options, :callback

  q = RbbtThreadQueue.new num

  if callback
    # Replace the caller's block with a wrapper; `yield` below still reaches
    # the block originally given to this method.
    block = Proc.new do |k,v,mutex|
      # When only two arguments arrive, the second one is taken to be the
      # mutex and the value is nil.
      v, mutex = nil, v if mutex.nil?
      res = yield k, v, mutex
      mutex.synchronize do
        callback.call res 
      end
    end
  end

  q.init true, &block

  # Every traversed element is queued as its argument array.
  traverse_obj(obj, options) do |*p|
    q.process p
  end

  # NOTE(review): q.clean is skipped if the traversal raises -- confirm
  # whether that is intended (ppthrough uses an ensure for the same call).
  q.join
  q.clean
  nil
end

.traverse_tsv(tsv, options = {}, &block) ⇒ Object

{{{ TRAVERSE OBJECTS



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rbbt/tsv/parallel/traverse.rb', line 66

# Traverses +tsv+ with TSV#through over options[:key_field] and
# options[:fields], yielding each key and value to the given block.
#
# The :callback, :bar and :join options are removed from +options+:
# * :callback - called with the result of every yield
# * :bar      - ticked after every entry (even when the block raises) and
#               removed from Log::ProgressBar once the traversal ends
# * :join     - called after the traversal; its result is the return value
def self.traverse_tsv(tsv, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  tsv.through options[:key_field], options[:fields] do |k,v|
    begin
      if callback
        callback.call yield(k,v)
      else
        yield k,v
      end
    ensure
      bar.tick if bar
    end
  end

  Log::ProgressBar.remove_bar(bar) if bar
  join.call if join
end

.zip_fields(list, fields = nil) ⇒ Object



489
490
491
492
493
494
495
# File 'lib/rbbt/tsv/accessor.rb', line 489

# Transposes +list+ (an array of arrays) by zipping the first element with all
# the others, so the result has as many entries as the first element.
#
# Returns [] for a nil or empty +list+. When +fields+ is given, or +list+
# responds to :fields, every zipped entry is passed through setup_array with
# those fields.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?
  fields ||= list.fields if list.respond_to? :fields

  head, tail = list[0], list[1..-1]
  rows = head.zip(*tail)
  return rows unless fields

  rows.collect{|row| setup_array(row, fields)}
end

Instance Method Details

#[](key, clean = false) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/rbbt/tsv/accessor.rb', line 229

# Reads the value stored under +key+.
#
# The raw stored value is returned when +clean+ is truthy or nothing is
# stored. Otherwise the value goes through prepare_value; a MultipleResult is
# prepared element by element and the resulting array is extended with
# MultipleResult again.
def [](key, clean = false)
  value = super(key)
  return value if clean or value.nil?
  @serializer_module ||= self.serializer_module

  return prepare_value(key, value) unless MultipleResult === value

  prepared = value.collect{|v| prepare_value key, v }
  prepared.extend MultipleResult
  prepared
end

#[]=(key, value, clean = false) ⇒ Object



243
244
245
246
# File 'lib/rbbt/tsv/accessor.rb', line 243

# Stores +value+ under +key+.
#
# The value is stored untouched when +clean+ is truthy, when it is nil, or
# when the serializer is missing or is TSV::CleanSerializer; otherwise it is
# stored as serializer.dump(value).
#
# The serializer is read through #serializer_module for both the check and the
# dump, rather than dumping through the @serializer_module instance variable,
# which this method never assigns.
def []=(key, value, clean = false)
  return super(key, value) if clean or value.nil?

  serializer = self.serializer_module
  return super(key, value) if serializer.nil? or TSV::CleanSerializer == serializer

  super(key, serializer.dump(value))
end

#add_field(name = nil) ⇒ Object



603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
# File 'lib/rbbt/tsv/manipulate.rb', line 603

# Appends a new field to every entry, using the value the given block returns
# for each (key, values) pair, and returns self.
#
# In :double TSVs a non-Array block result is wrapped in an Array. Entries
# without values are padded with nil for the existing fields. When the TSV has
# fields and +name+ is given, +name+ is appended to the field list.
#
# While running, @monitor is replaced by a description hash if #monitor is
# exactly true; the previous @monitor is restored afterwards, also when the
# block or the traversal raises.
def add_field(name = nil)
  old_monitor = @monitor
  @monitor = {:desc => "Adding field #{ name }"} if TrueClass === monitor

  begin
    through do |key, values|
      new_values = yield(key, values)
      new_values = [new_values] if type == :double and not Array === new_values

      case
      when (values.nil? and (fields.nil? or fields.empty?))
        values = [new_values]
      when values.nil?
        values = [nil] * fields.length + [new_values]
      when Array === values
        # Build a new array so the yielded one is not modified in place.
        values += [new_values]
      else
        values << new_values
      end

      self[key] = values
    end
  ensure
    @monitor = old_monitor
  end

  if not fields.nil? and not name.nil?
    new_fields = self.fields + [name]
    self.fields = new_fields
  end

  self
end

#all_fieldsObject



528
529
530
531
# File 'lib/rbbt/tsv/accessor.rb', line 528

# Returns the key field followed by all fields, or nil when either the key
# field or the fields are not defined.
def all_fields
  if key_field.nil? or fields.nil?
    nil
  else
    [key_field].concat(fields)
  end
end

#annotate(tsv) ⇒ Object



17
18
19
# File 'lib/rbbt/tsv/accessor.rb', line 17

# Sets +tsv+ up as a TSV using this object's #info and returns the result of
# TSV.setup.
def annotate(tsv)
  metadata = info
  TSV.setup(tsv, metadata)
end

#attach(other, options = {}) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/rbbt/tsv/attach.rb', line 177

# Adds fields from +other+ to this TSV and returns self.
#
# +other+ is opened with TSV.open when it is not already a TSV. The fields to
# attach default (also for :all) to the fields of +other+ that are neither
# this TSV's key field nor one of its fields.
#
# Options: :fields, :one2one, :in_namespace (default false) and
# :persist_input (default true).
#
# Strategy, first match wins:
# * same key field                        -> attach_same_key
# * other's key field among own fields
#   (or own fields_in_namespace)          -> attach_source_key
# * otherwise                             -> attach_index through
#   TSV.find_traversal; FieldNotFoundError when no traversal is found
def attach(other, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  unless TSV === other
    other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
    other = TSV.open(other, :persist => options[:persist_input] == true)
    other.identifiers ||= other_identifier_file
  end

  already_present = lambda { [key_field].concat(self.fields) }

  fields = other.fields - already_present.call if fields.nil? or fields == :all

  # NOTE(review): +fields+ can no longer be nil here, so this namespace-aware
  # default never runs -- confirm whether it was meant to come first.
  if fields.nil?
    candidates = in_namespace ? other.fields_in_namespace : other.fields
    fields = candidates - already_present.call
  end

  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")

  if key_field == other.key_field
    Log.debug "Attachment with same key: #{other.key_field}"
    attach_same_key other, fields
  elsif not in_namespace and self.fields.include?(other.key_field)
    Log.debug "Found other key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  elsif in_namespace and self.fields_in_namespace.include?(other.key_field)
    Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
    Log.debug "Attachment with index: #{other.key_field}"
    attach_index other, index, fields
  end
  Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")

  self
end

#attach_index(other, index, fields = nil) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/rbbt/tsv/attach/util.rb', line 130

# Attaches +fields+ from +other+ to this TSV, using +index+ to translate each
# of this TSV's keys into the key (or keys) to look up in +other+.
#
# +fields+ defaults to the fields of +other+ that are neither this TSV's key
# field nor one of its fields; a single field is wrapped in an Array.
# A :single TSV becomes :list, and the attached field names are appended to
# this TSV's fields.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
  fields = [fields] unless Array === fields

  other = other.tsv unless TSV === other
  # Positions of the requested fields in +other+ (:key for its key field) and
  # the names they have there.
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  # Number of fields before attaching; anything beyond it in a row is treated
  # as previously attached values (see the slice! below).
  length = self.fields.length
  other.with_unnamed do
    index.with_unnamed do
      with_unnamed do
        through do |key, values|
          source_keys = index[key]
          source_keys = [source_keys] unless Array === source_keys
          if source_keys.nil? or source_keys.empty?
            all_new_values = []
          else
            all_new_values = []
            source_keys.each do |source_key|
              # Source keys missing from +other+ contribute nothing.
              next unless other.include? source_key
              new_values = field_positions.collect do |pos|
                if pos == :key
                  if other.type == :double
                    [source_key]
                  else
                    source_key
                  end
                else
                  if other.type == :flat
                    other[source_key]
                  else
                    other[source_key][pos]
                  end
                end
              end
              # Adapt the values to this TSV's type: wrap them for :double
              # targets, unwrap them when only the source is :double.
              new_values.collect!{|v| v.nil? ? [[]] : [v]}    if     type == :double and not other.type == :double
              new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and     other.type == :double
              new_values.flatten! if type == :flat
              all_new_values << new_values
            end
          end

          # Nothing found: pad with one empty value per attached field.
          if all_new_values.empty?
            if type == :double
              all_new_values = [[[]] * field_positions.length]
            else
              all_new_values = [[nil] * field_positions.length]
            end
          end

          current = self[key] || [[]] * fields.length

          current = [current] unless Array === current

          # Values beyond the original field count are cut from the row and
          # added to the candidates.
          if current.length > length
            all_new_values << current.slice!(length..current.length - 1)
          end

          if type == :double
            # Merge the candidates field by field.
            all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
          else
            # Non-double TSVs keep only the first candidate.
            all_new_values = all_new_values.first
          end

          current += all_new_values

          # NOTE(review): replace is called on self[key], which is assumed to
          # be non-nil for every traversed key -- confirm.
          self[key].replace current
        end
      end
    end
  end

  self.type = :list if self.type == :single

  self.fields = self.fields.concat field_names
end

#attach_same_key(other, fields = nil) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/rbbt/tsv/attach/util.rb', line 3

# Attaches +fields+ from +other+ to this TSV when both share the same key
# field, and returns self.
#
# +fields+ defaults to the fields of +other+ that are neither this TSV's key
# field nor one of its fields; a single field is wrapped in an Array. Keys
# missing from +other+ are padded with [] (:double) or nil per attached field.
# A :single TSV becomes :list, and +fields+ is appended to this TSV's fields.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  fields = [fields].compact unless Array === fields

  field_positions = fields.collect{|field| other.identify_field field}
  other.with_unnamed do
    with_unnamed do
      through do |key, values|
        self[key] = [] if self[key].nil?
        current = self[key]
        current = [current] unless Array === current
        if other.include? key
          case
          when other.type == :flat
            if type == :flat
              new_values = other[key]
            else
              new_values = [other[key]]
            end
          when other.type == :single
            new_values = [other[key]]
          else
            other_values = other[key] || [nil] * other.fields.length
            new_values = field_positions.collect do |pos|
              pos == :key ? key : other_values[pos]
            end
          end

          # Adapt the values to this TSV's type: wrap them for :double targets,
          # unwrap them when only the source is :double.
          new_values.collect!{|v| [v]}     if     type == :double and not other.type == :double
          new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and     other.type == :double

          # Keep :flat rows flat. The result is reassigned (not flatten!) so
          # the array stored in +other+ is never modified.
          new_values = new_values.flatten if type == :flat

          self[key] = current + new_values
        else
          if type == :double
            self[key] = current + [[]] * fields.length
          else
            self[key] = current + [nil] * fields.length
          end
        end
      end
    end
  end

  self.type = :list if self.type == :single

  self.fields = self.fields.concat fields

  self
end

#attach_source_key(other, source, options = {}) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/rbbt/tsv/attach/util.rb', line 56

# Attaches fields from +other+ to this TSV, looking entries of +other+ up by
# the value(s) this TSV holds in its +source+ field. Returns self.
#
# Options (both removed from +options+):
# * :fields  - fields of +other+ to attach; defaults to those that are neither
#              this TSV's key field nor one of its fields
# * :one2one - when truthy, only the first value of each attached field is
#              kept for every source key
#
# Rows whose source value is nil or an empty Array are padded with one empty
# value per attached field ([] for :double TSVs, nil otherwise).
def attach_source_key(other, source, options = {})
  fields = Misc.process_options options, :fields
  one2one = Misc.process_options options, :one2one

  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv(:persistence => :no_create) unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  source_pos = identify_field source

  other.with_unnamed do
    with_unnamed do
      through do |key, values|
        source_keys = values[source_pos]

        case
        when (source_keys.nil? or (Array === source_keys and source_keys.empty?))
          if type == :double
            self[key] = values.concat field_positions.collect{|v| []}
          else
            # One nil per attached field; the multiplier must be the number of
            # positions, not the positions array itself.
            self[key] = values.concat [nil] * field_positions.length
          end
        when Array === source_keys
          # Several source keys: gather the values for each of them ...
          all_new_values = source_keys.collect do |source_key|
            positions = field_positions.collect do |pos|
              if pos == :key
                [source_key]
              else
                if other.include? source_key
                  v = other[source_key][pos]
                  Array === v ? v : [v]
                else
                  [nil]
                end
              end
            end

            positions.collect!{|v| v[0..0]} if one2one
            positions
          end

          # ... and merge them field by field.
          new = Misc.zip_fields(all_new_values).each{|field_entry|
            field_entry.flatten!
          }

          self[key] = values.concat new
        else
          # Single source key: one scalar per attached field.
          source_key = source_keys
          all_new_values = field_positions.collect do |pos|
            if pos == :key
              source_key
            else
              if other.include? source_key
                v = other[source_key][pos]
                Array === v ? v.first : v
              else
                nil
              end
            end
          end

          self[key] = values.concat all_new_values
        end

      end
    end
  end

  self.fields = self.fields.concat field_names
  self
end

#change_key(format, options = {}, &block) ⇒ Object



43
44
45
46
# File 'lib/rbbt/tsv/change_id.rb', line 43

# Delegates to TSV.change_key for this TSV, defaulting the :identifiers option
# to this TSV's own identifiers.
def change_key(format, options = {}, &block)
  defaults = {:identifiers => self.identifiers}
  TSV.change_key(self, format, Misc.add_defaults(options, defaults), &block)
end

#chunked_values_at(keys, max = 5000) ⇒ Object



366
367
368
369
370
371
372
# File 'lib/rbbt/tsv/accessor.rb', line 366

# Like values_at, but the keys are looked up in chunks of at most +max+
# (split by Misc.ordered_divide) and the chunk results are concatenated.
#
# While the accumulated result is still empty, a chunk result that responds to
# :annotate is asked to annotate it.
def chunked_values_at(keys, max = 5000)
  collected = []
  Misc.ordered_divide(keys, max).each do |chunk|
    chunk_values = self.values_at(*chunk)
    chunk_values.annotate collected if chunk_values.respond_to? :annotate and collected.empty?
    collected.concat(chunk_values)
  end
  collected
end

#closeObject



97
98
99
100
101
102
103
# File 'lib/rbbt/tsv/accessor.rb', line 97

# Closes the underlying object through super. Any exception raised while
# closing is swallowed, in which case self is returned.
def close
  super
rescue Exception
  self
end

#collectObject



323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/rbbt/tsv/accessor.rb', line 323

# Collects over the entries through super, skipping the internal ENTRY_KEYS
# (which produce nil in the result).
#
# Values are loaded with the serializer module unless there is none or it is
# TSV::CleanSerializer. Unless the TSV is unnamed, keys are passed through
# prepare_entity and, when fields are defined, values are annotated: Array
# values of :double/:list TSVs with setup_array, values of :flat/:single TSVs
# with prepare_entity.
#
# Each entry yields (key, value) to the block, or becomes [key, value] when no
# block is given.
def collect
  serializer_module = self.serializer_module
  super do |key, value|
    next if ENTRY_KEYS.include? key

    # TODO Update this to be more efficient
    raw = serializer_module.nil? || TSV::CleanSerializer == serializer_module
    value = serializer_module.load(value) unless raw

    # Annotated with Entity and NamedArray
    unless @unnamed
      unless fields.nil?
        kind = type
        if [:double, :list].include?(kind)
          setup_array value, fields, key, entity_options if Array === value
        elsif [:flat, :single].include?(kind)
          value = prepare_entity(value, fields.first, entity_options)
        end
      end
      key = prepare_entity(key, key_field, entity_options)
    end

    block_given? ? yield(key, value) : [key, value]
  end
end

#column(field, cast = nil) ⇒ Object



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
# File 'lib/rbbt/tsv/manipulate.rb', line 511

# Returns a new TSV holding only +field+: slice(field) with every value
# replaced by its first element, optionally converted by sending it +cast+.
#
# The result is :flat when this TSV is :double or :flat, :single otherwise.
def column(field, cast = nil)
  result = slice(field)

  result.with_unnamed do
    result.each do |key, values|
      first = values.first
      result[key] = cast.nil? ? first : first.send(cast)
    end
  end

  result.type = [:double, :flat].include?(type) ? :flat : :single
  result
end

#column_values(field, options = {}) ⇒ Object



532
533
534
535
536
537
538
539
# File 'lib/rbbt/tsv/manipulate.rb', line 532

# Returns every value found in +field+ across all entries as one flat list,
# passed through prepare_entity for that field with +options+.
#
# Array values are flattened; scalar values are added as they are.
def column_values(field, options = {})
  all = []
  through :key, field do |k,values|
    values = Array === values ? values.flatten : [values]
    all.concat values
  end
  prepare_entity(all, field, options)
end

#detach(file) ⇒ Object



219
220
221
222
223
224
# File 'lib/rbbt/tsv/attach.rb', line 219

# Reorders this TSV keeping the key and only those of its fields whose
# fullname also appears among the fields of +file+. Returns the result of
# reorder.
def detach(file)
  file_names = file.fields.collect{|field| field.fullname}
  shared = self.fields.each_with_index.select{|field, _| file_names.include? field.fullname}
  reorder :key, shared.collect{|_, position| position}
end

#dump_entry_value(value) ⇒ Object



149
150
151
152
# File 'lib/rbbt/tsv/accessor.rb', line 149

# Serializes a metadata entry value for storage.
#
# Objects that do not respond to :persistence_path get +value+ back unchanged.
# Otherwise nil, or an already serialized nil, becomes SERIALIZED_NIL and
# anything else is dumped with TSV_SERIALIZER.
def dump_entry_value(value)
  return value unless respond_to? :persistence_path

  if value.nil? or value == SERIALIZED_NIL
    SERIALIZED_NIL
  else
    TSV_SERIALIZER.dump(value)
  end
end

#dumper_stream(keys = nil, no_options = false) ⇒ Object



548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
# File 'lib/rbbt/tsv/accessor.rb', line 548

# Returns the result of TSV::Dumper.stream for this TSV, with its entries
# added to the dumper.
#
# The dumper is initialised unless +no_options+ is truthy. With +keys+ only
# those entries are added, read through #[]; otherwise all entries are added
# while the TSV is unnamed. Exceptions are logged and re-raised, in which case
# the dumper is not closed here.
def dumper_stream(keys = nil, no_options = false)
  TSV::Dumper.stream self do |dumper|
    dumper.init unless no_options
    begin
      if keys
        keys.each { |key| dumper.add key, self[key] }
      else
        with_unnamed { each { |k, v| dumper.add k, v } }
      end
    rescue Exception => error
      Log.exception error
      raise error
    end
    dumper.close
  end
end

#eachObject



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/rbbt/tsv/accessor.rb', line 295

# Iterates over the entries through super, skipping the internal ENTRY_KEYS,
# and yields each (key, value) pair to the block when one is given.
#
# Non-nil values are loaded with the serializer module unless there is none or
# it is TSV::CleanSerializer. Unless the TSV is unnamed, keys are passed
# through prepare_entity and, when fields are defined, values are annotated:
# Array values of :double/:list TSVs with setup_array, values of :flat/:single
# TSVs with prepare_entity.
def each
  fields = self.fields

  serializer_module = self.serializer_module
  super do |key, value|
    next if ENTRY_KEYS.include? key

    # TODO Update this to be more efficient
    value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module

    # Annotated with Entity and NamedArray
    if not @unnamed
      if not fields.nil?
        case type
        when :double, :list
          # Case equality (===) is the array test; `Array == value` only
          # matches the Array class itself.
          setup_array value, fields, key, entity_options, entity_templates if Array === value
        when :flat, :single
          # Keep the prepared entity, as #collect does.
          value = prepare_entity(value, fields.first, entity_options)
        end
      end
      key = prepare_entity(key, key_field, entity_options)
    end

    yield key, value if block_given?
    [key, value]
  end
end

#empty?Boolean

Returns:

  • (Boolean)


207
208
209
# File 'lib/rbbt/tsv/accessor.rb', line 207

# Whether #length is zero.
def empty?
  length.zero?
end

#excel(filename, options = {}) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/rbbt/tsv/excel.rb', line 49

# Writes this TSV to +filename+ as a Spreadsheet workbook with one worksheet:
# a header row with all_fields, then one row per entry.
#
# Options (removed from +options+):
# * :name         - when truthy, keys and values that respond to :name are
#                   written as their name (falling back to the object itself
#                   when the name is nil)
# * :sort_by      - field to sort the entries by
# * :sort_by_cast - method sent to the value (to its first element for Array
#                   values) to obtain the sort key
#
# Array cell values are joined with ", ".
def excel(filename, options ={})
  use_names, sort_field, sort_cast = Misc.process_options options, :name, :sort_by, :sort_by_cast

  book = Spreadsheet::Workbook.new
  sheet1 = book.create_worksheet
  sheet1.row(0).concat all_fields

  data =
    if not sort_field
      self
    elsif sort_cast
      self.sort_by(sort_field) do |k, v|
        (Array === v ? v.first : v).send(sort_cast)
      end
    else
      self.sort_by sort_field
    end

  row_number = 1
  data.through do |key, values|
    cells = [(use_names and key.respond_to?(:name)) ? key.name || key : key]

    values = [values] unless Array === values
    values.each do |value|
      v = (use_names and value.respond_to?(:name)) ? value.name || value : value
      cells.push(Array === v ? v * ", " : v)
    end

    sheet1.row(row_number).concat cells
    row_number += 1
  end
  book.write filename
end

#field_index(field) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/tsv/field_index.rb', line 13

# Returns an index from each value of +field+ to the sorted list of keys
# holding it. The index is built through Persist.persist_tsv (prefix
# "FieldIndex", :list serializer, "BDB" engine, stored under
# TSV.field_index_dir) and memoised per field in @field_indices.
#
# For :single and :list TSVs the first traversed value is indexed; for other
# types every element of the first traversed value is indexed.
def field_index(field)
  @field_indices ||= {}
  @field_indices[field] ||= Persist.persist_tsv(self, filename, {:field => field}, :prefix => "FieldIndex", :dir => TSV.field_index_dir, :persist => true, :serializer => :list, :engine => "BDB" ) do |data|
    keys_by_value = {}
    scalar_values = [:single, :list].include?(type)

    through :key, [field] do |key, values|
      first = values.first
      (scalar_values ? [first] : first).each do |value|
        (keys_by_value[value] ||= []) << key
      end
    end

    keys_by_value.each do |value, keys|
      data[value] = keys.sort
    end

    data
  end
end

#field_index_select(matches) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rbbt/tsv/field_index.rb', line 41

# Returns the keys that satisfy every field => value(s) condition in
# +matches+, using field_index for each field.
#
# For an Array of values the key lists of all its values are merged with
# Misc.merge_sorted_arrays; a single value uses its key list directly (or []).
# The per-field lists are combined with Misc.intersect_sorted_arrays.
# Returns nil when +matches+ is empty.
def field_index_select(matches)
  matches.inject(nil) do |selected, (field, values)|
    index = field_index(field)

    keys =
      if Array === values
        merged = []
        values.each do |value|
          hits = index[value]
          merged = Misc.merge_sorted_arrays(merged, hits) unless hits.nil?
        end
        merged
      else
        index[values] || []
      end

    selected.nil? ? keys : Misc.intersect_sorted_arrays(selected, keys)
  end
end

#fieldsObject



467
468
469
470
471
472
473
474
475
# File 'lib/rbbt/tsv/accessor.rb', line 467

# Returns the field names, read once from the "__tsv_hash_fields" entry
# through load_entry_value and memoised in @fields.
#
# The plain memoised value is always returned: wrapping the fields in a
# NamedArray (cached in @named_fields) was disabled in the original code with
# an `if true or ...` guard.
def fields
  @fields ||= load_entry_value(self.send(:[], "__tsv_hash_fields", :entry_key))
  @fields
end

#fields=(value) ⇒ Object



482
483
484
485
486
487
# File 'lib/rbbt/tsv/accessor.rb', line 482

# Sets the field names: stores the dumped value under the
# "__tsv_hash_fields" entry (as a clean write), updates @fields and clears
# @named_fields.
def fields=(value)
  serialized = dump_entry_value(value)
  self.send(:[]=, "__tsv_hash_fields", serialized, true)
  @fields = value
  @named_fields = nil
end

#filter(filter_dir = nil) ⇒ Object



280
281
282
283
284
285
# File 'lib/rbbt/tsv/filter.rb', line 280

# Extends this TSV with Filtered, sets its filter_dir and resets its filters
# to an empty list. Returns self.
def filter(filter_dir = nil)
  extend Filtered
  self.filter_dir = filter_dir
  self.filters = []
  self
end

#head(times = 10) ⇒ Object



607
608
609
610
611
612
613
614
615
# File 'lib/rbbt/tsv/accessor.rb', line 607

# Returns the first +times+ lines of dumper_stream as one String, or fewer
# when the stream ends earlier.
def head(times=10)
  stream = dumper_stream
  times.times.inject("") do |text, _|
    break text if stream.eof?
    text << stream.gets
  end
end

#identifier_filesObject



497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
# File 'lib/rbbt/tsv/accessor.rb', line 497

# Returns the identifier files of this TSV as an Array.
#
# When +identifiers+ is set:
# * a TSV is returned wrapped in an Array
# * an Array is returned as is when it is empty or starts with a TSV;
#   otherwise each element that is not a Path is set up as one
# * anything else is set up as a Path (unless it already is) and wrapped
#
# Without identifiers, the identifier files of +filename+ are used (a copy of
# it is set up as a Path when it is not one already); [] when there is no
# filename either.
def identifier_files
  ids = identifiers
  if ids
    as_path = lambda { |f| Path === f ? f : Path.setup(f) }
    return [ids] if TSV === ids
    return [as_path.call(ids)] unless Array === ids
    return ids if TSV === ids.first or ids.empty?
    return ids.collect(&as_path)
  end

  file = filename
  return file.identifier_files if Path === file
  return Path.setup(file.dup).identifier_files if file

  []
end

#identify_field(field) ⇒ Object



180
181
182
# File 'lib/rbbt/tsv/util.rb', line 180

# Resolves +field+ against this TSV's key field and fields by delegating to
# TSV.identify_field.
def identify_field(field)
  key = key_field
  names = fields
  TSV.identify_field(key, names, field)
end

#index(options = {}) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/rbbt/tsv/index.rb', line 12

# Builds an index from the values of this TSV to the +target+ field, through
# Persist.persist_tsv.
#
# Options (defaults in parentheses):
# * :target (:key)   - field the index points to
# * :fields (nil)    - fields whose values become index keys
# * :type (:single)  - type of the index: :single, :flat or :double
# * :order (false)   - when truthy, targets are ranked by the position of the
#                      field in which the value was found
# Options starting with :persist are pulled out with Misc.pull_keys and
# handed to Persist.persist_tsv.
def index(options = {})
  options = Misc.add_defaults options, 
    :target => :key, :fields => nil, :type => :single, :order => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "Index[#{options[:target] || :key}]"

  Log.debug "Index: #{ filename } - #{options.inspect}"
  Persist.persist_tsv self, filename, options, persist_options do |new|
    with_unnamed do
      target, fields, index_type, order = Misc.process_options options, :target, :fields, :type, :order

      new.serializer = index_type if new.respond_to? :serializer and new.serializer == :type

      if order

        # Maybe best to do the stuff in memory first instead of the original
        # object, which could be persisted
        save = new
        new = {} 

        new_key_field, new_fields = through target, fields, true do |key, values|
          next if key.empty? 
          # Put the key itself in front so it is indexed at position 0.
          case type
          when :single
            values = [values]
            values.unshift key
          when :double
            values = values.dup
            values.unshift [key]
          when :list, :flat
            values = values.dup
            values.unshift key
          end

          values.each_with_index do |list, i|
            list = [list] unless type == :double

            list.uniq.each do |value|
              if new.include? value
                new_value = new[value]
              else
                new_value = []
              end

              # Slot i collects, joined by "|", the keys whose i-th position
              # held this value.
              if new_value[i].nil?
                new_value[i] =  key
              else
                new_value[i] += "|" <<  key 
              end
              new[value] = new_value
            end
          end
        end

        # Update original object
        new.each do |key, values|
          # Slots are read in position order, so lower positions come first.
          case
          when index_type == :double
            save[key] = [values.compact.collect{|v| v.split "|"}.flatten.uniq]
          when index_type == :flat
            save[key] = values.compact.collect{|v| v.split "|"}.flatten.uniq
          when index_type == :single
            save[key] = values.compact.collect{|v| v.split "|"}.flatten.first
          end
        end

        new = save
      else
        new_key_field, new_fields = through target, fields, true do |key, values|
          # Normalise the values into one flat list, whatever the TSV type.
          case
          when type == :single
            values = [values]
          when type == :double
            values = values.flatten
          else
            values = values.dup
          end

          # The key is indexed as well.
          values.unshift key

          values.uniq.each do |value|
            case index_type
            when :double
              if not new.include? value
                new[value] = [[key]]
              else
                current = new[value]
                current[0] << key
                new[value] = current
              end
            when :flat
              if not new.include? value
                new[value] = [key]
              else
                current = new[value]
                current << key
                new[value] = current
              end

            else
              # Any other index type keeps the first key seen for a value.
              new[value] = key unless new.include? value
            end
          end
        end
      end

      # In the index the traversed fields play the role of key field and the
      # target becomes its only field.
      TSV.setup(new, :type => index_type, :filename => filename, :fields => [new_key_field], :key_field => new_fields * ", ")
    end
  end
end

#infoObject



13
14
15
# File 'lib/rbbt/tsv/accessor.rb', line 13

# Returns a Hash with this TSV's metadata (key field, fields, namespace,
# entity options, type, filename, identifiers, unnamed and cast), leaving out
# the entries that are nil.
def info
  {
    :key_field => key_field,
    :fields => fields,
    :namespace => namespace,
    :entity_options => entity_options,
    :type => type,
    :filename => filename,
    :identifiers => identifiers,
    :unnamed => unnamed,
    :cast => cast
  }.reject{|_, v| v.nil? }
end

#keysObject



272
273
274
275
276
277
# File 'lib/rbbt/tsv/accessor.rb', line 272

# Returns the keys of the entries, without the internal ENTRY_KEYS.
#
# The plain list is returned when the TSV is unnamed or has no key field;
# otherwise it is passed through prepare_entity for the key field, with
# :dup_array => true added to the entity options.
def keys
  own_keys = super - ENTRY_KEYS.to_a

  if @unnamed or key_field.nil?
    own_keys
  else
    prepare_entity(own_keys, key_field, entity_options.merge(:dup_array => true))
  end
end

#lengthObject



356
357
358
# File 'lib/rbbt/tsv/accessor.rb', line 356

# Number of entries, counted as the length of #keys.
def length
  key_list = keys
  key_list.length
end

#load_entry_value(value) ⇒ Object



144
145
146
147
# File 'lib/rbbt/tsv/accessor.rb', line 144

# Deserializes a stored metadata entry value.
#
# Objects that do not respond to :persistence_path get +value+ back unchanged.
# Otherwise nil, or a serialized nil, becomes nil and anything else is loaded
# with TSV_SERIALIZER.
def load_entry_value(value)
  return value unless respond_to? :persistence_path

  if value.nil? or value == SERIALIZED_NIL
    nil
  else
    TSV_SERIALIZER.load(value)
  end
end

#marshal_dumpObject



297
298
299
# File 'lib/rbbt/tsv/util.rb', line 297

# Marshal hook: the TSV is dumped as a pair of its #info metadata and its
# contents as returned by to_hash.
def marshal_dump
  metadata = info
  contents = to_hash
  [metadata, contents]
end

#matrix_melt(*args) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/rbbt/tsv/matrix.rb', line 35

# Reads this TSV as a matrix (TSV.read_matrix with +args+), indexes it with
# Association.index (not persisted, :recycle => true) and returns that index
# with two fields added: the parts of each key before and after the first "~".
# The new fields are named after the corresponding parts of the index's key
# field.
def matrix_melt(*args)
  matrix = TSV.read_matrix(self, *args)

  melted = Association.index matrix, :persist => false, :recycle => true
  source_field, _sep, target_field = melted.key_field.partition "~"

  [[source_field, :first], [target_field, :last]].each do |name, side|
    melted.add_field(name) { |k, _| k.partition("~").send(side) }
  end

  melted
end

#melt(header_field, *info_fields, &block) ⇒ Object



21
22
23
# File 'lib/rbbt/tsv/melt.rb', line 21

# Delegates to TSV.melt, passing this TSV, its key field, +header_field+, its
# fields and the +info_fields+, together with the given block.
def melt(header_field, *info_fields, &block)
  forwarded = [self, key_field, header_field, fields, *info_fields]
  TSV.melt(*forwarded, &block)
end

#merge_different_fields(other, options = {}) ⇒ Object



226
227
228
229
230
231
232
233
234
# File 'lib/rbbt/tsv/attach.rb', line 226

# Merges this TSV with +other+ through TSV.merge_different_fields, writing to
# a temporary file that is then opened with TSV.open and returned from the
# TmpFile.with_file block.
#
# The merged TSV gets this TSV's key field when it has one, and the
# concatenation of both field lists when both are defined.
def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, options)

    TSV.open(output, options).tap do |merged|
      own_key = self.key_field
      merged.key_field = own_key unless own_key.nil?

      own_fields = self.fields
      other_fields = own_fields.nil? ? nil : other.fields
      merged.fields = own_fields + other_fields unless own_fields.nil? or other_fields.nil?
    end
  end
end

#merge_zip(other) ⇒ Object



236
237
238
239
240
# File 'lib/rbbt/tsv/attach.rb', line 236

# Passes every (key, value) pair of +other+ to zip_new on this TSV. Returns
# whatever other.each returns.
def merge_zip(other)
  other.each { |key, values| self.zip_new key, values }
end

#namespace=(value) ⇒ Object



477
478
479
480
# File 'lib/rbbt/tsv/accessor.rb', line 477

# Sets the namespace: stores the dumped value under the
# "__tsv_hash_namespace" entry (as a clean write) and updates @namespace.
def namespace=(value)
  serialized = dump_entry_value(value)
  self.send(:[]=, "__tsv_hash_namespace", serialized, true)
  @namespace = value
end

#optionsObject



519
520
521
522
523
524
525
# File 'lib/rbbt/tsv/accessor.rb', line 519

# Returns a Hash, set up as an IndiferentHash, mapping every name in ENTRIES
# (as a Symbol) to the value this TSV returns for it.
def options
  collected = ENTRIES.each_with_object({}) do |entry, acc|
    acc[entry.to_sym] = self.send(entry)
  end
  IndiferentHash.setup collected
end

#page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object

Starts in page 1



452
453
454
455
456
457
458
459
460
461
462
463
464
# File 'lib/rbbt/tsv/accessor.rb', line 452

# Returns page +pnum+ (pages start at 1) of +psize+ entries, with the keys
# ordered by sort_by on +field+ (:key by default; the String "key" is treated
# as :key), reversed when +reverse+ is truthy.
#
# With +just_keys+ the page's keys are returned; otherwise the result of
# selecting those keys.
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
  first = psize * (pnum - 1)
  last = psize * pnum - 1
  field = :key if field == "key"

  sorted = sort_by(field || :key, true, &block)
  sorted.reverse! if reverse
  window = sorted[first..last]

  just_keys ? window : select(:key => window)
end

#pos_index(pos_field = nil, options = {}) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/rbbt/tsv/index.rb', line 138

# Builds a point index over +pos_field+ (default "Position"), wrapped in
# Persist.persist.
#
# Every entry contributes one [key, position] pair, or one pair per element
# when the position value is an Array. The pairs are loaded into a
# FixWidthTable sized to the longest key seen.
def pos_index(pos_field = nil, options = {})
  pos_field ||= "Position"

  options = Misc.add_defaults options,
    :persist => false, :persist_file => nil, :persist_update => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "PosIndex[#{pos_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    widest_key = 0
    points = []

    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, pos_field do |key, values|
          widest_key = [widest_key, key.length].max

          position = values.first
          case position
          when Array
            position.each { |single| points << [key, single.to_i] }
          else
            points << [key, position.to_i]
          end
        end
      end
    end

    FixWidthTable.get(:memory, widest_key, false).tap do |index|
      index.add_point points
      index.read
    end
  end
end

#ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/rbbt/tsv/parallel/through.rb', line 23

# Traverses the TSV like #through, but hands every traversed entry to a
# RbbtProcessQueue with +num_procs+ workers; each worker runs +block+ with
# the entry's key and value.
#
# A callback registered beforehand with #ppthrough_callback is installed on
# the queue and then cleared, so it applies to a single traversal.
#
# Returns what #through returns.
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)

  q = RbbtProcessQueue.new num_procs

  q.callback &@ppthrough_callback
  @ppthrough_callback = nil

  q.init do |k,v|
    block.call k,v
  end

  begin
    res = through(new_key_field, new_fields, uniq, zipped) do |*p|
      # Enqueue the traversed entry (previously the queue object itself was
      # enqueued, so the workers never saw the data).
      q.process p
    end
    q.join
  ensure
    # Always release the queue, even when the traversal fails.
    q.clean
  end

  res
end

#ppthrough_callback(&block) ⇒ Object



19
20
21
# File 'lib/rbbt/tsv/parallel/through.rb', line 19

# Stores +block+ in @ppthrough_callback, where the next #ppthrough call picks
# it up as the queue callback. Returns the stored block.
def ppthrough_callback(&block)
  @ppthrough_callback = block
end

#prepare_entity(entity, field, options = {}) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/tsv/accessor.rb', line 43

# Annotates +entity+ (a String or Array) using the template cached for
# +field+ in entity_templates, building and caching that template on first
# use through Misc.prepare_entity.
#
# Returns +entity+ untouched when it is nil, when the Entity constant is not
# defined, or when no annotating template exists for +field+ (that outcome is
# cached as a nil template). Frozen entities are duplicated before annotation.
#
# Note: the :dup_array key is removed from +options+ and has no other effect
# here.
def prepare_entity(entity, field, options = {})
  return entity if entity.nil?
  return entity unless defined? Entity
  options.delete :dup_array

  template = entity_templates[field]
  unless template and template.respond_to?(:annotate)
    # A cached entry that cannot annotate means there is nothing to do.
    return entity if entity_templates.include? field

    template = Misc.prepare_entity("TEMPLATE", field, options)
    unless template.respond_to?(:annotate)
      entity_templates[field] = nil
      return entity
    end
    entity_templates[field] = template
  end

  if String === entity or Array === entity
    entity = entity.dup if entity.frozen?
    template.annotate entity
    entity.extend AnnotatedArray if Array === entity
  end
  entity
end

#prepare_value(key, value) ⇒ Object

{{{ GETTERS AND SETTERS



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/rbbt/tsv/accessor.rb', line 213

# Turns a stored value into the object handed to callers.
#
# The value is first loaded through @serializer_module (unless that is unset
# or is TSV::CleanSerializer). When the TSV is unnamed or has no fields the
# loaded value is returned as is. Otherwise :double/:list values go through
# #setup_array and :flat/:single values through #prepare_entity, after
# duplicating frozen values on a best-effort basis.
def prepare_value(key, value)
  needs_load = @serializer_module && !(TSV::CleanSerializer == @serializer_module)
  value = @serializer_module.load(value) if needs_load

  return value if @unnamed || fields.nil?

  case type
  when :double, :list
    setup_array(value, fields, key, entity_options, entity_templates)
  when :flat, :single
    if value.frozen?
      begin
        value = value.dup
      rescue
        # Best effort: keep the original when it cannot be duplicated.
      end
    end

    value = prepare_entity(value, fields.first, entity_options)
  end

  value
end

#process(field, &block) ⇒ Object



558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
# File 'lib/rbbt/tsv/manipulate.rb', line 558

# Rewrites the values of +field+ in place, entry by entry, using +block+.
#
# The block receives (value), (value, key) or (value, key, values) depending
# on its arity. For :single and :flat TSVs the whole entry value is passed and
# replaced; for other types only the position identified for +field+ is
# updated, and entries whose value is nil are skipped. Returns self.
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    if type == :single or type == :flat
      field_values = values
    else
      next if values.nil?
      field_values = values[field_pos]
    end

    new_values =
      case block.arity
      when 1 then yield(field_values)
      when 2 then yield(field_values, key)
      when 3 then yield(field_values, key, values)
      else
        raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
      end

    if type == :single or type == :flat
      self[key] = new_values
    else
      current = values[field_pos]
      same_kind = (String === current and String === new_values) ||
                  (Array === current and Array === new_values)
      if same_kind
        # Keep the existing object and just swap its contents.
        current.replace new_values
      else
        values[field_pos] = new_values
      end
      self[key] = values
    end
  end

  self
end

#process_key(&block) ⇒ Object



542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'lib/rbbt/tsv/manipulate.rb', line 542

# Builds a new annotated TSV in which every key has been replaced by the
# result of +block+.
#
# The block receives (key) or (key, values) depending on its arity; any other
# arity raises a RuntimeError. Values are carried over unchanged.
def process_key(&block)
  new = annotate({})
  through do |key, values|
    key = case
          when block.arity == 1
            yield(key)
          when block.arity == 2
            yield(key, values)
          else
            # Only arities 1 and 2 are handled above; the message used to
            # claim that 3 was accepted as well.
            raise "Unexpected arity in block, must be 1 or 2: #{block.arity}"
          end
    new[key] = values
  end
  new
end

#pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/rbbt/tsv/parallel/through.rb', line 3

# Traverses the TSV like #through, pushing every traversed entry onto a
# RbbtThreadQueue with +num_threads+ workers initialised with +block+.
#
# The queue is joined after the traversal and always cleaned afterwards.
# Returns the result of the queue's join.
def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
  queue = RbbtThreadQueue.new(num_threads)
  queue.init(true, &block)

  begin
    through(new_key_field, new_fields, uniq, zipped) { |*entry| queue.process(entry) }
    queue.join
  ensure
    queue.clean
  end
end

#R(script, source = nil, open_options = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/rbbt/util/R.rb', line 120

# Runs an R +script+ against this TSV.
#
# The TSV is written to a temporary file and loaded in R as +data+; after the
# script runs, +data+ is written back to the same file unless it is NULL.
#
# script::       R code to run.
# source::       R file or list of files to +source+ before the script. May be
#                omitted, in which case a Hash in this position is taken as
#                +open_options+ (and :source is read from it).
# open_options:: options used to reopen the result. Keys pulled with the :R
#                prefix configure the R run (:method, :debug, :monitor,
#                :open); :raw returns the file contents as text;
#                :ignore_output skips reopening; :key sets the key field of
#                the result.
#
# Returns the raw text (with :raw), nil (with :ignore_output) or the reopened
# TSV, which takes this TSV's namespace when it has none of its own.
def R(script, source = nil, open_options = {})
  open_options, source = source, nil if Hash === source

  source ||= Misc.process_options open_options, :source
  source = [source] if String === source

  require_sources  = source.collect{|source|
    "source('#{source}');"
  } * ";\n" if Array === source and source.any?

  script = require_sources + "\n\n" + script if require_sources

  r_options = Misc.pull_keys open_options, :R
  r_options[:debug] = true if r_options[:method] == :debug
  if r_options.delete :debug
    # Debug runs go through the shell, are monitored and keep the data file.
    r_options[:monitor] = true
    r_options[:method] = :shell
    erase = false
  else
    erase = true
  end

  tsv_R_option_str = r_options.delete :open
  tsv_R_option_str = ", "  + tsv_R_option_str if String === tsv_R_option_str and not tsv_R_option_str.empty?

  raw = open_options.delete :raw
  TmpFile.with_file nil, erase do |f|
    Open.write(f, self.to_s)

    script = <<-EOF
## Loading tsv into data
data = rbbt.tsv('#{f}'#{tsv_R_option_str});

#{script.strip}

## Resaving data
if (! is.null(data)){ rbbt.tsv.write('#{f}', data); }
NULL
    EOF


    case r_options.delete :method
    when :eval
      R.eval_run script
    else 
      R.run script, r_options
    end

    open_options = Misc.add_defaults open_options, :type => :list
    if raw
      Open.read(f)
    elsif open_options[:ignore_output]
      # Nothing is reopened, so there is no TSV to configure. Previously the
      # key_field/namespace assignments below were attempted on nil.
      nil
    else
      tsv = TSV.open(f, open_options)
      tsv.key_field = open_options[:key] if open_options.include? :key
      tsv.namespace ||= self.namespace if self.namespace
      tsv
    end
  end
end

#R_console(pre_script = nil) ⇒ Object



192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/util/R.rb', line 192

# Opens an R console with this TSV written to a temporary file.
#
# The script handed to R.console sets +data_file+ to that file's path and,
# when +pre_script+ is given, embeds it inside a fold-marked "Pre-script"
# section.
def R_console(pre_script = nil)
  TmpFile.with_file do |f|
    Log.debug{"R Console:\n" << pre_script } if pre_script
    TmpFile.with_file(pre_script) do |script_file|
      Open.write(f, self.to_s)
      script = "data_file = '#{f}';\n"
      # pre_script defaults to nil; appending nil to a String raises
      # TypeError, so the section is only added when there is one.
      script <<  "\n#\{{{Pre-script:\n\n" << pre_script << "\n#}}}Pre-script\n\n" if pre_script
      R.console(script)
    end
  end
end

#R_interactive(pre_script = nil) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
# File 'lib/rbbt/util/R.rb', line 180

# Starts an interactive R session with this TSV written to a temporary file.
#
# The script handed to R.interactive sets +data_file+ to that file's path
# and, when +pre_script+ is given, +script_file+ to the path of a second
# temporary file created from +pre_script+.
def R_interactive(pre_script = nil)
  TmpFile.with_file do |data_path|
    Log.debug{"R Interactive:\n" << pre_script } if pre_script

    TmpFile.with_file(pre_script) do |script_path|
      Open.write(data_path, self.to_s)

      preamble = "data_file = '#{data_path}';\n"
      if pre_script
        preamble << "script_file = '#{script_path}';\n"
      end

      R.interactive(preamble)
    end
  end
end

#range_index(start_field = nil, end_field = nil, options = {}) ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/rbbt/tsv/index.rb', line 208

# Builds a range index over +start_field+ and +end_field+ (defaults "Start"
# and "End"), wrapped in Persist.persist.
#
# Every entry contributes one [key, [start, end]] pair, or one pair per
# zipped start/end element when the start value is an Array. The pairs are
# loaded into a FixWidthTable sized to the longest key seen.
def range_index(start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"

  options = Misc.add_defaults options,
    :persist => false, :persist_file => nil, :persist_update => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    widest_key = 0
    ranges = []

    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, [start_field, end_field] do |key, values|
          widest_key = [widest_key, key.length].max

          starts, ends = values
          case starts
          when Array
            starts.zip(ends).each do |first, last|
              ranges << [key, [first.to_i, last.to_i]]
            end
          else
            ranges << [key, [starts.to_i, ends.to_i]]
          end
        end
      end
    end

    FixWidthTable.get(:memory, widest_key, true).tap do |index|
      index.add_range ranges
      index.read
    end
  end
end

#read(force = false) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/rbbt/tsv/accessor.rb', line 105

# Delegates to the inherited #read with the same arguments. If that raises,
# the exception is logged, @writable is set to false and self is returned.
def read(force = false)
  super
rescue Exception => error
  Log.exception error
  @writable = false
  self
end

#rename_field(field, new) ⇒ Object



184
185
186
187
# File 'lib/rbbt/tsv/util.rb', line 184

# Replaces every field name equal to +field+ with +new+. Returns self.
def rename_field(field, new)
  renamed = self.fields.map do |name|
    name == field ? new : name
  end
  self.fields = renamed
  self
end

#reorder(new_key_field = nil, new_fields = nil, options = {}) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/rbbt/tsv/manipulate.rb', line 234

# Builds a new TSV keyed by +new_key_field+ and restricted to +new_fields+,
# wrapped in Persist.persist_tsv with the "Reorder" prefix.
#
# options may carry :zipped and :uniq (both forwarded to #through) and keys
# pulled with the :persist prefix.
def reorder(new_key_field = nil, new_fields = nil, options = {})
  zipped, uniq = Misc.process_options options, :zipped, :uniq

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] = "Reorder"

  Persist.persist_tsv self, self.filename, {:key_field => new_key_field, :fields => new_fields}, persist_options do |data|
    # When the target responds to persistence_path, accumulate into a plain
    # hash first and merge into the real target at the end.
    if data.respond_to? :persistence_path
      real_data = data
      data = {}
    end

    new_key_field_name, new_field_names = nil, nil
    with_unnamed do
      if zipped or (type != :double and type != :flat)
        # One value per new key: later entries overwrite earlier ones. Only
        # Array values are stored.
        new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
          data[key] = value.clone if Array === value
        end
      else
        case type
        when :double
          # Entries that map to the same new key are merged field by field.
          new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
            if data[key]
              current = data[key].dup
              value.each_with_index do |v, i|
                if current[i]
                  current[i] += v if v
                else
                  current[i] = v || []
                end
              end
              data[key] = current
            else
              data[key] = value.collect{|v| v.nil? ? nil : v.dup}
            end
          end
        when :flat
          # Values for the same new key are concatenated.
          new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
            data[key] ||= []
            data[key] += value
          end
        end
      end
    end

    if real_data and real_data.respond_to? :persistence_path
      real_data.serializer = type if real_data.respond_to? :serializer
      real_data.merge!(data)
      data = real_data
    end

    data.extend TSV unless TSV === data
    self.annotate(data)

    # Field names come from what #through reported for the traversal.
    data.key_field = new_key_field_name
    data.fields = new_field_names
    data.fields.each do |field|
      data.entity_templates[field] = entity_templates[field] if entity_templates.include? field
    end
    data.type = zipped ? :list : type
  end
end

#reset_filtersObject



287
288
289
290
291
292
293
294
295
296
# File 'lib/rbbt/tsv/filter.rb', line 287

# Resets the filters of this TSV.
#
# With a non-empty @filter_dir, removes every '*.filter' file in it and
# returns the list of removed paths. Otherwise calls #reset on each element of
# @filters (when it is an Array) and returns nil.
def reset_filters
  unless @filter_dir.nil? || @filter_dir.empty?
    pattern = File.join(@filter_dir, '*.filter')
    return Dir.glob(pattern).each { |path| FileUtils.rm path }
  end

  @filters.each { |filter| filter.reset } if Array === @filters
  nil
end

#select(method = nil, invert = false, &block) ⇒ Object



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
# File 'lib/rbbt/tsv/manipulate.rb', line 324

# Returns a new TSV holding the entries that match a criterion (or, with
# +invert+, those that do not).
#
# +method+ may be:
# * nil with a block: the block receives (key, values) and decides.
# * an Array: keeps entries whose key or any value is in the list.
# * a Regexp: keeps entries whose key or any value matches.
# * a String with a block: names the field (or the key) whose value is passed
#   to the block; the block takes (value) or (key, value).
# * a String without a block: keeps entries whose key or any value equals it.
# * a Hash with a single field => criterion pair, where the criterion is an
#   Array (membership), a Regexp, a "name:..." String (matches on the +name+
#   of the values, optionally as /regexp/), a plain String (equality), an
#   Integer (minimum number of values) or a Proc (predicate on each value).
def select(method = nil, invert = false, &block)
  new = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers)

  new.key_field = key_field
  new.fields    = fields.dup unless fields.nil?
  new.type      = type
  new.filename  = filename
  new.namespace = namespace
  new.entity_options = entity_options
  new.entity_templates = entity_templates

  case
  when (method.nil? and block_given?)
    through do |key, values|
      new[key] = values if invert ^ (yield key, values)
    end
  when Array === method
    method = Set.new method
    with_unnamed do
      case type
      when :single
        through do |key, value|
          new[key] = value if invert ^ (method.include? key or method.include? value)
        end
      when :list, :flat
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values).any?)
        end
      else
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values.flatten).any?)
        end
      end
    end
  when Regexp === method
    with_unnamed do
      through do |key, values|
        new[key] = values if invert ^ ([key,values].flatten.select{|v| v =~ method}.any?)
      end
    end
  when String === method
    if block_given?
      case 
      when block.arity == 1
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(value))
            end
          else
            pos = identify_field method
            raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil?

            through do |key, values|
              new[key] = values if invert ^ (yield(values[pos]))
            end
          end
        end
      when block.arity == 2
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key, key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(key, value))
            end
          else
            pos = identify_field method
            through do |key, values|
              new[key] = values if invert ^ (yield(key, values[pos]))
            end
          end

        end
      end

    else
      with_unnamed do
        through do |key, values|
          new[key] = values if invert ^ ([key,values].flatten.select{|v| v == method}.any?)
        end
      end
    end
  when Hash === method
    # Only the first field => criterion pair of the Hash is used.
    key  = method.keys.first
    method = method.values.first
    case
    when (Array === method and (key == :key or key_field == key))
      with_unnamed do
        Annotated.purge(method).each{|key| 
          new[key] = self[key] if invert ^ (self.include? key)
        }
      end
    when Array === method
      with_unnamed do
        method = Set.new method unless Set === method
        case type
        when :single
          through :key, key do |key, value|
            new[key] = self[key] if invert ^ (method.include? value)
          end
        when :list
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ (method.include? values.first)
          end
        when :flat #untested
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        else
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        end
      end

    when Regexp === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v =~ method}.any?)
        end
      end

    when (String === method and method =~ /name:(.*)/)
      name = $1
      # Matching on names needs the values as entities, so the traversal runs
      # with unnamed switched off and the previous setting is put back after.
      old_unnamed = self.unnamed
      self.unnamed = false
      if name.strip =~ /^\/(.*)\/$/
        regexp = Regexp.new $1
        through :key, key do |key, values|
          case type
          when :single
            values = values.annotate([values])
          when :double
            values = values[0]
          end
          new[key] = self[key] if invert ^ (values.select{|v| v.name =~ regexp}.any?)
        end
      else
        through :key, key do |key, values|
          case type
          when :single
            values = values.annotate([values])
          when :double
            values = values[0]
          end
          new[key] = self[key] if invert ^ (values.select{|v| v.name == name}.any?)
        end
      end
      self.unnamed = old_unnamed

    when String === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v == method}.any?)
        end
      end

    # Integer rather than Fixnum: the Fixnum constant no longer exists on
    # current Ruby versions, so reaching this clause raised NameError.
    when Integer === method
      with_unnamed do
        through :key, key do |key, values|
          new[key] = self[key] if invert ^ (values.flatten.length >= method)
        end
      end
    when Proc === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| method.call(v)}.any?)
        end
      end
    end
  end

  new
end

#serializer=(serializer) ⇒ Object



191
192
193
194
195
# File 'lib/rbbt/tsv/accessor.rb', line 191

# Sets the serializer for this TSV.
#
# Stores the dumped value under the KEY_PREFIX + 'serializer' entry key and
# records the matching serializer module: TSV::CleanSerializer for nil,
# otherwise the SERIALIZER_ALIAS entry for the given name.
def serializer=(serializer)
  @serializer = serializer
  self.send(:[]=, KEY_PREFIX + 'serializer', dump_entry_value(serializer), :entry_key)
  # Must be @serializer_module: that is the variable #prepare_value reads.
  # It was previously written to the misspelled @serializar_module.
  @serializer_module = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
end

#setup_array(*args) ⇒ Object



75
76
77
78
79
# File 'lib/rbbt/tsv/accessor.rb', line 75

# Passes +args+ to NamedArray.setup and attaches this TSV's entity templates
# to the result (as its @entity_templates) before returning it.
def setup_array(*args)
  NamedArray.setup(*args).tap do |named|
    named.instance_variable_set(:@entity_templates, entity_templates)
  end
end

#sizeObject



352
353
354
# File 'lib/rbbt/tsv/accessor.rb', line 352

# Number of entries, not counting the reserved keys from ENTRY_KEYS that are
# present in the underlying storage.
def size
  reserved_present = ENTRY_KEYS.count { |entry_key| self.include? entry_key }
  super - reserved_present
end

#slice(fields) ⇒ Object



297
298
299
# File 'lib/rbbt/tsv/manipulate.rb', line 297

# Keeps the current key and restricts the TSV to +fields+; shorthand for
# reorder(:key, fields).
def slice(fields)
  reorder(:key, fields)
end

#sort(*fields) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/rbbt/tsv/manipulate.rb', line 301

# Returns the keys ordered by a value derived from each entry.
#
# With a block, the entry's values are splatted into it and its result is the
# sort value. Without one, the sort value is the value itself (:single), the
# first element of the first field (:double) or the first element (other
# types). +fields+ restricts the traversal; all fields are used when empty.
def sort(*fields)
  fields = nil if fields.empty?

  keyed = []
  through :key, fields do |key, value|
    sort_value =
      if block_given?
        yield(*value)
      elsif type == :single
        value
      elsif type == :double
        value.first.first
      else
        value.first
      end
    keyed << [key, sort_value]
  end

  keyed.sort_by { |_key, sort_value| sort_value }.collect { |key, _sort_value| key }
end

#sort_by(field = nil, just_keys = false, &block) ⇒ Object

{{{ Sorting



376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
# File 'lib/rbbt/tsv/accessor.rb', line 376

# Sorts the entries of the TSV.
#
# field::     field to sort on; nil or :all means "no particular field", in
#             which case (without a block) entries are ordered by key.
# just_keys:: return only the sorted keys instead of [key, value] pairs.
# block::     optional; when given it is used as the sort_by criterion over
#             [key, sort value] pairs.
#
# Without a block and with a concrete field, empty or nil values sort first,
# single-element arrays are compared by their element and other arrays by
# their length.
def sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through :key, field do |key, value|
        elems << [key, value]
      end
    when :list, :flat, :double
      through :key, field do |key, values|
        elems << [key, values.first]
      end
    end
  end

  if not block_given?
    # Compare the +field+ argument here. The original tested +fields+ (the
    # TSV's field list), which is never :all, so this branch could not run.
    if field == :all
      if just_keys
        keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
        keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
      else
        elems.sort_by{|key, value| key }
      end
    else
      sorted = elems.sort do |a, b| 
        a_value = a.last
        b_value = b.last
        case
        when ((a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)) and (b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)))
          0
        when (a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?))
          -1
        when (b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?))
          1
        when Array === a_value
          if a_value.length == 1 and b_value.length == 1
            a_value.first <=> b_value.first
          else
            a_value.length <=> b_value.length
          end
        else
          a_value <=> b_value
        end
      end
      if just_keys
        keys = sorted.collect{|key, value| key}
        keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
        keys
      else
        sorted.collect{|key, value| [key, self[key]]}
      end
    end
  else
    if just_keys
      keys = elems.sort_by(&block).collect{|key, value| key}
      keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
      keys
    else
      elems.sort_by(&block).collect{|key, value| [key, self[key]]}
    end
  end
end

#summaryObject



617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
# File 'lib/rbbt/tsv/accessor.rb', line 617

# Returns a multi-line text description of the TSV: filename, key field,
# fields, type, serializer, size, namespace and identifiers, followed by the
# first entry as an example.
def summary

  # Capture only the first entry yielded by #each.
  key = nil
  values = nil
  self.each do |k, v|
    key = k
    values = v
    break
  end

  with_unnamed do
    <<-EOF
Filename = #{Path === filename ? filename.find : (filename || "No filename")}
Key field = #{key_field || "*No key field*"}
Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
Type = #{type}
Serializer = #{serializer.inspect}
Size = #{size}
namespace = #{namespace}
identifiers = #{Misc.fingerprint identifiers}
Example:
- #{key} -- #{Misc.fingerprint values }
    EOF
  end
end

#swap_id(*args) ⇒ Object



85
86
87
# File 'lib/rbbt/tsv/change_id.rb', line 85

# Instance-level shortcut for TSV.swap_id: passes this TSV followed by +args+.
def swap_id(*args)
  TSV.swap_id(self, *args)
end

#through(new_key_field = nil, new_fields = nil, uniq = false, zipped = false) ⇒ Object

{{{ Methods



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/rbbt/tsv/manipulate.rb', line 154

# Iterates over the TSV, yielding (key, value) pairs re-expressed in terms of
# +new_key_field+ and +new_fields+ by a Traverser.
#
# Entries with a nil value, or for which the traverser produces no keys, are
# skipped. With +zipped+, the i-th key is yielded with the i-th element of
# each value (falling back to the first element when there is none).
#
# When @monitor is set a progress bar is shown; a Hash @monitor may provide
# its :desc. Unless the TSV is unnamed, keys are prepared as entities and
# values are set up as NamedArrays / entities.
#
# Returns [new key field name, new field names] as reported by the traverser.
def through(new_key_field = nil, new_fields = nil, uniq = false, zipped = false)

  traverser = Traverser.new key_field, fields, new_key_field, new_fields, type, uniq

  if @monitor
    desc = "Iterating TSV"
    desc = @monitor[:desc] if Hash === @monitor and @monitor.include? :desc
    progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc)
  else
    progress_monitor = nil
  end

  each do |key, value|
    progress_monitor.tick if progress_monitor
    next if value.nil?

    keys, value = traverser.process(key, value)

    next if keys.nil?
    
    keys = [keys].compact unless Array === keys

    # Annotated with Entity and NamedArray
    if not @unnamed and not traverser.new_field_names.nil? 

      case type
      when :double, :list
        Log.warn "Value frozen: #{ value }" if value.frozen?

        value.nil? ?
          nil :
          NamedArray.setup(value, traverser.new_field_names, key, entity_options, entity_templates)

      when :flat, :single
        prepare_entity(value, traverser.new_field_names.first, entity_options)
      end
    end

    if zipped

      keys.each_with_index do |k,i|
        v = value.collect{|v|
          r = v[i]
          r = v[0] if r.nil?
          r
        }

        if not @unnamed 
          k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
        end
        v.key = k if NamedArray === v
        yield k, v
 
      end

    else

      keys.each do |key|
        if not @unnamed
          # Prepare the key that is actually yielded, as the zipped branch
          # does. The original assigned to an unrelated, undefined local, so
          # keys were never prepared here.
          key = Misc.prepare_entity(key, traverser.new_key_field_name, entity_options)
        end
        value.key = key if NamedArray === value
        yield key, value
      end

    end

  end

  Log::ProgressBar.remove_bar progress_monitor if progress_monitor

  [traverser.new_key_field_name, traverser.new_field_names]
end

#to_doubleObject



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/rbbt/tsv/util.rb', line 218

# Converts the TSV to type :double (every field holds a list of values).
#
# A TSV that is already :double is returned as is. For :list TSVs an optional
# block may be given to turn each field value into its list; by default each
# value is wrapped in a one-element array.
def to_double
  # Already the right type. The original fell through here and returned an
  # empty annotated hash instead of self.
  return self if type == :double

  new = {}
  case type
  when :flat
    through do |k,v|
      new[k] = [v]
    end
  when :single
    through do |k,v|
      new[k] = [[v]]
    end
  when :list
    if block_given?
      through do |k,v|
        new[k] = v.collect{|e| yield e}
      end
    else
      through do |k,v|
        new[k] = v.collect{|e| [e]}
      end
    end
  end
  self.annotate(new)
  new.type = :double
  new
end

#to_flat(field = nil) ⇒ Object



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/rbbt/tsv/util.rb', line 247

# Converts the TSV to type :flat (each entry holds the list of values of a
# single field).
#
# A TSV that is already :flat is returned as is. For :double TSVs +field+
# selects which field to keep (the first one by default). The field list of
# the result is cut down to its first element.
def to_flat(field = nil)
  # Already the right type. The original fell through here and returned an
  # empty annotated hash instead of self.
  return self if type == :flat

  new = {}
  case type
  when :double
    if field.nil?
      through do |k,v| new[k] = v.first end
    else
      pos = identify_field field
      through do |k,v| new[k] = v[pos] end
    end
  when :single
    through do |k,v|
      new[k] = [v]
    end
  when :list
    through do |k,v|
      new[k] = [v.first]
    end
  end
  self.annotate(new)
  new.fields = new.fields[0..0] if new.fields
  new.type = :flat
  new
end

#to_hashObject



643
644
645
646
647
# File 'lib/rbbt/tsv/accessor.rb', line 643

# Returns a duplicate of this TSV with every key listed in ENTRY_KEYS removed.
def to_hash
  copy = self.dup
  ENTRY_KEYS.each { |entry_key| copy.delete entry_key }
  copy
end

#to_listObject



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/rbbt/tsv/util.rb', line 189

# Converts the TSV to type :list (every field holds a single value).
#
# A TSV that is already :list is returned as is. For :double TSVs an optional
# block may be given to reduce each field's list to one value; by default the
# first element is taken.
def to_list
  # Already the right type. The original fell through here and returned an
  # empty annotated hash instead of self.
  return self if type == :list

  new = {}
  case type
  when :double
    if block_given?
      through do |k,v|
        new[k] = v.collect{|e| yield e}
      end
    else
      through do |k,v|
        new[k] = v.collect{|e| e.first}
      end
    end
  when :flat
    through do |k,v|
      new[k] = [v.first]
    end
  when :single
    through do |k,v|
      new[k] = [v]
    end
  end
  self.annotate(new)
  new.type = :list
  new
end

#to_s(keys = nil, no_options = false) ⇒ Object



571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
# File 'lib/rbbt/tsv/accessor.rb', line 571

# Renders the TSV as text by draining #dumper_stream.
#
# keys::       keys to dump; :sort dumps all keys in sorted order. A boolean
#              in this position is taken as +no_options+ instead.
# no_options:: forwarded to #dumper_stream.
def to_s(keys = nil, no_options = false)
  if FalseClass === keys || TrueClass === keys
    no_options, keys = keys, nil
  end

  if keys == :sort
    with_unnamed { keys = self.keys.sort }
  end

  stream = dumper_stream(keys, no_options)

  text = ''
  # Read in 2048-byte chunks until the stream reports end of data.
  while (chunk = stream.read(2048))
    text << chunk
  end

  text
end

#to_singleObject



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/rbbt/tsv/util.rb', line 274

# Converts the TSV to type :single (each entry holds one plain value).
#
# A TSV that is already :single is returned as is. Otherwise the first value
# of the first field is kept (:double), or the first element (:flat, :list).
def to_single
  # Already the right type. The original fell through here and returned an
  # empty annotated hash instead of self.
  return self if type == :single

  new = {}
  case type
  when :double
    through do |k,v|
      new[k] = v.first.first
    end
  when :flat
    through do |k,v|
      new[k] = v.first
    end
  when :list
    through do |k,v|
      new[k] = v.first
    end
  end
  self.annotate(new)
  new.type = :single
  new
end

#transpose(key_field = "Unkown ID") ⇒ Object



634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
# File 'lib/rbbt/tsv/manipulate.rb', line 634

# Transposes a :list TSV: the current keys become the fields and the current
# fields become the keys. +key_field+ names the key field of the result.
#
# Raises a RuntimeError for any other TSV type.
def transpose(key_field="Unkown ID")
  raise "Transposing only works for TSVs of type :list" unless type == :list

  new_fields = keys
  transposed = self.annotate({})
  TSV.setup(transposed, :key_field => key_field, :fields => new_fields, :type => type, :filename => filename, :identifiers => identifiers)

  through do |old_key, row|
    # The column for this entry is the same for every field of the row.
    column = nil
    fields.zip(row) do |field_name, cell|
      transposed[field_name] ||= []
      column = new_fields.index(old_key) if column.nil?
      transposed[field_name][column] = cell
    end
  end

  transposed
end

#tsv_sort(&block) ⇒ Object



447
448
449
# File 'lib/rbbt/tsv/accessor.rb', line 447

# Returns the result of #collect sorted with the optional comparison block.
def tsv_sort(&block)
  collect.sort(&block)
end

#unzip(field = 0, merge = false) ⇒ Object



649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
# File 'lib/rbbt/tsv/accessor.rb', line 649

# Splits each entry into one entry per value of +field+: the new keys are
# "key:value" and the remaining fields hold the values zipped at the same
# position. The result is annotated like this TSV, typed :list, and has the
# field removed from its field list and appended to its key field name.
#
# NOTE(review): +merge+ is accepted but has no effect; an existing "key:value"
# entry is overwritten either way. Confirm the intended merge behaviour.
# NOTE(review): the field is removed with delete_at from the values yielded by
# #through, so rows may be modified in place. Verify against #through.
def unzip(field = 0, merge = false)
  unzipped = {}
  field_pos = self.identify_field field

  self.through do |key, values|
    field_values = values.delete_at field_pos
    remaining = values.zip_fields
    field_values.zip(remaining).each do |value, *rest|
      unzipped[[key, value] * ":"] = Misc.zip_fields(rest)
    end
  end

  self.annotate unzipped
  unzipped.type = :list

  unzipped.key_field = [self.key_field, self.fields[field_pos]] * ":"
  remaining_fields = self.fields.dup
  remaining_fields.delete_at field_pos
  unzipped.fields = remaining_fields

  unzipped
end

#value_peekObject



593
594
595
596
597
598
599
600
601
602
603
604
605
# File 'lib/rbbt/tsv/accessor.rb', line 593

# Returns a hash with the first entries yielded by #through (at most eleven).
#
# The traversal is cut short by raising, and any StandardError raised during
# it is swallowed, so whatever was collected up to that point is returned.
def value_peek
  sample = {}
  seen = 0
  begin
    through do |key, value|
      sample[key] = value
      seen += 1
      raise "STOP" if seen > 10
    end
  rescue StandardError
    # Covers both the deliberate "STOP" and any traversal failure.
  end
  sample
end

#valuesObject



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/rbbt/tsv/accessor.rb', line 279

# Returns the values for every key, as fetched by +chunked_values_at+.
#
# When @unnamed is set or there are no fields the raw values are returned.
# Otherwise they are decorated according to +type+:
# * :double / :list - each value is passed to +setup_array+ with the fields
# * :single         - the whole list is passed to +prepare_entity+
# * :flat           - each value is passed to +prepare_entity+
# Any other type leaves the raw values untouched.
def values
  raw = chunked_values_at(keys)
  return raw if @unnamed || fields.nil?

  case type
  when :double, :list
    raw.each { |row| setup_array row, fields, nil, entity_options }
    raw
  when :single
    prepare_entity(raw, fields.first, entity_options)
  when :flat
    raw.collect { |entry| prepare_entity(entry, fields.first, entity_options) }
  else
    raw
  end
end

#values_at(*keys) ⇒ Object



360
361
362
363
364
# File 'lib/rbbt/tsv/accessor.rb', line 360

# Looks up each of the given keys through +[]+.
#
# @param keys keys to fetch, in order
# @return [Array] one looked-up value per key, in the same order
def values_at(*keys)
  keys.map { |key| self[key] }
end

#values_to_s(values) ⇒ Object



533
534
535
536
537
538
539
540
541
542
543
544
545
546
# File 'lib/rbbt/tsv/accessor.rb', line 533

# Renders the value part of one line: a leading tab, the cells joined by
# tabs, and a trailing newline.
#
# * nil   - one empty cell per field, or just "\n" when there are no fields
# * Array - one cell per element; nested arrays are joined with "|"
# * other - the +to_s+ of the value as a single cell
#
# @param values nil, an Array, or any other object
# @return [String]
def values_to_s(values)
  body =
    case values
    when nil
      return "\n" if fields.nil? || fields.empty?
      Array.new(fields.length, "").join("\t")
    when Array
      values.map { |cell| Array === cell ? cell.join("|") : cell }.join("\t")
    else
      values.to_s
    end

  "\t#{body}\n"
end

#with_monitor(value = true) ⇒ Object



89
90
91
92
93
94
95
# File 'lib/rbbt/tsv/accessor.rb', line 89

# Runs the given block with @monitor temporarily set to +value+.
#
# A nil +value+ is stored as false. The previous @monitor is restored when
# the block finishes, including when it raises, so a failure inside the
# block cannot leave the temporary setting behind.
#
# @param value setting to apply while the block runs
# @return the value returned by the block
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value.nil? ? false : value
  yield
ensure
  @monitor = saved_monitor
end

#with_unnamedObject



81
82
83
84
85
86
87
# File 'lib/rbbt/tsv/accessor.rb', line 81

# Runs the given block with @unnamed set to true.
#
# The previous @unnamed is restored when the block finishes, including when
# it raises, so a failure inside the block cannot leave the flag switched on.
#
# @return the value returned by the block
def with_unnamed
  saved_unnamed = @unnamed
  @unnamed = true
  yield
ensure
  @unnamed = saved_unnamed
end

#write(force = false) ⇒ Object



115
116
117
118
119
120
121
122
# File 'lib/rbbt/tsv/accessor.rb', line 115

# Switches the object into write mode.
#
# Delegates to the next +write+ implementation in the ancestor chain. When
# that call fails with a StandardError (which includes the NoMethodError
# raised when no ancestor defines +write+), the object is flagged as writable
# through @writable and self is returned instead.
#
# Only StandardError is rescued so that interrupts, exit requests and
# out-of-memory conditions are not silently swallowed.
#
# @param force passed along to the ancestor implementation
# @return the ancestor's result, or self when falling back
def write(force = false)
  super
rescue StandardError
  @writable = true
  self
end

#write?Boolean

Returns:

  • (Boolean)


124
125
126
# File 'lib/rbbt/tsv/accessor.rb', line 124

# Reports the writable flag.
#
# @return the current value of @writable (nil when it has never been assigned)
def write?
  @writable
end

#zip_new(key, values) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/rbbt/tsv/accessor.rb', line 248

# Adds +values+ to the entry stored under +key+.
#
# * :double - when the key exists, the i-th new value is appended to the i-th
#   stored list and the result is written back; otherwise a new entry is
#   created, wrapping each value in its own list unless the values are
#   already lists.
# * :flat   - the new values are appended to the stored ones and duplicates
#   are removed; a missing key simply gets +values+.
#
# @param key entry to update or create
# @param values a single value or an Array of values; a non-Array is wrapped
# @return the value assigned to the entry
# @raise [RuntimeError] for any type other than :double or :flat
def zip_new(key, values)
  values = [values] unless Array === values
  case type
  when :double
    if self.include? key
      new = []
      self[key, true].each_with_index do |v,i|
        new << (v << values[i])
      end
      # Store the extended lists. This was previously a `==` comparison whose
      # result was discarded, so the update only took effect when the getter
      # handed out the very arrays held by the store.
      self[key] = new
    else
      self[key] = Array === values.first ? values.dup : values.collect{|v| [v] }
    end
  when :flat
    if self.include? key
      self[key] = (self[key] + values).uniq
    else
      self[key] = values
    end
  else
    raise "Cannot zip_new for type: #{type}"
  end
end