Module: TSV

Defined in:
lib/rbbt/tsv.rb,
lib/rbbt/util/R.rb,
lib/rbbt/tsv/util.rb,
lib/rbbt/tsv/excel.rb,
lib/rbbt/tsv/index.rb,
lib/rbbt/tsv/attach.rb,
lib/rbbt/tsv/filter.rb,
lib/rbbt/tsv/parser.rb,
lib/rbbt/tsv/accessor.rb,
lib/rbbt/tsv/change_id.rb,
lib/rbbt/tsv/manipulate.rb,
lib/rbbt/util/excel2tsv.rb,
lib/rbbt/tsv/attach/util.rb,
lib/rbbt/tsv/serializers.rb

Defined Under Namespace

Classes: FloatArraySerializer, FloatSerializer, IntegerArraySerializer, IntegerSerializer, Parser, StringArraySerializer, StringDoubleArraySerializer, StringSerializer, TSVMarshalSerializer, TSVSerializer, Traverser

Constant Summary collapse

TSV_SERIALIZER =

extend ChainMethods self.chain_prefix = :tsv

YAML
SERIALIZED_NIL =
TSV_SERIALIZER.dump nil
KEY_PREFIX =
"__tsv_hash_"
ENTRIES =
[]
ENTRY_KEYS =
[]
SERIALIZER_ALIAS =
{
  :integer => IntegerSerializer, 
  :float => FloatSerializer, 
  :integer_array => IntegerArraySerializer,
  :float_array => FloatArraySerializer,
  :marshal => Marshal,
  :single => StringSerializer,
  :string => StringSerializer,
  :list => StringArraySerializer,
  :flat => StringArraySerializer,
  :double => StringDoubleArraySerializer,
  :tsv => TSVSerializer,
  :marshal_tsv => TSVMarshalSerializer
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.lock_dir ⇒ Object

Returns the value of attribute lock_dir.



19
20
21
# File 'lib/rbbt/tsv.rb', line 19

# Reader for the @lock_dir instance variable.
def lock_dir
  @lock_dir
end

Instance Attribute Details

#entity_options ⇒ Object

Returns the value of attribute entity_options.



9
10
11
# File 'lib/rbbt/tsv/accessor.rb', line 9

# Reader for the @entity_options instance variable.
def entity_options
  @entity_options
end

#entity_templates ⇒ Object

Returns the value of attribute entity_templates.



9
10
11
# File 'lib/rbbt/tsv/accessor.rb', line 9

# Reader for the @entity_templates instance variable.
def entity_templates
  @entity_templates
end

#monitor ⇒ Object

Returns the value of attribute monitor.



5
6
7
# File 'lib/rbbt/tsv/manipulate.rb', line 5

# Reader for the @monitor instance variable.
def monitor
  @monitor
end

#serializer_module ⇒ Object

Returns the value of attribute serializer_module.



9
10
11
# File 'lib/rbbt/tsv/accessor.rb', line 9

# Reader for the @serializer_module instance variable.
def serializer_module
  @serializer_module
end

#unnamed ⇒ Object

Returns the value of attribute unnamed.



9
10
11
# File 'lib/rbbt/tsv/accessor.rb', line 9

# Reader for the @unnamed instance variable.
def unnamed
  @unnamed
end

Class Method Details

.build_traverse_index(files, options = {}) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/rbbt/tsv/attach/util.rb', line 288

# Builds an index that translates across a chain of files.
#
# Asks find_path for a chain of [field, file] pairs linking +files+; when a
# chain exists its fields are logged and the chain is handed to
# index_for_traversal.
#
# files   - list of files, passed through to find_path.
# options - Hash of options. :in_namespace (default false) is forwarded to
#           find_path with the rest of the options; :persist_input
#           (default true) is forwarded to index_for_traversal.
#
# Returns the result of index_for_traversal, or nil when find_path returns nil.
def self.build_traverse_index(files, options = {})
  options       = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  persist_input = options[:persist_input]

  path = find_path(files, options)

  return nil if path.nil?

  traversal_ids = path.collect{|p| p.first}

  Log.debug "Found Traversal: #{traversal_ids * " => "}"

  index_for_traversal path, persist_input
end

.change_key(tsv, format, options = {}) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/rbbt/tsv/change_id.rb', line 4

# Re-keys +tsv+ by the field +format+.
#
# When +format+ is already one of the fields the TSV is simply reordered.
# Otherwise the values are duplicated into a fresh annotated hash, the
# +format+ field is attached from the identifiers, and the result is
# reordered so that +format+ becomes the key.
#
# tsv     - the TSV to re-key.
# format  - name of the field that should become the key.
# options - Hash; :identifiers defaults to tsv.identifiers.
#
# Returns the reordered TSV.
def self.change_key(tsv, format, options = {})
  options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers

  # The second extracted value is not used below; the attach call always
  # passes :persist_input => true.
  identifiers, _persist_input = Misc.process_options options, :identifiers, :persist_input

  return tsv.reorder(format) if tsv.fields.include? format

  keys          = tsv.keys
  copied_values = tsv.values.collect { |l| l.dup }
  tsv = tsv.annotate(Hash[*keys.zip(copied_values).flatten(1)])

  original_type = tsv.type
  tsv = tsv.to_double unless original_type == :double

  tsv = tsv.attach identifiers, :fields => [format], :persist_input => true
  tsv = tsv.reorder(format, tsv.fields - [format])

  tsv = tsv.to_flat if original_type == :flat

  tsv
end

.entry(*entries) ⇒ Object



340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
# File 'lib/rbbt/tsv/accessor.rb', line 340

# Declares one or more "entries": named attributes whose values are stored
# inside the hash itself under the key KEY_PREFIX + name.
#
# Each entry name is appended to ENTRIES and its storage key to ENTRY_KEYS.
# For every entry a reader and a writer are generated with module_eval:
#
# * the reader loads the value on first access through
#   self.send(:[], key, :entry_key) and TSV_SERIALIZER.load, and caches it in
#   an instance variable;
# * the writer caches the value and stores value.to_yaml (or SERIALIZED_NIL
#   for nil) under the key with the "clean" flag set.
#
# The entry named 'serializer' gets a special writer that also looks up
# SERIALIZER_ALIAS[value.to_sym], stores it in serializer_module and, when a
# module is found, defines serialized_get / serialized_set on the object's
# singleton class.
#
# entries - entry names (anything responding to to_s).
def self.entry(*entries)
  entries = entries.collect{|entry| entry.to_s}
  ENTRIES.concat entries
  entries.each do |entry|
    key = KEY_PREFIX + entry
    ENTRY_KEYS << key
    # __LINE__ is captured so errors in the generated code are reported
    # relative to this file.
    line = __LINE__; self.module_eval "
attr_accessor :#{entry}

def #{ entry }
if not defined? @#{entry}
  # @#{entry} = (value = self.clean_get_brackets('#{key}')).nil? ? nil : TSV_SERIALIZER.load(value)
  @#{entry} = (value = self.send(:[], '#{key}', :entry_key)).nil? ? nil : TSV_SERIALIZER.load(value)
end
@#{entry}
end


if '#{entry}' == 'serializer'

def #{ entry }=(value)
  @#{entry} = value
  #self.tsv_clean_set_brackets '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml
  self.send(:[]=, '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml, true)

  return if value.nil?

  self.serializer_module = SERIALIZER_ALIAS[value.to_sym]

  if serializer_module.nil?
    #class << self
    #  alias serialized_get tsv_clean_get_brackets
    #  alias serialized_set tsv_clean_set_brackets
    #end

  else
    class << self

      define_method :serialized_get do |key|
        return nil unless self.include? key
        res = self.send(:[], key, true)
        return res if res.nil?
        self.serializer_module.load(res)
      end

      define_method :serialized_set do |key, value|
        if value.nil?
          self.send(:[]=, key, value, true)
          #tsv_clean_set_brackets key, value
        else
          self.send(:[]=, key, self.serializer_module.dump(value), true)
          #tsv_clean_set_brackets key, self.serializer_module.dump(value)
        end
      end
    end
  end

end
else
def #{ entry }=(value)
  @#{entry} = value
  self.send(:[]=, '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
  #self.tsv_clean_set_brackets '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml
end
end
", __FILE__, line
  end
end

.excel(tsv, filename, options = {}) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/rbbt/tsv/excel.rb', line 3

# Writes +tsv+ to an Excel workbook at +filename+.
#
# The first row holds the field names; each following row holds a key and
# its values. Array values are joined with ", ".
#
# tsv      - the data to export.
# filename - path handed to the workbook's write method.
# options  - Hash of options:
#            :name         - when truthy, objects responding to #name are
#                            written as their name (falling back to the
#                            object itself when the name is nil/false).
#            :sort_by      - passed to tsv.sort_by to order the rows.
#            :sort_by_cast - method sent to the value (or to its first
#                            element for Arrays) inside the sort block.
#            :fields       - header row; defaults to tsv.all_fields.
def self.excel(tsv, filename, options = {})
  name         = Misc.process_options options, :name
  sort_by      = Misc.process_options options, :sort_by
  sort_by_cast = Misc.process_options options, :sort_by_cast
  fields       = Misc.process_options(options, :fields) || tsv.all_fields

  book  = Spreadsheet::Workbook.new
  sheet = book.create_worksheet
  sheet.row(0).concat fields

  data = if sort_by && sort_by_cast
           tsv.sort_by(sort_by) do |_key, value|
             (Array === value ? value.first : value).send(sort_by_cast)
           end
         elsif sort_by
           tsv.sort_by sort_by
         else
           tsv
         end

  # Chooses what to write for an object: its name when requested and
  # available, otherwise the object itself.
  display = lambda do |obj|
    (name && obj.respond_to?(:name)) ? (obj.name || obj) : obj
  end

  row_number = 1
  data.each do |key, values|
    cells = [display.call(key)]

    (Array === values ? values : [values]).each do |value|
      shown = display.call(value)
      cells.push(Array === shown ? shown * ", " : shown)
    end

    sheet.row(row_number).concat cells
    row_number += 1
  end

  book.write filename
end

.excel2tsv(file, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rbbt/util/excel2tsv.rb', line 6

# Reads one sheet of an Excel file and opens it as a TSV.
#
# The rows of the sheet are dumped tab-separated into a temporary file,
# which is then opened with TSV.open and the remaining options.
#
# file    - the spreadsheet, opened through Open.open.
# options - Hash of options. :sheet (default 0) selects the worksheet and
#           :header (anything but false means true) says whether the first
#           row is written as a "#"-prefixed header line. Both keys are
#           removed from +options+; the rest is passed to TSV.open.
#
# Returns the result of the TmpFile.with_file block, i.e. of TSV.open.
def self.excel2tsv(file, options = {})
  sheet_id   = options.delete(:sheet)
  has_header = options.delete(:header) != false
  sheet_id ||= 0

  TmpFile.with_file do |filename|
    workbook  = Spreadsheet.open Open.open(file)
    worksheet = workbook.worksheet sheet_id

    rows = []
    worksheet.each { |row| rows << row.values_at(0..(row.size - 1)) }

    File.open(filename, 'w') do |out|
      out.puts("#" + rows.shift * "\t") if has_header
      rows.each { |row| out.puts row * "\t" }
    end

    TSV.open(filename, options)
  end
end

.extended(data) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/rbbt/tsv/accessor.rb', line 81

# Hook run when an object is extended with this module.
#
# Objects that do not already respond to #write receive a minimal
# read/write-mode interface on their singleton class: a +writable+
# accessor, a no-op #close, #read / #write that flip the flag and return
# the object, and #write? that reports the flag.
#
# data - the object being extended.
#
# Returns nil.
def self.extended(data)
  #setup_chains(data)

  unless data.respond_to?(:write)
    data.singleton_class.class_eval do
      attr_accessor :writable

      def close
      end

      def read(force = false)
        @writable = false
        self
      end

      def write(force = false)
        @writable = true
        self
      end

      def write?
        @writable
      end
    end
  end

  unless data.respond_to?(:serialized_get)
    #class << data
    #  alias serialized_get []
    #  alias serialized_set []=
    #end
  end
end

.field_match_counts(file, values, options = {}) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/rbbt/tsv/util.rb', line 4

# Counts, per field, how many of +values+ appear in +file+.
#
# A text listing with one "name<TAB>field" line per non-empty value is built
# from the TSV and persisted as a string; a shell pipeline then greps that
# listing for the given values and counts the matches per field.
#
# file    - a TSV, or something TSV.open accepts.
# values  - list of values to look for (duplicates are removed).
# options - Hash; :persist_prefix defaults to "Field_Matches", and the
#           keys extracted by Misc.pull_keys(options, :persist) are used as
#           persistence options.
#
# Returns a TSV of type :single cast with :to_i; an empty one is returned
# when opening the pipeline output raises.
def self.field_match_counts(file, values, options = {})
  options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
  persist_options = Misc.pull_keys options, :persist

  filename = TSV === file ? file.filename : file
  text = Persist.persist filename, :string, persist_options do
    tsv = TSV === file ? file : TSV.open(file)

    text = ""
    fields = nil
    # Iterate unnamed; the field names are captured for zipping with each row.
    tsv.tap{|e| e.unnamed =  true; fields = e.fields}.through do |gene, names|
      names.zip(fields).each do |list, format|
        # NOTE: delete_if removes the empty names from the list in place.
        list.delete_if do |name| name.empty? end
        next if list.empty?
        text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
      end
    end
    text
  end

  path = Persist.persistence_path(filename, persist_options)
  TmpFile.with_file(values.uniq * "\n") do |value_file|
    # The first sed tags the field column with HEADERNOMATCH before the
    # fixed-string, whole-word grep; the tag is stripped again after cut
    # selects the field column. sort | uniq -c then counts lines per field.
    cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' |cut -f 2 | sed 's/HEADERNOMATCH//' | sort|uniq -c|sed 's/^ *//;s/ /\t/'"
    begin
      TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
    rescue
      # Any failure of the pipeline or of parsing yields an empty result.
      TSV.setup({}, :type => :single, :cast => :to_i)
    end
  end
end

.find_path(files, options = {}) ⇒ Object

May make an extra index!



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/rbbt/tsv/attach/util.rb', line 208

# Finds a chain of shared fields that links each file to the next one.
#
# For every pair of consecutive files the first field of the earlier file
# that also appears in the later one is chosen. With :in_namespace the
# candidate fields of the first file are restricted to that field (when the
# file has it) or to the file's all_namespace_fields for it.
#
# files   - list of objects responding to all_fields.
# options - Hash; :in_namespace defaults to false.
#
# Returns a list of [field, file] pairs, or nil when two consecutive files
# share no field.
def self.find_path(files, options = {})
  options   = Misc.add_defaults options, :in_namespace => false
  namespace = options[:in_namespace]

  ids = if namespace
          first_ids = if files.first.all_fields.include? namespace
                        [namespace]
                      else
                        files.first.all_namespace_fields(namespace)
                      end
          [first_ids] + files[1..-1].collect { |f| f.all_fields }
        else
          files.collect { |f| f.all_fields }
        end

  id_list = []

  ids.each_cons(2) do |current, following|
    shared = current.select { |field| following.select { |f| field == f }.any? }
    return nil if shared.empty?
    id_list << shared.first
  end

  # Make sure the chain ends on the first field of the last file.
  last_key = files.last.all_fields.first
  id_list << last_key if id_list.last != last_key

  id_list.zip(files)
end

.find_traversal(tsv1, tsv2, options = {}) ⇒ Object



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/rbbt/tsv/attach/util.rb', line 305

# Searches for an index that translates between +tsv1+ and +tsv2+, going
# through their identifier files when needed.
#
# Candidate chains are tried in order: tsv1 alone, then tsv1 plus each of
# its identifier files in turn, each combined with growing prefixes of
# [tsv2, *its identifier files] (reversed). The first chain for which
# build_traverse_index returns something is used.
#
# tsv1, tsv2 - objects responding to identifier_files.
# options    - Hash passed to build_traverse_index; :in_namespace defaults
#              to false.
#
# Returns the first non-nil index, or nil when no chain works.
def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false

  # Build fresh arrays instead of unshift-ing onto the arrays returned by
  # identifier_files: those may be owned by the TSVs (not visible from
  # here), and the loop below consumes identifiers1 with shift.
  identifiers1 = [tsv1] + (tsv1.identifier_files || [])
  identifiers2 = [tsv2] + (tsv2.identifier_files || [])

  files1 = []
  while identifiers1.any?
    files1.push identifiers1.shift
    identifiers2.each_with_index do |e,i|
      files2 = identifiers2[(0..i)]
      index  = build_traverse_index(files1 + files2.reverse, options)
      return index if not index.nil?
    end
  end

  return nil
end

.get_filename(file) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/tsv/util.rb', line 35

# Works out a filename for +file+.
#
# file - a String (returned as is), an object responding to #filename, or
#        a stream responding to #gets (which has no filename).
#
# Returns the filename, or nil for plain streams.
# Raises RuntimeError for anything else.
def self.get_filename(file)
  return file if String === file
  return file.filename if file.respond_to?(:filename)
  return nil if file.respond_to?(:gets)

  raise "Cannot get filename from: #{file.inspect}"
end

.get_stream(file, open_options = {}) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/rbbt/tsv/util.rb', line 48

# Turns +file+ into a readable stream.
#
# file         - a Path (opened with its own #open), a String (opened with
#                Open.open), or an object responding to #gets (returned
#                untouched).
# open_options - options forwarded to the open call.
#
# Returns the stream.
# Raises RuntimeError when +file+ is none of the above.
def self.get_stream(file, open_options = {})
  if Path === file
    file.open(open_options)
  elsif String === file
    Open.open(file, open_options)
  elsif file.respond_to?(:gets)
    file
  else
    raise "Cannot get stream from: #{file.inspect}"
  end
end

.identify_field(key_field, fields, field) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/rbbt/tsv/util.rb', line 61

# Resolves a field specification to a position.
#
# key_field - name of the key field.
# fields    - list of field names, or nil.
# field     - an Integer (returned as is), nil or :key (the key), or a
#             field name.
#
# Returns an Integer position, :key, or nil when a String name is not
# found among +fields+ (a message is logged). For a non-String +field+ that
# is not the key the method logs and returns the result of the log call.
# Raises RuntimeError when +fields+ is nil and +field+ cannot be resolved.
def self.identify_field(key_field, fields, field)
  return field if Integer === field
  return :key if field.nil? || field == :key

  if String === field && !fields.nil?
    pos = fields.index field
    pos = :key if pos.nil? && key_field == field
    Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
    return pos
  end

  return :key if key_field == field

  raise "No fields specified in TSV.identify_field" if fields.nil?
  Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
end

.index(file, options = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/rbbt/tsv/index.rb', line 120

# Builds (and persists) an index for +file+.
#
# The keys extracted by Misc.pull_keys for :persist and :data are used as
# persistence options and as options for opening the file; the rest is
# passed to the instance-level #index together with the persistence data.
#
# file    - the file to index, opened with TSV.open.
# options - Hash of options; :target is used in the default persistence
#           prefix ("StaticIndex[<target or key>]").
#
# Returns the result of Persist.persist_tsv.
def self.index(file, options = {})
  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"

  Log.debug "Static Index: #{ file } - #{options.inspect}"
  Persist.persist_tsv nil, file, options, persist_options do |data|
    data_options = Misc.pull_keys options, :data
    tsv = TSV.open(file, data_options)
    tsv.with_monitor(:desc => "Creating Index for #{ file }") do
      index_options = options.merge(:persist_data => data, :persist => persist_options[:persist])
      tsv.index(index_options)
    end
  end
end

.index_for_traversal(path, persist_input = false) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/rbbt/tsv/attach/util.rb', line 242

# Builds an index by following a chain of [field, file] pairs.
#
# path          - list of [field, file] pairs (as produced by find_path).
#                 NOTE: the list is consumed with shift, so it is empty
#                 when the method returns.
# persist_input - passed as :persist when indexing the files after the first.
#
# Returns the resulting index; nil when the path has a single pair whose
# field is already the key field of its file.
def self.index_for_traversal(path, persist_input = false)
  data_key, data_file = path.shift
  # The first file only needs an index when the chain does not start from
  # its key field.
  data_index = if data_key == data_file.key_field
                 Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
                 nil
               else
                 Log.debug "Data index required"
                 data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false, :type => (data_file.type == :single ? :single : :flat)
               end

  current_index = data_index
  current_key   = data_key
  while not path.empty?
    next_key, next_file = path.shift

    if current_index.nil?
      # No index yet: start from the next file's index, restricted to the
      # keys of the first file.
      current_index = next_file.index(:target => next_key, :fields => [current_key], :persist => persist_input)
      current_index = current_index.select :key => data_file.keys
    else
      next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input

      # Translate the values of the current index through the next index,
      # in place, and rename its field accordingly.
      next_index.with_unnamed do
        current_index.with_unnamed do
          current_index.process current_index.fields.first do |values|
            if values.nil?
              nil
            else
              new_values = next_index.values_at(*values).flatten
              if current_index.type == :single
                new_values.first
              else
                new_values
              end
            end
          end
          current_index.fields = [next_key]
        end
      end
    end
    current_key = next_key
  end

  current_index

end

.merge_different_fields(file1, file2, output, sep = "\t", monitor = false) ⇒ Object

Merge two files with the same keys and different fields



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rbbt/tsv/attach.rb', line 51

# Merge two files with the same keys and different fields.
#
# Both inputs are sorted on their first column with the external `sort`
# command (lines starting with the separator are dropped) and then merged
# line by line. Every key found in either input produces one output line:
# the key, followed by the columns of the first input and the columns of
# the second. Repeated values for the same key and column are joined with
# "|"; columns of an input that lacks the key are left empty.
#
# file1, file2 - each may be a path to an existing file, a String or
#                StringIO with the content, or a TSV.
# output       - an object responding to #puts and #close, or a String
#                path that is opened for writing.
# sep          - column separator (default tab).
# monitor      - when truthy a progress bar is ticked while consuming file1.
#
# The output is closed before returning.
def self.merge_different_fields(file1, file2, output, sep = "\t", monitor = false)
  case
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
    size = CMD.cmd("wc -l '#{file1}'").read.to_f if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
  when (String === file1 or StringIO === file1)
    size = file1.length if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
  when TSV === file1
    size = file1.size if monitor
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
  end

  case
  when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
  when (String === file2 or StringIO === file2)
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
  when TSV === file2
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
  end

  output = File.open(output, 'w') if String === output

  cols1 = nil
  cols2 = nil

  done1 = false
  done2 = false

  # Read the first data line of each input, skipping lines that contain "#".
  key1 = key2 = nil
  while key1.nil?
    while (line1 = file1.gets) =~ /#/; end
    key1, *parts1 = line1.sub("\n",'').split(sep, -1)
    cols1 = parts1.length
  end

  while key2.nil?
    while (line2 = file2.gets) =~ /#/; end
    key2, *parts2 = line2.sub("\n",'').split(sep, -1)
    cols2 = parts2.length
  end

  progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor

  # Classic sorted merge: always emit the smaller of the two pending keys.
  key = key1 < key2 ? key1 : key2
  parts = [""] * (cols1 + cols2)
  while not (done1 and done2)
    # Consume every line of file1 carrying the current key.
    while (not done1 and key1 == key)
      parts1.each_with_index do |part, i|
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key1 = nil
      while key1.nil? and not done1
        if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
      end
      progress_monitor.tick if monitor
    end
    # Same for file2; its columns are placed after those of file1.
    while (not done2 and key2 == key)
      parts2.each_with_index do |part, i|
        i += cols1
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key2 = nil
      while key2.nil? and not done2
        if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
      end
    end

    output.puts [key, parts].flatten * sep
    parts = [""] * (cols1 + cols2)

    case
    when done1
      key = key2
    when done2
      key = key1
    else
      key = key1 < key2 ? key1 : key2
    end
  end

  output.close
end

.merge_paste(files, delim = "$") ⇒ Object

Merge columns from different files



137
138
139
# File 'lib/rbbt/tsv/attach.rb', line 137

# Merge columns from different files.
#
# Runs `paste` over the (single-quoted) files using +delim+ as delimiter,
# and pipes the result through a sed expression that deletes each +delim+
# together with the characters following it that match [^\t]*.
#
# files - list of file paths.
# delim - delimiter character given to paste (default "$").
#
# Returns the result of CMD.cmd called with :pipe => true.
def self.merge_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" } * " "
  command = "paste #{ quoted_files } -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'"
  CMD.cmd(command, :pipe => true)
end

.merge_row_fields(input, output, sep = "\t") ⇒ Object

Merge columns from different rows of a file



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/rbbt/tsv/attach.rb', line 6

# Merge columns from different rows of a file.
#
# Rows sharing the same key (first column) are collapsed into a single
# row; the values of each column are joined with "|". Path and String
# inputs are first sorted on the key with the external `sort` command
# (lines starting with the separator are dropped), so equal keys are
# adjacent; any other input is read as given.
#
# input  - path to an existing file, a String or StringIO with the
#          content, or an already opened stream.
# output - destination handed to Open.write.
# sep    - column separator (default tab).
def self.merge_row_fields(input, output, sep = "\t")
  is = case
       # File.exist? is used because File.exists? was removed in Ruby 3.2.
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key  = nil
  current_parts = []

  done = false
  Open.write(output) do |os|

    done = is.eof?
    while not done
      key, *parts = is.gets.sub("\n",'').split(sep, -1)
      current_key ||= key
      case
      when key.nil?
        # Empty line: nothing to accumulate.
      when current_key == key
        # Same key as the previous row: append each value to its column.
        parts.each_with_index do |part,i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      when current_key != key
        # New key: flush the accumulated row and start a new one.
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end

      done = is.eof?
    end

    # Flush the last accumulated row.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?

  end
end

.open(source, type = nil, options = nil) ⇒ Object

When `type` is a Hash and `options` is nil, `type` is taken to be the options (the arguments shift by one).



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/tsv.rb', line 44

# Opens +source+ as a TSV, going through the persistence layer.
#
# source  - whatever get_filename / get_stream accept.
# type    - TSV type; may be omitted, in which case a Hash given in its
#           place is taken as +options+.
# options - Hash of options. The keys pulled by Misc.pull_keys for
#           :persist and :open go to the persistence layer and to
#           get_stream; :serializer, :unnamed and :entity_options are
#           removed and applied to the resulting object; everything else
#           is passed to parse.
#
# Returns the loaded data.
def self.open(source, type = nil, options = nil)
  # Allow TSV.open(source, options) without an explicit type.
  type, options = nil, type if options.nil? and Hash === type
  options ||= {}
  options[:type] ||= type unless type.nil?

  persist_options = Misc.pull_keys options, :persist

  filename = get_filename source
  serializer = Misc.process_options options, :serializer
  unnamed = Misc.process_options options, :unnamed
  entity_options = Misc.process_options options, :entity_options

  Log.debug "TSV open: #{ filename } - #{options.inspect}.#{unnamed ? " [unnamed]" : "[not unnamed]"}"

  data = nil

  # The lock file path is derived from the filename under TSV.lock_dir;
  # sources without a filename pass nil to Misc.lock.
  lock_filename = filename.nil? ? nil : Persist.persistence_path(filename, {:dir => TSV.lock_dir})
  Misc.lock lock_filename  do
    # The block parameter is also called data; it is the object handed in by
    # Persist.persist_tsv, while the outer data receives persist_tsv's result.
    data = Persist.persist_tsv source, filename, options, persist_options do |data|
      if serializer
        data.extend TSV unless TSV === data
        data.serializer = serializer
      end

      open_options = Misc.pull_keys options, :open

      stream = get_stream source, open_options
      parse stream, data, options

      data.filename = filename.to_s unless filename.nil?
      if data.identifiers.nil? and Path === filename and filename.identifier_file_path
        data.identifiers = filename.identifier_file_path.to_s
      end

      data
    end
  end

  data.unnamed = unnamed unless unnamed.nil?

  data.entity_options = entity_options

  # For Path sources the identifiers are set up as a Path with the same
  # pkgdir and resource as the source.
  if Path === source and data.identifiers
    data.identifiers = Path.setup(data.identifiers, source.pkgdir, source.resource)
  end

  data
end

.parse(stream, data, options = {}) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/rbbt/tsv.rb', line 110

# Parses +stream+ line by line into +data+.
#
# stream  - the input stream.
# data    - the object to fill; it is extended with TSV when needed.
# options - Hash of options. :monitor, :grep, :invert_grep and :head are
#           removed here; the rest is handed to Parser.new.
#           :grep    - filter the stream through Open.grep first.
#           :head    - stop after this many lines.
#           :monitor - true or a Hash (:desc, :step) to show a progress bar
#                      when the stream exposes its size and position.
#
# Returns +data+.
def self.parse(stream, data, options = {})
  monitor, grep, invert_grep, head = Misc.process_options options, :monitor, :grep, :invert_grep, :head

  parser = Parser.new stream, options

  if grep
    stream.rewind
    stream = Open.grep(stream, grep, invert_grep)
    parser.first_line = stream.gets
  end

  line = parser.rescue_first_line

  # NOTE(review): the condition below ends with a trailing `and`, so the
  # `data.close` call on the following line is parsed as the last operand of
  # the condition rather than as the first statement of the body. Confirm
  # that this is intended.
  if TokyoCabinet::HDB === data and parser.straight and
    data.close
    begin
      CMD.cmd('tchmgr', :log => false)
      FileUtils.mkdir_p File.dirname(data.persistence_path)
      CMD.cmd("tchmgr importtsv '#{data.persistence_path}'", :in => stream, :log => false, :dont_close_in => true)
    rescue
      Log.debug("tchmgr importtsv failed for: #{data.persistence_path}")
    end
    data.write
  end

  data.extend TSV unless TSV === data
  data.unnamed = true

  # A serializer of :type means "derive it from the parser's cast and type".
  # When no branch matches, the case yields nil and that is assigned.
  if data.serializer == :type
    data.serializer = case
                      when parser.cast.nil?
                        data.serializer = parser.type
                      when (parser.cast == :to_i and (parser.type == :list or parser.type == :flat))
                        data.serializer = :integer_array
                      when (parser.cast == :to_i and parser.type == :single)
                        data.serializer = :integer
                      when (parser.cast == :to_f and parser.type == :single)
                        data.serializer = :float
                      when (parser.cast == :to_f and (parser.type == :list or parser.type == :flat))
                        data.serializer = :float_array
                      end
  end

  if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
    size = case
           when stream.respond_to?(:size)
             stream.size
           else
             stream.stat.size
           end
    desc = "Parsing Stream"
    step = 100
    if Hash === monitor
      desc = monitor[:desc] if monitor.include? :desc 
      step = monitor[:step] if monitor.include? :step 
    end
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  else
    progress_monitor = nil
  end

  line_num = 1
  while not line.nil? 
    begin
      progress_monitor.tick(stream.pos) if progress_monitor 

      # Empty lines are skipped through the same path as parser-skipped lines.
      raise Parser::SKIP_LINE if line.empty?

      line = Misc.fixutf8(line)
      line = parser.process line
      parts = parser.chop_line line
      key, values = parser.get_values parts
      values = parser.cast_values values if parser.cast?
      parser.add_to_data data, key, values
      line = stream.gets
      line_num += 1
      raise Parser::END_PARSING if head and line_num > head.to_i
    rescue Parser::SKIP_LINE
      # Skipped lines do not advance line_num.
      begin
        line = stream.gets
        next
      rescue IOError
        break
      end
    rescue Parser::END_PARSING
      break
    rescue IOError
      break
    end
  end

  parser.setup data

  data.unnamed = false

  data
end

.parse_header(stream, options = {}) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rbbt/tsv.rb', line 93

# Creates a Parser for +stream+ without parsing the content.
#
# stream  - a Path, a String naming an existing or remote file, or an
#           already opened stream.
# options - options passed to Parser.new.
#
# Returns the result of the open block for Paths and file names (the block
# yields the new Parser), or the Parser itself for open streams.
def self.parse_header(stream, options = {})
  if Path === stream
    stream.open { |f| Parser.new f, options }
  # The original `and`/`or` chain groups as (String and short and exists)
  # or remote; Open.remote? is therefore also asked about non-String streams.
  elsif (String === stream && stream.length < 300 && Open.exists?(stream)) || Open.remote?(stream)
    Open.open(stream) { |f| Parser.new f, options }
  else
    filename = stream.respond_to?(:filename) ? stream.filename : Misc.fingerprint(stream)
    Log.debug("Parsing header of open stream: #{filename}")
    Parser.new stream, options
  end
end

.pos_index(file, pos_field = nil, options = {}) ⇒ Object



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/tsv/index.rb', line 171

# Builds (and persists) a position index for +file+.
#
# file      - a String or Path, an object responding to #filename, or
#             anything else TSV.open accepts (its object_id then names the
#             persisted result).
# pos_field - field holding the position (default "Position").
# options   - Hash of options. The keys pulled for :data go to TSV.open and
#             those pulled for :persist to Persist.persist; :filters is a
#             list of [match, value] pairs applied to the TSV before
#             indexing; the rest goes to the instance-level #pos_index.
#
# Returns the result of Persist.persist.
def self.pos_index(file, pos_field = nil, options = {})
  pos_field ||= "Position"

  data_options = Misc.pull_keys options, :data
  filename = if String === file || Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end
  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "StaticPosIndex[#{pos_field}]"

  filters = Misc.process_options options, :filters

  # Filtered indices are persisted under a name that includes the filters.
  filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]" if filters

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)
    if filters
      tsv.filter
      filters.each { |match, value| tsv.add_filter match, value }
    end
    tsv.pos_index(pos_field, options)
  end
end

.range_index(file, start_field = nil, end_field = nil, options = {}) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/rbbt/tsv/index.rb', line 242

# Builds (and persists) a range index for +file+.
#
# file        - a String or Path, an object responding to #filename, or
#               anything else TSV.open accepts (its object_id then names
#               the persisted result).
# start_field - field holding the range start (default "Start").
# end_field   - field holding the range end (default "End").
# options     - Hash of options. The keys pulled for :data go to TSV.open
#               and those pulled for :persist to Persist.persist; :filters
#               is a list of [match, value] pairs applied to the TSV before
#               indexing; the rest goes to the instance-level #range_index.
#
# Returns the result of Persist.persist.
def self.range_index(file, start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field   ||= "End"

  data_options = Misc.pull_keys options, :data
  filename = if String === file || Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end
  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"

  filters = Misc.process_options options, :filters

  # Filtered indices are persisted under a name that includes the filters.
  filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]" if filters

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)
    if filters
      tsv.filter
      filters.each { |match, value| tsv.add_filter match, value }
    end

    tsv.range_index(start_field, end_field, options)
  end
end

.setup(hash, options = {}) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/rbbt/tsv.rb', line 26

# Turns +hash+ into a TSV and fills in its entries from +options+.
#
# hash    - a Hash, or an Array that is first converted with
#           Misc.array2hash using :default_value.
# options - Hash of options. :default_value (default []) is removed and
#           used for the Array conversion. Every name in ENTRIES that is
#           present, as a String or as a Symbol, is assigned through its
#           writer. :unnamed is always assigned.
#
# Returns the extended hash.
def self.setup(hash, options = {})
  options = Misc.add_defaults options, :default_value => []
  default_value = Misc.process_options options, :default_value
  hash = Misc.array2hash(hash, default_value) if Array === hash
  hash.extend TSV

  IndiferentHash.setup(options)
  ENTRIES.each do |entry|
    setter = "#{ entry }="
    # Try the String form first and then the Symbol form, as separate checks.
    [entry, entry.to_sym].each do |name|
      hash.send(setter, options[name]) if options.include? name
    end
  end

  hash.unnamed = options[:unnamed]

  hash
end

.swap_id(tsv, field, format, options = {}) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/rbbt/tsv/change_id.rb', line 30

# Replaces the values of +field+ with their translation into +format+.
#
# An index from +field+ to +format+ is built from the identifiers and
# every row has the value(s) at the position of +field+ replaced by the
# index lookup. The field is then renamed to +format+.
#
# tsv     - the TSV to modify.
# field   - name of the field whose values are translated.
# format  - name of the target format.
# options - Hash; :identifiers defaults to tsv.identifiers and :persist
#           (default false) is used when building the index.
#
# Returns the resulting TSV (converted back to flat when it started flat).
def self.swap_id(tsv, field, format, options = {})
  options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers

  identifiers, persist_input = Misc.process_options options, :identifiers, :persist

  translation = identifiers.index :target => format, :fields => [field], :persist => persist_input

  original_type = tsv.type
  tsv = tsv.to_double unless original_type == :double

  position = tsv.fields.index field
  tsv.with_unnamed do
    single_valued = [:list, :single].include?(tsv.type)

    tsv.through do |key, values|
      values[position] = if single_valued
                           translation[values[position]]
                         else
                           translation.values_at(*values[position])
                         end
      tsv[key] = values
    end

    tsv.fields = tsv.fields.collect { |f| f == field ? format : f }
  end

  tsv = tsv.to_flat if original_type == :flat

  tsv
end

.zip_fields(list, fields = nil) ⇒ Object



442
443
444
445
446
447
448
# File 'lib/rbbt/tsv/accessor.rb', line 442

# Transposes a list of value lists, pairing up the values that share a
# position.
#
# list   - list of lists; nil or empty gives [].
# fields - field names; when missing they are taken from list.fields if
#          +list+ responds to it. When fields are available each zipped
#          tuple is passed through setup_array.
#
# Returns the list of zipped tuples.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields = list.fields if list.respond_to?(:fields) && !fields

  head, rest = list[0], list[1..-1]
  zipped = head.zip(*rest)
  return zipped unless fields

  zipped.collect { |v| setup_array(v, fields) }
end

Instance Method Details

#[](key, clean = false) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/rbbt/tsv/accessor.rb', line 125

# Looks up +key+, deserializing and decorating the value as needed.
#
# key   - the key to look up.
# clean - when truthy, serialized_get is bypassed and the value comes
#         straight from super. With the special value :entry_key the raw
#         value is also returned without any decoration.
#
# Returns the raw value when it is nil, when @unnamed is set, when +clean+
# is :entry_key, or when there are no fields; otherwise the value prepared
# according to the type.
def [](key, clean = false)
  value = (self.respond_to?(:serialized_get) and not clean) ? serialized_get(key) : super(key)
  # The :entry_key check comes before `fields` is consulted, so lookups
  # made with :entry_key return before reaching it.
  return value if value.nil? or @unnamed or clean == :entry_key or fields.nil?

  case type
  when :double, :list
    setup_array value, fields, key, entity_options, entity_templates
  when :flat, :single
    value = value.dup if value.frozen?

    value = prepare_entity(value, fields.first, entity_options)
  end
  value
end

#[]=(key, value, clean = false) ⇒ Object



140
141
142
143
# File 'lib/rbbt/tsv/accessor.rb', line 140

# Stores +value+ under +key+.
#
# key   - the key.
# value - the value to store.
# clean - when truthy the value is stored as is through super, bypassing
#         serialized_set.
#
# The value goes through serialized_set when the object responds to it and
# +clean+ is not set.
def []=(key, value, clean = false)
  if clean || !respond_to?(:serialized_set)
    super(key, value)
  else
    serialized_set(key, value)
  end
end

#add_field(name = nil) ⇒ Object



574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
# File 'lib/rbbt/tsv/manipulate.rb', line 574

# Appends one new field to every entry, computing its value with the given
# block.
#
# name - name of the new field; it is added to +fields+ only when the table
#        already has fields and +name+ is not nil.
#
# Yields each key and its current values; the block's result becomes the new
# field's value (wrapped in an array for :double tables when it is not
# already one).
#
# Returns self.
def add_field(name = nil)
  old_monitor = @monitor
  @monitor = {:desc => "Adding field #{ name }"} if TrueClass === monitor

  begin
    through do |key, values|
      new_values = yield(key, values)
      new_values = [new_values] if type == :double and not Array === new_values

      case
      when (values.nil? and (fields.nil? or fields.empty?))
        values = [new_values]
      when values.nil?
        # Pad the missing existing fields so the new value lands last.
        values = [nil] * fields.length + [new_values]
      when Array === values
        values += [new_values]
      else
        values << new_values
      end

      self[key] = values
    end
  ensure
    # Restore the caller's monitor setting even if the block or the
    # traversal raises; otherwise the temporary description would leak.
    @monitor = old_monitor
  end

  if not fields.nil? and not name.nil?
    new_fields = self.fields + [name]
    self.fields = new_fields
  end

  self
end

#all_fieldsObject



481
482
483
484
# File 'lib/rbbt/tsv/accessor.rb', line 481

# Returns the key field followed by all value fields, or nil when either the
# key field or the fields are not set.
def all_fields
  header_missing = key_field.nil? || fields.nil?
  header_missing ? nil : [key_field] + fields
end

#annotate(tsv) ⇒ Object



11
12
13
# File 'lib/rbbt/tsv/accessor.rb', line 11

# Passes +tsv+ to TSV.setup together with this object's key_field, fields,
# namespace, entity_options, type, filename and identifiers, and returns
# whatever TSV.setup returns.
def annotate(tsv)
  TSV.setup(tsv, :key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers)
end

#attach(other, options = {}) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/rbbt/tsv/attach.rb', line 141

# Attaches fields from +other+ to this TSV, choosing the attachment strategy
# from how the two key fields relate.
#
# other   - a TSV, or anything TSV.open accepts (it is opened when it is not
#           already a TSV).
# options - :fields (fields to attach, nil or :all for the default set),
#           :one2one, :in_namespace (default false), :persist_input
#           (default true); the remaining options are handed to
#           TSV.find_traversal when an index is needed.
#
# Raises RuntimeError ("Cannot traverse identifiers") when no strategy applies.
# Returns self.
def attach(other, options = {})
  options      = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  unless TSV === other
    other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
    other = TSV.open(other, :persist => options[:persist_input] == true)
    other.identifiers = other_identifier_file
  end

  fields = other.fields - [key_field].concat(self.fields) if fields.nil?  or fields == :all 
  # NOTE(review): after the line above +fields+ is no longer nil whenever it
  # was nil or :all, so the fields.nil? guards below never fire as written;
  # confirm whether the in_namespace default was meant to take precedence.
  if in_namespace
    fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
  else
    fields = other.fields - [key_field].concat(self.fields) if fields.nil?
  end

  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")

  # Strategies, tried in order: same key field, other's key among our
  # fields (optionally restricted to the namespace), or an identifier index.
  case
  when key_field == other.key_field 
    Log.debug "Attachment with same key: #{other.key_field}"
    attach_same_key other, fields
  when (not in_namespace and self.fields.include?(other.key_field))
    Log.debug "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
    Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    Log.debug "Attachment with index: #{other.key_field}"
    attach_index other, index, fields
  end
  Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")

  self
end

#attach_index(other, index, fields = nil) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/rbbt/tsv/attach/util.rb', line 127

# Attaches fields from +other+ using +index+ to translate this table's keys
# into keys of +other+.
#
# other  - TSV (or an object responding to #tsv) providing the new values.
# index  - lookup from our keys to one or several keys of +other+.
# fields - field name(s) of +other+ to attach; defaults to the fields of
#          +other+ not already present here.
#
# The new values are appended to each entry in place, a :single table
# becomes :list, and the attached field names are appended to +fields+.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
  fields = [fields] unless Array === fields

  other = other.tsv unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  # Number of fields before attaching; anything beyond it in an entry is
  # treated below as previously attached extra data.
  length = self.fields.length
  other.with_unnamed do
    index.with_unnamed do
      with_unnamed do
        through do |key, values|
          source_keys = index[key]
          source_keys = [source_keys] unless Array === source_keys
          # NOTE(review): a nil lookup has already been wrapped as [nil] by the
          # line above, so the nil? test below cannot be true; the nil key is
          # presumably skipped by the other.include? check — confirm.
          if source_keys.nil? or source_keys.empty?
            all_new_values = []
          else
            all_new_values = []
            source_keys.each do |source_key|
              next unless other.include? source_key
              new_values = field_positions.collect do |pos|
                if pos == :key
                  if other.type == :double
                    [source_key]
                  else
                    source_key
                  end
                else
                  if other.type == :flat
                    other[source_key]
                  else
                    other[source_key][pos]
                  end
                end
              end
              # Adapt the value shape to this table's type.
              new_values.collect!{|v| v.nil? ? [[]] : [v]}    if     type == :double and not other.type == :double
              new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and     other.type == :double
              new_values.flatten! if type == :flat
              all_new_values << new_values
            end
          end

          # Nothing matched: fill with empty placeholders of the right shape.
          if all_new_values.empty?
            if type == :double
              all_new_values = [[[]] * field_positions.length]
            else
              all_new_values = [[nil] * field_positions.length]
            end
          end

          current = self[key] || [[]] * fields.length

          current = [current] unless Array === current

          # Move any values beyond the original field count into the new set.
          if current.length > length
            all_new_values << current.slice!(length..current.length - 1)
          end

          if type == :double
            all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
          else
            all_new_values = all_new_values.first
          end

          current += all_new_values

          self[key].replace current
        end
      end
    end
  end

  self.type = :list if self.type == :single

  self.fields = self.fields.concat field_names
end

#attach_same_key(other, fields = nil) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/rbbt/tsv/attach/util.rb', line 3

# Appends fields from +other+ to this TSV when both tables share the same
# key field.
#
# other  - TSV providing the new values.
# fields - field name(s) of +other+ to attach; defaults to the fields of
#          +other+ not already present here.
#
# Entries with no counterpart in +other+ are padded with [] (for :double)
# or nil, one per attached field. A :single table becomes :list.
#
# Returns self.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  fields = [fields].compact unless Array === fields

  field_positions = fields.collect{|field| other.identify_field field}
  other.with_unnamed do
    with_unnamed do
      through do |key, values|
        self[key] = [] if self[key].nil?
        current = self[key]
        current = [current] unless Array === current
        if other.include? key
          case
          when other.type == :flat
            if type == :flat
              new_values = other[key]
            else
              new_values = [other[key]]
            end
          when other.type == :single
            new_values = [other[key]]
          else
            new_values = other[key].values_at(*field_positions)
          end

          # Adapt the value shape to this table's type.
          new_values.collect!{|v| [v]}     if     type == :double and not other.type == :double
          new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and     other.type == :double

          # Array#flatten returns a new array, so the result has to be kept;
          # the non-destructive form avoids mutating the array stored in +other+.
          new_values = new_values.flatten if type == :flat

          self[key] = current.concat new_values
        else
          if type == :double
            self[key] = current.concat [[]] * fields.length
          else
            self[key] = current.concat [nil] * fields.length
          end
        end
      end
    end
  end

  self.type = :list if self.type == :single

  self.fields = self.fields.concat fields

  self
end

#attach_source_key(other, source, options = {}) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/rbbt/tsv/attach/util.rb', line 53

# Attaches fields from +other+ by using one of this table's own fields
# (+source+) as the key into +other+.
#
# other   - TSV (or an object responding to #tsv) providing the new values.
# source  - field of this table whose values are keys of +other+.
# options - :fields (fields of +other+ to attach; defaults to those not
#           already present here) and :one2one (keep only the first value
#           per source key when the source field holds several keys).
#
# The attached field names are appended to +fields+.
#
# Returns self.
def attach_source_key(other, source, options = {})
  fields = Misc.process_options options, :fields
  one2one = Misc.process_options options, :one2one

  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv(:persistence => :no_create) unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  source_pos = identify_field source

  other.with_unnamed do
    with_unnamed do
      through do |key, values|
        source_keys = values[source_pos]

        case
        when (source_keys.nil? or (Array === source_keys and source_keys.empty?))
          # No source key: pad with one placeholder per attached field.
          if type == :double
            self[key] = values.concat field_positions.collect{|v| []}
          else
            # Array#* needs an Integer count; multiplying by the positions
            # array itself raised TypeError.
            self[key] = values.concat [nil] * field_positions.length
          end
        when Array === source_keys
          all_new_values = source_keys.collect do |source_key|
            positions = field_positions.collect do |pos|
              if pos == :key
                [source_key]
              else
                if other.include? source_key
                  v = other[source_key][pos]
                  Array === v ? v : [v]
                else
                  [nil]
                end
              end
            end

            positions.collect!{|v| v[0..0]} if one2one
            positions
          end

          # Regroup the per-source-key results into one list per field.
          new = Misc.zip_fields(all_new_values).each{|field_entry|
            field_entry.flatten!
          }

          self[key] = values.concat new
        else
          source_key = source_keys
          all_new_values = field_positions.collect do |pos|
            if pos == :key
              source_key
            else
              if other.include? source_key
                v = other[source_key][pos]
                Array === v ? v.first : v
              else
                nil
              end
            end
          end

          self[key] = values.concat all_new_values
        end

      end
    end
  end

  self.fields = self.fields.concat field_names
  self
end

#change_key(*args) ⇒ Object



26
27
28
# File 'lib/rbbt/tsv/change_id.rb', line 26

# Instance-level shortcut for TSV.change_key: forwards self followed by all
# given arguments and returns its result.
def change_key(*args)
  TSV.change_key(self, *args)
end

#chunked_values_at(keys, max = 5000) ⇒ Object



241
242
243
244
245
246
247
# File 'lib/rbbt/tsv/accessor.rb', line 241

# Fetches the values for +keys+ in chunks, calling values_at once per chunk
# produced by Misc.ordered_divide(keys, max).
#
# keys - keys to look up.
# max  - chunk size handed to Misc.ordered_divide (default 5000).
#
# Returns a single array with the values of all chunks concatenated in
# order. When a chunk result responds to #annotate and nothing has been
# accumulated yet, it is asked to annotate the accumulator.
def chunked_values_at(keys, max = 5000)
  collected = []
  Misc.ordered_divide(keys, max).each do |chunk|
    chunk_values = self.values_at(*chunk)
    chunk_values.annotate collected if chunk_values.respond_to? :annotate and collected.empty?
    collected.concat(chunk_values)
  end
  collected
end

#collectObject



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/rbbt/tsv/accessor.rb', line 197

# Maps over the data entries, skipping the keys listed in ENTRY_KEYS.
#
# Values are loaded through the configured serializer when one is set.
# Unless the table is unnamed, array values are set up with the fields
# (:double / :list), scalar-style values are prepared as entities
# (:flat / :single), and the key is prepared as an entity too.
#
# Yields key and value when a block is given and collects its result;
# otherwise collects [key, value] pairs.
def collect
  serializer = self.serializer
  serializer_module = SERIALIZER_ALIAS[serializer] unless serializer.nil?
  super do |key, value|
    next if ENTRY_KEYS.include? key

    # TODO Update this to be more efficient
    value = serializer_module.load(value) unless serializer.nil?

    # Annotated with Entity and NamedArray
    unless @unnamed
      unless fields.nil?
        kind = type
        if [:double, :list].include?(kind)
          setup_array value, fields, key, entity_options if Array === value
        elsif [:flat, :single].include?(kind)
          value = prepare_entity(value, fields.first, entity_options)
        end
      end
      key = prepare_entity(key, key_field, entity_options)
    end

    block_given? ? yield(key, value) : [key, value]
  end
end

#column(field, cast = nil) ⇒ Object



491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
# File 'lib/rbbt/tsv/manipulate.rb', line 491

# Extracts a single field as its own table.
#
# field - field to extract (handed to slice).
# cast  - optional method name sent to every extracted value.
#
# Each entry of the sliced table is replaced by its first element
# (optionally cast). The result is typed :flat when this table is :double
# or :flat, and :single otherwise.
def column(field, cast = nil)
  sliced = slice(field)

  sliced.with_unnamed do
    sliced.each do |k, v|
      first_value = v.first
      first_value = first_value.send(cast) unless cast.nil?
      sliced[k] = first_value
    end
  end

  sliced.type = [:double, :flat].include?(type) ? :flat : :single

  sliced
end

#detach(file) ⇒ Object



183
184
185
186
187
188
# File 'lib/rbbt/tsv/attach.rb', line 183

# Collects the positions of this table's fields whose fullname matches the
# fullname of one of the fields of +file+, and returns the result of
# reorder(:key, positions).
def detach(file)
  names_in_file = file.fields.collect(&:fullname)
  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if names_in_file.include? field.fullname
  end
  reorder :key, positions
end

#eachObject



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/rbbt/tsv/accessor.rb', line 168

# Iterates over the data entries, skipping the keys listed in ENTRY_KEYS.
#
# Values are loaded through the configured serializer unless it is nil or
# false. Unless the table is unnamed, values and keys are annotated before
# being yielded.
#
# Yields key and value when a block is given.
def each
  # Read once so the block below does not call the accessor per entry.
  fields = self.fields

  serializer = self.serializer
  serializer_module = SERIALIZER_ALIAS[serializer] unless serializer.nil?
  super do |key, value|
    next if ENTRY_KEYS.include? key

    # TODO Update this to be more efficient
    value = serializer_module.load(value) unless serializer.nil? or FalseClass === serializer

    # Annotated with Entity and NamedArray
    if not @unnamed
      if not fields.nil? 
        case type
        when :double, :list
          setup_array value, fields, key, entity_options, entity_templates if Array === value
        when :flat, :single
          # NOTE(review): the return value is discarded here, whereas #[] and
          # #collect assign it back to value — confirm this is intended.
          prepare_entity(value, fields.first, entity_options)
        end
      end
      key = prepare_entity(key, key_field, entity_options)
    end

    yield key, value if block_given?
    [key, value]
  end
end

#empty?Boolean

{{{ Chained Methods

Returns:

  • (Boolean)


121
122
123
# File 'lib/rbbt/tsv/accessor.rb', line 121

# True when the table holds no data entries, judged by #length.
def empty?
  length.zero?
end

#excel(filename, options = {}) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/rbbt/tsv/excel.rb', line 49

# Writes the table to a spreadsheet file.
#
# filename - path handed to Spreadsheet::Workbook#write.
# options  - :name         when truthy, keys and values that respond to
#                          #name are written by name (falling back to the
#                          object itself when the name is nil/false);
#            :sort_by      field to sort the rows by;
#            :sort_by_cast method sent to the sort value (to its first
#                          element when it is an array) before comparing.
#
# The first row holds all_fields; every following row holds the key and
# its values, with array values joined by ", ".
def excel(filename, options ={})
  name = Misc.process_options options, :name
  sort_by = Misc.process_options options, :sort_by
  sort_by_cast = Misc.process_options options, :sort_by_cast

  book = Spreadsheet::Workbook.new
  sheet1 = book.create_worksheet
  sheet1.row(0).concat all_fields

  data = if not sort_by
           self
         elsif sort_by_cast
           self.sort_by(sort_by) do |k, v|
             target = Array === v ? v.first : v
             target.send(sort_by_cast)
           end
         else
           self.sort_by sort_by
         end

  # What to show for a key or value, honouring the :name option.
  label = lambda { |item| (name and item.respond_to?(:name)) ? item.name || item : item }

  row_number = 1
  data.each do |key, values|
    cells = [label.call(key)]

    values = [values] unless Array === values
    values.each do |value|
      shown = label.call(value)
      cells.push(Array === shown ? shown * ", " : shown)
    end

    sheet1.row(row_number).concat cells
    row_number += 1
  end
  book.write filename
end

#fieldsObject



418
419
420
421
422
423
424
425
426
# File 'lib/rbbt/tsv/accessor.rb', line 418

# Returns the field names of the table.
#
# The value is memoized in @fields; on first use it is loaded with
# TSV_SERIALIZER from the "__tsv_hash_fields" entry (read with the
# :entry_key flag so the raw stored value is returned), falling back to
# SERIALIZED_NIL when the entry is missing.
#
# The previous body also had a branch wrapping the fields in a NamedArray,
# but it was guarded by `if true or ...` and therefore unreachable; it has
# been removed without changing the result.
def fields
  @fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
end

#fields=(value) ⇒ Object



435
436
437
438
439
440
# File 'lib/rbbt/tsv/accessor.rb', line 435

# Sets the field names.
#
# The value is stored as YAML (or SERIALIZED_NIL for nil) under the
# "__tsv_hash_fields" entry using the clean write path, cached in @fields,
# and the cached named fields are reset.
def fields=(value)
  serialized = value.nil? ? SERIALIZED_NIL : value.to_yaml
  self.send(:[]=, "__tsv_hash_fields", serialized, true)
  @fields = value
  @named_fields = nil
end

#filter(filter_dir = nil) ⇒ Object



274
275
276
277
278
279
# File 'lib/rbbt/tsv/filter.rb', line 274

# Extends this object with the Filtered module and initializes its state.
#
# filter_dir - value assigned to filter_dir (nil by default).
#
# The list of filters is reset to an empty array. Returns self.
def filter(filter_dir = nil)
  self.extend Filtered
  self.filter_dir = filter_dir
  self.filters = []
  self
end

#identifier_filesObject



450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/rbbt/tsv/accessor.rb', line 450

# Returns the list of identifier sources for this table.
#
# When +identifiers+ is set: a TSV is wrapped in an array; an array is
# returned as-is when empty or when its first element is a TSV, otherwise
# its elements are turned into Paths (with this table's namespace); any
# other value becomes a single-element array holding a Path.
# Without +identifiers+, the identifier files of +filename+ are used, and
# an empty array is returned when there is no filename either.
def identifier_files
  if identifiers
    if TSV === identifiers
      [identifiers]
    elsif Array === identifiers
      if TSV === identifiers.first or identifiers.empty?
        identifiers
      else
        identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
      end
    else
      [ Path === identifiers ? identifiers : Path.setup(identifiers, nil, namespace) ]
    end
  elsif Path === filename
    filename.identifier_files
  elsif filename
    Path.setup(filename.dup).identifier_files
  else
    []
  end
end

#identify_field(field) ⇒ Object



80
81
82
# File 'lib/rbbt/tsv/util.rb', line 80

# Instance-level shortcut for TSV.identify_field using this table's
# key_field and fields. Callers in this file compare the result with :key
# or use it as a position into the values.
def identify_field(field)
  TSV.identify_field(key_field, fields, field)
end

#index(options = {}) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rbbt/tsv/index.rb', line 12

# Builds a lookup from the values of the table to the value of a target
# field.
#
# options - :target (default :key), :fields (default nil), :type of the
#           resulting index (:single by default; :flat and :double are also
#           handled), :order (default false), plus persist options, which
#           are pulled out and handed to Persist.persist_tsv.
#
# Returns the result of Persist.persist_tsv; the block sets the index up as
# a TSV of the requested type.
def index(options = {})
  options = Misc.add_defaults options, 
    :target => :key, :fields => nil, :type => :single, :order => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "Index[#{options[:target] || :key}]"

  Log.debug "Index: #{ filename } - #{options.inspect}"
  Persist.persist_tsv self, filename, options, persist_options do |new|
    with_unnamed do
      target, fields, index_type, order = Misc.process_options options, :target, :fields, :type, :order

      # A serializer of :type is presumably a placeholder meaning "use the
      # index type" — TODO confirm against Persist.persist_tsv.
      new.serializer = index_type if new.respond_to? :serializer and new.serializer == :type

      if order

        # Maybe best to do the stuff in memory first instead of the original
        # object, which could be persisted
        save = new
        new = {} 

        new_key_field, new_fields = through target, fields, true do |key, values|
          next if key.empty?
          if type == :single
            values = [values]
            values.unshift key
          else
            values = values.dup
            values.unshift [key]
          end

          # For every value remember the keys it maps to, separately per
          # field position (joined with "|"), so that earlier fields take
          # precedence when the index is finalized below.
          values.each_with_index do |list, i|
            list = [list] unless type == :double

            list.uniq.each do |value|
              if new.include? value
                new_value = new[value]
              else
                new_value = []
              end

              if new_value[i].nil?
                new_value[i] =  key
              else
                new_value[i] += "|" <<  key 
              end
              new[value] = new_value
            end
          end
        end

        # Update original object
        new.each do |key, values|
          case
          when index_type == :double
            save[key] = [values.compact.collect{|v| v.split "|"}.flatten.uniq]
          when index_type == :flat
            save[key] = values.compact.collect{|v| v.split "|"}.flatten.uniq
          when index_type == :single
            save[key] = values.compact.collect{|v| v.split "|"}.flatten.first
          end
        end

        new = save
      else
        new_key_field, new_fields = through target, fields, true do |key, values|
          case
          when type == :single
            values = [values]
          when type == :double
            values = values.flatten
          else
            values = values.dup
          end

          # The key indexes itself as well.
          values.unshift key

          values.uniq.each do |value|
            case index_type
            when :double
              if not new.include? value
                new[value] = [[key]]
              else
                current = new[value]
                current[0] << key
                new[value] = current
              end
            when :flat
              if not new.include? value
                new[value] = [key]
              else
                current = new[value]
                current << key
                new[value] = current
              end

            else
              # Single-valued index: the first key seen for a value wins.
              new[value] = key unless new.include? value
            end
          end
        end
      end

      # The roles are inverted in the index: the traversed fields become its
      # key field and the traversal's key field becomes its only field.
      TSV.setup(new, :type => index_type, :filename => filename, :fields => [new_key_field], :key_field => new_fields * ", ")
    end
  end
end

#keysObject



145
146
147
148
149
150
# File 'lib/rbbt/tsv/accessor.rb', line 145

# Returns the data keys, i.e. the underlying keys minus ENTRY_KEYS.
#
# For named tables with a key field the list is additionally passed
# through prepare_entity (with :dup_array set in the options).
def keys
  raw_keys = super - ENTRY_KEYS
  if @unnamed or key_field.nil?
    raw_keys
  else
    prepare_entity(raw_keys, key_field, entity_options.merge(:dup_array => true))
  end
end

#lengthObject



231
232
233
# File 'lib/rbbt/tsv/accessor.rb', line 231

# Number of data entries: the length of #keys, which leaves out ENTRY_KEYS.
def length
  keys.length
end

#merge_different_fields(other, options = {}) ⇒ Object



190
191
192
193
194
195
196
197
198
# File 'lib/rbbt/tsv/attach.rb', line 190

# Merges this table with +other+ through a temporary file.
#
# other   - table to merge with.
# options - handed to TSV.open when reading the merged file; :sep selects
#           the separator given to TSV.merge_different_fields (tab by
#           default).
#
# The merged table gets this table's key field (when set) and the fields of
# both tables concatenated (when both have fields). Returns whatever
# TmpFile.with_file returns for the block, whose value is the merged table.
def merge_different_fields(other, options = {})
  separator = options[:sep] || "\t"
  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, separator)
    merged = TSV.open output, options
    merged.key_field = self.key_field unless self.key_field.nil?
    both_have_fields = !(self.fields.nil? or other.fields.nil?)
    merged.fields = self.fields + other.fields if both_have_fields
    merged
  end
end

#namespace=(value) ⇒ Object



428
429
430
431
432
433
# File 'lib/rbbt/tsv/accessor.rb', line 428

# Sets the namespace.
#
# The value is stored as YAML (or SERIALIZED_NIL for nil) under the
# "__tsv_hash_namespace" entry using the clean write path, cached in
# @namespace, and the cached entity options are reset.
def namespace=(value)
  serialized = value.nil? ? SERIALIZED_NIL : value.to_yaml
  self.send(:[]=, "__tsv_hash_namespace", serialized, true)
  @namespace = value
  @entity_options = nil
end

#optionsObject



472
473
474
475
476
477
478
# File 'lib/rbbt/tsv/accessor.rb', line 472

# Collects the current value of every entry listed in ENTRIES (obtained by
# sending the entry name to self) into a hash and returns the result of
# IndiferentHash.setup on that hash.
def options
  collected = ENTRIES.each_with_object({}) do |entry, acc|
    acc[entry] = self.send(entry)
  end
  IndiferentHash.setup collected
end

#page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object

Page numbering starts at 1.



325
326
327
328
329
330
331
332
333
334
335
336
337
# File 'lib/rbbt/tsv/accessor.rb', line 325

# Returns one page of the table. Page numbers start at 1.
#
# pnum      - page number.
# psize     - number of entries per page.
# field     - field to sort by; nil and the string "key" both mean :key.
# just_keys - when truthy only the keys of the page are returned,
#             otherwise the result of select(:key => keys_of_page).
# reverse   - when truthy the sorted keys are reversed before paging.
# block     - forwarded to sort_by.
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
  first = psize * (pnum - 1)
  last  = psize * pnum - 1
  field = :key if field == "key"

  ordered = sort_by(field || :key, true, &block)
  ordered.reverse! if reverse

  window = ordered[first..last]
  just_keys ? window : select(:key => window)
end

#pos_index(pos_field = nil, options = {}) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/rbbt/tsv/index.rb', line 134

# Builds a position index over one field.
#
# pos_field - field holding the positions (default "Position").
# options   - persist options (:persist defaults to false); they are pulled
#             out and handed to Persist.persist together with the :fwt type.
#
# Every entry contributes one [key, position] point per position, with
# positions converted by to_i. The points are loaded into an in-memory
# FixWidthTable sized for the longest key. Returns the result of
# Persist.persist.
def pos_index(pos_field = nil, options = {})
  pos_field ||= "Position"

  options = Misc.add_defaults options,
    :persist => false, :persist_file => nil, :persist_update => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "PosIndex[#{pos_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    widest_key = 0
    points = []
    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, pos_field do |key, values|
          widest_key = [widest_key, key.length].max

          position = values.first
          if Array === position
            position.each do |single_position|
              points << [key, single_position.to_i]
            end
          else
            points << [key, position.to_i]
          end
        end
      end
    end

    table = FixWidthTable.get(:memory, widest_key, false)
    table.add_point points
    table.read
    table
  end
end

#prepare_entity(entity, field, options = {}) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/rbbt/tsv/accessor.rb', line 33

# Annotates +entity+ with the entity template registered for +field+.
#
# entity  - value to annotate; nil is returned untouched, as is everything
#           when the Entity constant is not defined.
# field   - field name used to look up (or create) the template.
# options - handed to Misc.prepare_entity when a template has to be built;
#           the :dup_array key is removed from it first.
#
# Templates are cached in entity_templates; a nil entry records that the
# field has no template so the lookup is not repeated. Frozen entities are
# duplicated before annotation, and annotated arrays are extended with
# AnnotatedArray.
def prepare_entity(entity, field, options = {})
  return entity if entity.nil?
  return entity unless defined? Entity

  # Only the removal of the key has an effect; the entity is not duplicated.
  options.delete :dup_array

  template = entity_templates[field]
  unless template
    # A cached nil means the field is known to have no template.
    return entity if entity_templates.include? field

    candidate = Misc.prepare_entity("TEMPLATE", field, options)
    unless Annotated === candidate
      entity_templates[field] = nil
      return entity
    end
    entity_templates[field] = candidate
    template = candidate
  end

  entity = template.annotate(entity.frozen? ? entity.dup : entity)
  entity.extend AnnotatedArray if Array === entity
  entity
end

#process(field, &block) ⇒ Object



528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
# File 'lib/rbbt/tsv/manipulate.rb', line 528

# Rewrites the values of one field in place using the given block.
#
# field - field to process (resolved with identify_field).
# block - called with (field_values), (field_values, key) or
#         (field_values, key, values) depending on its arity; its result
#         replaces the field's values.
#
# Entries with nil values, or a nil value at the field, are skipped.
# Raises RuntimeError when the block's arity is not 1, 2 or 3.
# Returns self.
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    next if values.nil?

    # For :single and :flat tables the entry itself is the field's value.
    case
    when type == :single
      field_values = values
    when type == :flat
      field_values = values
    else
      next if values[field_pos].nil? 
      field_values = values[field_pos]
    end

    new_values = case 
                 when block.arity == 1
                   yield(field_values)
                 when block.arity == 2
                   yield(field_values, key)
                 when block.arity == 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    case
    when type == :single
      self[key] = new_values
    when type == :flat
      self[key] = new_values
    else
      # When old and new value are both strings or both arrays, the existing
      # object is updated with #replace instead of being swapped out.
      if (String === values[field_pos] and String === new_values) or
        (Array === values[field_pos] and Array === new_values) 
         values[field_pos].replace new_values
      else
        values[field_pos] = new_values
      end
      self[key] = values
    end
  end

  self
end

#process_key(&block) ⇒ Object



512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
# File 'lib/rbbt/tsv/manipulate.rb', line 512

# Builds a new table whose keys are computed from the current ones.
#
# block - called with (key) or (key, values) depending on its arity; its
#         result is used as the key under which the entry's values are
#         stored in the new table.
#
# Raises RuntimeError when the block's arity is neither 1 nor 2.
# Returns the new table, created with annotate({}).
def process_key(&block)
  new = annotate({})
  through do |key, values|
    key = case
          when block.arity == 1
            yield(key)
          when block.arity == 2
            yield(key, values)
          else
            # Only arities 1 and 2 are supported here, so say so.
            raise "Unexpected arity in block, must be 1 or 2: #{block.arity}"
          end
    new[key] = values
  end
  new
end

#R(script, open_options = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/rbbt/util/R.rb', line 75

# Runs an R script against this table.
#
# The table is written (via to_s) to a temporary file, loaded in R into the
# variable `data`, the stripped +script+ is run, and `data` is written back
# to the same file unless it is NULL. The output of R.run is logged at
# debug level.
#
# script       - R code to run.
# open_options - handed to TSV.open when reading the result (:type defaults
#                to :list); :raw returns the file contents as read by
#                Open.read instead, and :ignore_output skips reading the
#                result.
def R(script, open_options = {})
  TmpFile.with_file do |f|
    Open.write(f, self.to_s)
    Log.debug(R.run(
    <<-EOF
## Loading tsv into data
data = rbbt.tsv('#{f}');

#{script.strip}

## Resaving data
if (! is.null(data)){ rbbt.tsv.write('#{f}', data); }
    EOF
    ).read)
    open_options = Misc.add_defaults open_options, :type => :list
    if open_options[:raw]
      Open.read(f)
    else
      TSV.open(f, open_options) unless open_options[:ignore_output]
    end
  end
end

#R_interactive(pre_script = nil) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/R.rb', line 98

# Opens an interactive R session with this table available on disk.
#
# The table is written (via to_s) to a temporary file whose path is exposed
# to R as `data_file`. When +pre_script+ is given, the path of a second
# temporary file created from it is exposed as `script_file`.
#
# pre_script - optional R code (nil by default).
def R_interactive(pre_script = nil)
  TmpFile.with_file do |f|
    # Interpolate instead of appending: pre_script defaults to nil, and
    # String#<< raises TypeError when given nil.
    Log.debug{"R Script:\n#{pre_script}"}
    TmpFile.with_file(pre_script) do |script_file|
      Open.write(f, self.to_s)
      script = "data_file = '#{f}';\n"
      script << "script_file = '#{script_file}';\n" if pre_script
      R.interactive(script)
    end
  end
end

#range_index(start_field = nil, end_field = nil, options = {}) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/rbbt/tsv/index.rb', line 204

# Builds a range index over a pair of fields.
#
# start_field - field holding the range starts (default "Start").
# end_field   - field holding the range ends (default "End").
# options     - persist options (:persist defaults to false); they are
#               pulled out and handed to Persist.persist together with the
#               :fwt type.
#
# Every entry contributes one [key, [start, end]] range per start/end pair,
# with both bounds converted by to_i. The ranges are loaded into an
# in-memory FixWidthTable sized for the longest key. Returns the result of
# Persist.persist.
def range_index(start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"

  options = Misc.add_defaults options,
    :persist => false, :persist_file => nil, :persist_update => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    widest_key = 0
    ranges = []
    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, [start_field, end_field] do |key, values|
          widest_key = [widest_key, key.length].max

          range_starts, range_ends = values
          if Array === range_starts
            range_starts.zip(range_ends).each do |range_start, range_end|
              ranges << [key, [range_start.to_i, range_end.to_i]]
            end
          else
            ranges << [key, [range_starts.to_i, range_ends.to_i]]
          end
        end
      end
    end

    table = FixWidthTable.get(:memory, widest_key, true)
    table.add_range ranges
    table.read
    table
  end
end

#reorder(new_key_field = nil, new_fields = nil, options = {}) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/rbbt/tsv/manipulate.rb', line 229

# Builds a new table with a different key field and/or selection of fields.
#
# new_key_field - field to use as key in the result.
# new_fields    - fields to keep in the result.
# options       - :zipped and :uniq are forwarded to the traversal; persist
#                 options are pulled out and handed to Persist.persist_tsv
#                 (the prefix is always "Reorder").
#
# Returns the result of Persist.persist_tsv; the block fills the data object
# and sets it up with the new key field, fields, and this table's filename,
# namespace and entity options. The result type is :list when :zipped is
# set, otherwise this table's type.
def reorder(new_key_field = nil, new_fields = nil, options = {}) 
  zipped, uniq = Misc.process_options options, :zipped, :uniq

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] = "Reorder"

  Persist.persist_tsv self, self.filename, {:key_field => new_key_field, :fields => new_fields}, persist_options do |data|
    # Persisted targets are filled through a plain in-memory hash first and
    # merged in at the end.
    if data.respond_to? :persistence_path
      real_data = data 
      data = {}
    end

    with_unnamed do
      if zipped or (type != :double and type != :flat)
        new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
          data[key] = value.clone if Array === value
        end
      else
        case type 
        when :double
          new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
            if data[key].nil?
              #data[key] = value.collect{|v| v.dup}
              data[key] = value.collect{|v| v.dup}
            else
              # Same new key seen again: append field by field.
              current = data[key]
              value.each_with_index do |v, i|
                current[i].concat v
              end
              data[key] = current if data.respond_to? :tokyocabinet_class
            end
          end
        when :flat
          new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
            data[key] ||= []
            data[key].concat value
          end
        end
      end

      if real_data and real_data.respond_to? :persistence_path
        real_data.serializer = type if real_data.respond_to? :serializer
        real_data.merge!(data)
        data = real_data
      end

      data.extend TSV unless TSV === data
      data.key_field = new_key_field_name
      data.fields = new_field_names
      data.filename = filename
      data.namespace = namespace
      data.entity_options = entity_options
      data.entity_templates = {}
      # Carry over only the templates of fields that survive the reorder.
      data.fields.each do |field|
        data.entity_templates[field] = entity_templates[field] if entity_templates.include? field
      end
      data.type = zipped ? :list : type
    end
  end
end

#reset_filters ⇒ Object



281
282
283
284
285
286
287
288
289
290
# File 'lib/rbbt/tsv/filter.rb', line 281

# Resets the filters attached to this object.
#
# When @filter_dir is nil or empty, every element of @filters (if it is an
# Array) receives +reset+ and nil is returned. Otherwise every '*.filter'
# file found directly under @filter_dir is removed and the list of removed
# paths is returned.
def reset_filters
  persisted = !(@filter_dir.nil? or @filter_dir.empty?)

  if persisted
    filter_files = Dir.glob(File.join(@filter_dir, '*.filter'))
    filter_files.each { |path| FileUtils.rm path }
  else
    @filters.each { |filter| filter.reset } if Array === @filters
    nil
  end
end

#select(method = nil, invert = false, &block) ⇒ Object



317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
# File 'lib/rbbt/tsv/manipulate.rb', line 317

# Builds a new TSV containing the entries of this one that match a criterion.
#
# The criterion depends on the class of +method+:
# * nil with a block: the block is called with (key, values).
# * Array: an entry matches when its key or any of its values is in the list.
# * Regexp: an entry matches when its key or any value matches the pattern.
# * String with a block: the String names the field handed to the block
#   (block arity 1 receives the field value, arity 2 receives key and value).
# * String without a block: an entry matches when its key or any value
#   equals the String.
# * Hash: only the first pair is used, as {field => criterion}. The criterion
#   may be an Array, Regexp, String ("name:..." compares the +name+ of each
#   value), Integer (minimum number of values) or Proc.
#
# +invert+ flips the outcome of the match (it is XOR-ed with it).
#
# Returns the new TSV; the receiver is not modified.
# Raises RuntimeError when a String field used with a block is not identified.
def select(method = nil, invert = false, &block)
  new = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers)

  new.key_field = key_field
  new.fields    = fields.dup unless fields.nil?
  new.type      = type
  new.filename  = filename
  new.namespace = namespace
  new.entity_options = entity_options
  new.entity_templates = entity_templates
  
  case
  when (method.nil? and block_given?)
    through do |key, values|
      new[key] = values if invert ^ (yield key, values)
    end
  when Array === method
    # Set gives constant-time membership checks and supports intersection
    method = Set.new method
    with_unnamed do
      case type
      when :single
        through do |key, value|
          new[key] = value if invert ^ (method.include? key or method.include? value)
        end
      when :list, :flat
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values).any?)
        end
      else
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values.flatten).any?)
        end
      end
    end
  when Regexp === method
    with_unnamed do
      through do |key, values|
        new[key] = values if invert ^ ([key,values].flatten.select{|v| v =~ method}.any?)
      end
    end
  when String === method
    if block_given?
      case 
      when block.arity == 1
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(value))
            end
          else
            pos = identify_field method
            raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil?

            through do |key, values|
              new[key] = values if invert ^ (yield(values[pos]))
            end
          end
        end
      when block.arity == 2
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key, key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(key, value))
            end
          else
            pos = identify_field method
            # Same guard as the arity-1 branch: fail with a clear message
            # instead of a TypeError from indexing with nil
            raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil?

            through do |key, values|
              new[key] = values if invert ^ (yield(key, values[pos]))
            end
          end

        end
      end

    else
      with_unnamed do
        through do |key, values|
          new[key] = values if invert ^ ([key,values].flatten.select{|v| v == method}.any?)
        end
      end
    end
  when Hash === method
    # Only the first field => criterion pair is considered
    key  = method.keys.first
    method = method.values.first
    case
    when (Array === method and (key == :key or key_field == key))
      # NOTE(review): with invert set this stores nil for the listed keys that
      # are absent instead of returning the unlisted entries -- confirm intent
      with_unnamed do
        method.each{|key| 
          new[key] = self[key] if invert ^ (self.include? key)
        }
      end
    when Array === method
      with_unnamed do
        method = Set.new method unless Set === method
        case type
        when :single
          through :key, key do |key, value|
            new[key] = self[key] if invert ^ (method.include? value)
          end
        when :list
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ (method.include? values.first)
          end
        when :flat #untested
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        else
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        end
      end

    when Regexp === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v =~ method}.any?)
        end
      end

    when (String === method and method =~ /name:(.*)/)
      # Not wrapped in with_unnamed: the values must respond to +name+
      name = $1
      if name.strip =~ /^\/(.*)\/$/
        regexp = Regexp.new $1
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v.name =~ regexp}.any?)
        end
      else
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v.name == name}.any?)
        end
      end

    when String === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v == method}.any?)
        end
      end

    # Integer rather than Fixnum: the Fixnum constant no longer exists in
    # current Ruby, and referencing it raised NameError for this branch and
    # for the Proc branch below it
    when Integer === method
      with_unnamed do
        through :key, key do |key, values|
          new[key] = self[key] if invert ^ (values.flatten.length >= method)
        end
      end
    when Proc === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| method.call(v)}.any?)
        end
      end
    end
  end

  new
end

#setup_array(*args) ⇒ Object



59
60
61
62
63
# File 'lib/rbbt/tsv/accessor.rb', line 59

# Passes +args+ straight to NamedArray.setup and then stores this object's
# entity_templates in the result's @entity_templates instance variable.
# Returns whatever NamedArray.setup returned.
def setup_array(*args)
  NamedArray.setup(*args).tap do |named|
    named.instance_variable_set(:@entity_templates, entity_templates)
  end
end

#size ⇒ Object



227
228
229
# File 'lib/rbbt/tsv/accessor.rb', line 227

# Number of entries, taken as the length of +keys+.
def size
  key_list = keys
  key_list.length
end

#slice(fields) ⇒ Object



290
291
292
# File 'lib/rbbt/tsv/manipulate.rb', line 290

# Shorthand for +reorder+ that keeps the current key (:key) and asks for the
# given +fields+. Returns whatever +reorder+ returns.
def slice(fields)
  reorder(:key, fields)
end

#sort(*fields) ⇒ Object



294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/rbbt/tsv/manipulate.rb', line 294

# Returns the keys ordered by a per-entry sort value.
#
# The entries are traversed with +through+ restricted to +fields+ (nil when no
# field is given). With a block, the sort value is the block result for the
# splatted entry value. Without one it is the value itself for :single, the
# first element of the first list for :double, and the first element
# otherwise.
def sort(*fields)
  fields = nil if fields.empty?

  pairs = []
  through :key, fields do |key, value|
    sort_value =
      if block_given?
        yield(*value)
      elsif type == :single
        value
      elsif type == :double
        value.first.first
      else
        value.first
      end
    pairs << [key, sort_value]
  end

  pairs.sort_by { |_, sort_value| sort_value }.collect { |key, _| key }
end

#sort_by(field = nil, just_keys = false, &block) ⇒ Object

{{{ Sorting



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/rbbt/tsv/accessor.rb', line 251

# Sorts the entries by a field or by a block.
#
# field::     field to sort by; nil or :all works on whole [key, value] pairs
#             (taken from +collect+) and, when no block is given, sorts them
#             by key.
# just_keys:: when true only the ordered keys are returned, otherwise
#             [key, value] pairs.
# block::     optional; handed to Array#sort_by over the [key, sort value]
#             pairs.
#
# Without a block and with a specific field, nil or empty sort values go
# first, single-element arrays are compared by their element, other arrays by
# length, and anything else with <=>.
def sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?
  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through :key, field do |key, value|
        elems << [key, value]
      end
    when :list, :flat, :double
      through :key, field do |key, values|
        elems << [key, values.first]
      end
    end
  end

  if not block_given?
    # This used to test +fields+ (the field-name list), which is never :all,
    # so the sort-by-key branch could not be reached
    if field == :all
      if just_keys
        keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
        keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
      else
        elems.sort_by{|key, value| key }
      end
    else
      sorted = elems.sort do |a, b| 
        a_value = a.last
        b_value = b.last
        case
        when ((a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)) and (b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)))
          0
        when (a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?))
          -1
        when (b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?))
          1
        when Array === a_value
          if a_value.length == 1 and b_value.length == 1
            a_value.first <=> b_value.first
          else
            a_value.length <=> b_value.length
          end
        else
          a_value <=> b_value
        end
      end
      if just_keys
        keys = sorted.collect{|key, value| key}
        keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
        keys
      else
        sorted.collect{|key, value| [key, self[key]]}
      end
    end
  else
    if just_keys
      elems.sort_by(&block).collect{|key, value| key}
    else
      elems.sort_by(&block).collect{|key, value| [key, self[key]]}
    end
  end
end

#summary ⇒ Object



552
553
554
555
556
557
558
559
560
561
562
563
564
565
# File 'lib/rbbt/tsv/accessor.rb', line 552

# Returns a multi-line, human-readable description of the TSV: key field,
# fields, type, size, namespace and one example entry (the first key with a
# fingerprint of its value). Evaluated inside +with_unnamed+.
def summary
  with_unnamed do
    report = []
    report << "Key field = #{key_field || "*No key field*"}"
    report << "Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}"
    report << "Type = #{type}"
    report << "Size = #{size}"
    report << "namespace = #{namespace}"
    report << "Example:"
    example_key = keys.first
    report << "- #{example_key}: #{Misc.fingerprint self[example_key] }"
    # The text ends with an empty line after the example
    report << ""
    report.join("\n") << "\n"
  end
end

#swap_id(*args) ⇒ Object



62
63
64
# File 'lib/rbbt/tsv/change_id.rb', line 62

# Instance-level shortcut for TSV.swap_id: calls it with this object as the
# first argument followed by +args+, and returns its result.
def swap_id(*args)
  call_args = [self] + args
  TSV.swap_id(*call_args)
end

#through(new_key_field = nil, new_fields = nil, uniq = false, zipped = false) ⇒ Object

{{{ Methods



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/rbbt/tsv/manipulate.rb', line 154

# Iterates over the entries, optionally re-keyed and restricted to some
# fields, yielding (key, value) pairs.
#
# new_key_field:: field to use as key, handed to the Traverser.
# new_fields::    fields to include in the values, handed to the Traverser.
# uniq::          handed to the Traverser.
# zipped::        when true, each produced key is yielded with the i-th
#                 element of every value list (falling back to the first
#                 element when the i-th is nil).
#
# Unless @unnamed is set, values are annotated (NamedArray / prepare_entity)
# and keys are passed through Misc.prepare_entity before being yielded. When
# @monitor is set a Progress::Bar is ticked once per source entry.
#
# Returns [new key field name, new field names] as reported by the Traverser.
def through(new_key_field = nil, new_fields = nil, uniq = false, zipped = false)

  traverser = Traverser.new key_field, fields, new_key_field, new_fields, type, uniq

  if @monitor
    desc = "Iterating TSV"
    step = 100
    # A Hash monitor may override the description and the step
    if Hash === @monitor
      desc = @monitor[:desc] if @monitor.include? :desc 
      step = @monitor[:step] if @monitor.include? :step 
    end
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  else
    progress_monitor = nil
  end

  each do |key, value|
    progress_monitor.tick if progress_monitor

    keys, value = traverser.process(key, value)
    
    keys = [keys].compact unless Array === keys

    # Annotated with Entity and NamedArray
    if not @unnamed
      if not traverser.new_field_names.nil? 
        case type
        when :double, :list
          if value.frozen?
            Log.warn "Value frozen: #{ value }"
          end
          if value.nil?
            nil
          else
            NamedArray.setup value, traverser.new_field_names, key, entity_options, entity_templates
          end
        when :flat, :single
          # NOTE(review): the result is not assigned, so this relies on
          # prepare_entity annotating +value+ in place -- confirm
          prepare_entity(value, traverser.new_field_names.first, entity_options)
        end
      end
    end

    next if keys.nil?

    if zipped

      keys.each_with_index do |k,i|
        v = value.collect{|list|
          r = list[i]
          r = list[0] if r.nil?
          r
        }

        if not @unnamed 
          k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
        end
        v.key = k if NamedArray === v
        yield k, v
 
      end

    else
      keys.each do |k|
        # The key itself must be prepared and yielded, as in the zipped
        # branch; this used to prepare an undefined variable and drop the result
        if not @unnamed
          k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
        end
        value.key = k if NamedArray === value
        yield k, value
      end
    end
  end

  [traverser.new_key_field_name, traverser.new_field_names]
end

#to_double ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/tsv/util.rb', line 84

# Converts the TSV to type :double (every field holds a list of values).
#
# A TSV that is already :double is returned as is. Otherwise a new annotated
# Hash is built: :flat values are wrapped as the single field, :single values
# become a one-element list in a single field, and each :list element becomes
# a one-element list.
def to_double
  # Already in the target shape. This used to fall through and return an
  # empty TSV because the bare +self+ in the case was not returned.
  return self if type == :double

  new = {}
  case type
  when :flat
    through do |k,v|
      new[k] = [v]
    end
  when :single
    through do |k,v|
      new[k] = [[v]]
    end
  when :list
    through do |k,v|
      new[k] = v.collect{|e| [e]}
    end
  end
  self.annotate(new)
  new.type = :double
  new
end

#to_flat(field = nil) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/rbbt/tsv/util.rb', line 107

# Converts the TSV to type :flat (each key maps to one list of values).
#
# field:: for :double TSVs, the field whose list is kept; the first field
#         when nil. Ignored for the other types.
#
# A TSV that is already :flat is returned as is. Otherwise a new annotated
# Hash is built whose +fields+ (when present) are cut down to the first one.
def to_flat(field = nil)
  # Already in the target shape. This used to fall through and return an
  # empty TSV because the bare +self+ in the case was not returned.
  return self if type == :flat

  new = {}
  case type
  when :double
    if field.nil?
      through do |k,v| new[k] = v.first end
    else
      pos = identify_field field
      through do |k,v| new[k] = v[pos] end
    end
  when :single
    through do |k,v|
      new[k] = [v]
    end
  when :list
    through do |k,v|
      new[k] = [v.first]
    end
  end
  self.annotate(new)
  new.fields = new.fields[0..0] if new.fields
  new.type = :flat
  new
end

#to_hash ⇒ Object



567
568
569
570
571
# File 'lib/rbbt/tsv/accessor.rb', line 567

# Returns a duplicate of this object with every key listed in ENTRY_KEYS
# deleted. The receiver itself is left untouched.
def to_hash
  self.dup.tap do |plain|
    ENTRY_KEYS.each { |entry| plain.delete entry }
  end
end

#to_s(keys = nil, no_options = false) ⇒ Object



501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
# File 'lib/rbbt/tsv/accessor.rb', line 501

# Serializes the TSV as text.
#
# keys::       nil to dump every entry in +each+ order, :sort to dump them in
#              sorted key order, or an explicit list of keys. A boolean here
#              is taken as the +no_options+ flag.
# no_options:: when true, the leading "#: ..." line holding the entries of
#              ENTRIES other than key_field and fields is left out.
#
# After the options line comes a "#key_field<TAB>fields" header (only when
# +fields+ is set) and one line per entry built with +values_to_s+.
def to_s(keys = nil, no_options = false)
  # Accept to_s(true) / to_s(false) as a way to pass only no_options
  if TrueClass === keys or FalseClass === keys
    no_options = keys
    keys = nil
  end

  if keys == :sort
    with_unnamed do
      keys = self.keys.sort
    end
  end

  out = ""

  unless no_options
    option_pairs = (ENTRIES - ["key_field", "fields"]).collect { |entry| [entry.to_sym, self.send(entry)] }
    out << "#: " << Misc.hash2string(option_pairs) << "\n"
  end

  out << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  with_unnamed do
    if keys.nil?
      each do |key, values|
        out << key.to_s
        out << values_to_s(values)
      end
    else
      keys.zip(values_at(*keys)).each do |key, values|
        out << key.to_s << values_to_s(values)
      end
    end
  end

  out
end

#transpose(key_field) ⇒ Object



605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
# File 'lib/rbbt/tsv/manipulate.rb', line 605

# Builds the transposed TSV: the current fields become the keys and the
# current keys become the fields.
#
# key_field:: name given to the key field of the new TSV.
#
# Only :list TSVs are supported; any other type raises a RuntimeError.
# entity_options, entity_templates and namespace are copied to the result.
def transpose(key_field)
  raise "Transposing only works for TSVs of type :list" unless type == :list
  new_fields = keys
  new = TSV.setup({}, :key_field => key_field, :fields => new_fields, :type => type, :filename => filename, :identifiers => identifiers)

  # Column of each original key, computed once so that every row does not
  # have to scan the whole key list again
  positions = {}
  new_fields.each_with_index do |k, i|
    positions[k] = i unless positions.key?(k)
  end

  through do |key, values|
    # Fall back to the linear lookup for keys the table cannot resolve
    column = positions.fetch(key) { new_fields.index key }
    fields.zip(values) do |new_key, value|
      new[new_key] ||= []
      new[new_key][column] = value
    end
  end

  new.entity_options = entity_options
  new.entity_templates = entity_templates
  new.namespace = namespace

  new
end

#tsv_sort(&block) ⇒ Object



320
321
322
# File 'lib/rbbt/tsv/accessor.rb', line 320

# Sorts the result of +collect+ with Array#sort, using +block+ as the
# comparator when one is given.
def tsv_sort(&block)
  pairs = collect
  pairs.sort(&block)
end

#value_peek ⇒ Object



538
539
540
541
542
543
544
545
546
547
548
549
550
# File 'lib/rbbt/tsv/accessor.rb', line 538

# Returns a plain Hash with the first entries produced by +through+.
#
# Traversal is cut short by raising once more than ten entries have been
# stored, so up to eleven entries are returned. Any StandardError raised
# during the traversal is swallowed by the rescue.
def value_peek
  sample = {}
  seen = 0
  begin
    through do |key, value|
      sample[key] = value
      seen += 1
      # Abort the traversal; caught by the rescue below
      raise "STOP" if seen > 10
    end
  rescue StandardError
  end
  sample
end

#values ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/rbbt/tsv/accessor.rb', line 152

# Returns the values for all keys, fetched with +chunked_values_at+.
#
# When @unnamed is set or there is no field information the raw values are
# returned. Otherwise they are annotated according to +type+: each value goes
# through +setup_array+ for :double and :list, the whole list through
# +prepare_entity+ for :single, and each value through +prepare_entity+ for
# :flat.
def values
  raw = chunked_values_at(keys)
  return raw if @unnamed or fields.nil?

  case type
  when :single
    prepare_entity(raw, fields.first, entity_options)
  when :flat
    raw.collect { |value| prepare_entity(value, fields.first, entity_options) }
  when :double, :list
    raw.each { |value| setup_array value, fields, nil, entity_options }
    raw
  else
    raw
  end
end

#values_at(*keys) ⇒ Object



235
236
237
238
239
# File 'lib/rbbt/tsv/accessor.rb', line 235

# Looks up each of the given keys with +self[]+ and returns the results as an
# Array, in the order the keys were given.
def values_at(*keys)
  keys.map { |wanted| self[wanted] }
end

#values_to_s(values) ⇒ Object



486
487
488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/rbbt/tsv/accessor.rb', line 486

# Formats the value part of one TSV line, newline included.
#
# nil yields one empty tab-prefixed column per field (just a newline when
# there are no fields). An Array yields tab-separated columns, with nested
# Arrays joined by "|". Anything else yields its +to_s+ after a tab.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil? or fields.empty?

    blanks = [""] * fields.length
    "\t#{blanks * "\t"}\n"
  elsif Array === values
    columns = values.collect { |v| Array === v ? v * "|" : v }
    "\t#{columns * "\t"}\n"
  else
    "\t#{values}\n"
  end
end

#with_monitor(value = true) ⇒ Object



73
74
75
76
77
78
79
# File 'lib/rbbt/tsv/accessor.rb', line 73

# Runs the block with @monitor temporarily set to +value+ and returns the
# block's result.
#
# value:: monitor setting to use while the block runs (defaults to true).
#
# The previous @monitor is restored in an +ensure+, so it is put back even
# when the block raises or exits early.
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value
  yield
ensure
  @monitor = saved_monitor
end

#with_unnamed ⇒ Object



65
66
67
68
69
70
71
# File 'lib/rbbt/tsv/accessor.rb', line 65

# Runs the block with @unnamed temporarily set to true and returns the
# block's result.
#
# The previous @unnamed is restored in an +ensure+, so it is put back even
# when the block raises or exits early; without that, one failure inside the
# block would leave the object unnamed from then on.
def with_unnamed
  saved_unnamed = @unnamed 
  @unnamed = true
  yield
ensure
  @unnamed = saved_unnamed
end