Class: TabReader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/tabreader/version.rb,
lib/tabreader/reader.rb

Overview

Note: for now, TabReader is a class, NOT a module. Should this change? Why? Why not?

Constant Summary collapse

MAJOR =

TODO: move these constants into a Version namespace (or similar) - why? why not?

1
MINOR =
0
PATCH =
1
VERSION =
[MAJOR,MINOR,PATCH].join('.')

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ TabReader

Returns a new instance of TabReader.



154
155
156
157
158
159
160
# File 'lib/tabreader/reader.rb', line 154

def initialize( data )
  if data.is_a?( String )
    @input = data   # note: just needs each for each_line
  else  ## assume io
    @input = data
  end
end

Class Method Details



16
17
18
# File 'lib/tabreader/version.rb', line 16

## Builds the one-line banner: library name/version followed by
##  the running Ruby's version, release date and platform.
def self.banner
  ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "tabreader/#{VERSION} on #{ruby_info}"
end

.build_loggerObject

add simple logger with debug flag/switch

use Parser.debug = true   # to turn on

TODO/FIXME: use logutils instead of the standard Logger - why? why not?


25
26
27
28
29
# File 'lib/tabreader/reader.rb', line 25

## Creates a logger that writes to STDOUT.
##  note: the level is set to :info on start; a fresh Logger is 0 (debug) by default
def self.build_logger()
  Logger.new( STDOUT ).tap do |log|
    log.level = :info
  end
end

.foreach(path, &block) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/tabreader/reader.rb', line 103

## Opens path and iterates over its records.
##  with a block:    passes the block to each and closes the reader afterwards
##                   (also when the block raises).
##  without a block: returns an enumerator -
##                   note: caller (responsible) must close file!!!
##
## open question (kept from original):
##   remove version without block given - why? why not?
##   use Tab.open().to_enum  or Tab.open().each
##     or Tab.new( File.new() ).to_enum or Tab.new( File.new() ).each ???
def self.foreach( path, &block )
  tab = open( path )
  return tab.to_enum  unless block_given?

  begin
    tab.each( &block )
  ensure
    tab.close
  end
end

.header(path) ⇒ Object

convenience helper for header (first row with column names)



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/tabreader/reader.rb', line 134

## Convenience helper - returns the first record only
##  (the header row with the column names).
##  returns nil for empty (for now) - why? why not?
##   (alternative: return an empty record e.g. [])
## open question: use header or headers - or use both (with alias)?
def self.header( path )
  ## first stops iterating after the first record,
  ##   so only the first line gets read/parsed
  open( path ) { |tab| tab.first }
end

.loggerObject



30
# File 'lib/tabreader/reader.rb', line 30

## Returns the shared logger; built via build_logger on first use
##  and kept in the @@logger class variable afterwards.
def self.logger()
  @@logger ||= build_logger
end

.open(path, mode = nil, &block) ⇒ Object

rename path to filename or name - why? why not?



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/tabreader/reader.rb', line 79

## Opens the file at path and wraps it in a new reader.
##  mode - passed on to File.open;
##         note: defaults to 'r:bom|utf-8' (if nil/not passed in)
##  handles blocks like Ruby's open():
##    with a block:    calls the block with the reader, closes the reader
##                     afterwards (also on error) and returns the block's result
##    without a block: returns the (open) reader
## open question: rename path to filename or name - why? why not?
def self.open( path, mode=nil, &block )
  file = File.open( path, mode || 'r:bom|utf-8' )
  tab  = new( file )
  return tab  unless block_given?

  begin
    block.call( tab )
  ensure
    tab.close
  end
end

.parse(data, &block) ⇒ Object



121
122
123
124
125
126
127
128
129
# File 'lib/tabreader/reader.rb', line 121

## Wraps data in a new reader and
##   with a block:    passes the block on to each
##   without a block: slurps contents via read
## note: caller (responsible) must close file!!! - add autoclose - why? why not?
def self.parse( data, &block )
  tab = new( data )
  return tab.read  unless block_given?

  tab.each( &block )
end

.parse_line(line, sep: "\t") ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/tabreader/reader.rb', line 36

## Splits a single line into an array of values.
##   line - the raw line (trailing newlines get removed first)
##   sep  - the separator passed on to split; defaults to tab
## open question: check - can handle comments and blank lines too - why? why not?
##   (for now blank lines and comment lines do NOT get skipped;
##    leading and trailing whitespaces do NOT get stripped)
def self.parse_line( line, sep: "\t" )
  if logger.debug?
    logger.debug "line:"
    logger.debug line.pretty_inspect
  end

  ##  note: chomp('') if is an empty string,
  ##    it will remove all trailing newlines from the string.
  ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
  chomped = line.chomp( '' )
  logger.debug( chomped.pretty_inspect )   if logger.debug?

  ## note: trailing empty fields get (auto-)trimmed by split
  ##   if the limit parameter is omitted;
  ##   a negative limit (-1) keeps trailing empty fields
  ##   and does not limit the number of fields returned
  fields = chomped.split( sep, -1 )
  logger.debug( fields.pretty_inspect )   if logger.debug?

  fields
end

.read(path) ⇒ Object



98
99
100
# File 'lib/tabreader/reader.rb', line 98

## Opens path, calls read on the reader
##  and returns the result; uses the block form of open.
def self.read( path )
  open( path, &:read )
end

.rootObject



20
21
22
# File 'lib/tabreader/version.rb', line 20

## Returns the absolute path of the directory three levels up from this file.
def self.root
  file_dir   = File.dirname( __FILE__ )
  parent_dir = File.dirname( file_dir )
  File.expand_path( File.dirname( parent_dir ) )
end

.versionObject



12
13
14
# File 'lib/tabreader/version.rb', line 12

## Returns the VERSION constant.
def self.version() VERSION; end

Instance Method Details

#closeObject



180
181
182
# File 'lib/tabreader/reader.rb', line 180

## Closes the wrapped input - if it can be closed.
##  note: string needs no close (returns nil)
def close
  return  unless @input.respond_to?( :close )

  @input.close
end

#each(&block) ⇒ Object



165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/tabreader/reader.rb', line 165

## Reads the input line by line and calls the block
##  with the values parsed from every line (via parse_line).
##  without a block, returns an enumerator (via to_enum).
def each( &block )
  return to_enum  unless block_given?

  @input.each_line do |line|
    block.call( self.class.parse_line( line ) )
  end
end

#loggerObject



31
# File 'lib/tabreader/reader.rb', line 31

## Instance-level shortcut - delegates to the class-level logger.
def logger()
  self.class.logger
end

#readObject

method each



178
# File 'lib/tabreader/reader.rb', line 178

## Returns the result of to_a.
def read()
  to_a
end