Class: IOStreams::Delimited::Reader

Inherits:
Object
  • Object
show all
Defined in:
lib/io_streams/delimited/reader.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false) ⇒ Reader

Create a delimited UTF8 stream reader from the supplied input stream.

The input stream should be binary with no text conversions performed, since `strip_non_printable` will be applied to the binary stream before converting to UTF-8.

Parameters

input_stream
  The input stream that implements #read

delimiter: [String]
  Line / Record delimiter to use to break the stream up into records
    Any string to break the stream up by
    The records when saved will not include this delimiter
  Default: nil
    Automatically detect line endings and break up by line
    Searches for the first "\r\n" or "\n" and then uses that as the
    delimiter for all subsequent records

buffer_size: [Integer]
  Maximum size of the buffer into which the stream is read for
  processing.
  Must be large enough to hold the entire first line and its delimiter(s)
  Default: 65536 ( 64K )

strip_non_printable: [true|false]
  Strip all non-printable characters read from the file
  Default: false

encoding:
  Force encoding to this encoding for all data being read
  Default: UTF8_ENCODING
  Set to nil to disable encoding


50
51
52
53
54
55
56
57
58
59
# File 'lib/io_streams/delimited/reader.rb', line 50

# Create a delimited stream reader on top of `input_stream`.
#
# Parameters
#   input_stream
#     Stored as-is for later reads by this reader.
#   delimiter: [String|nil]
#     Record delimiter. When nil it is left unset here; #each assigns one
#     from `detect_delimiter` on first use.
#   buffer_size: [Integer]
#     Stored in @buffer_size. Default: 65536.
#   encoding:
#     Stored in @encoding. When nil, the delimiter is stored untouched.
#   strip_non_printable: [true|false]
#     Stored in @strip_non_printable. Default: false.
def initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
  @input_stream        = input_stream
  @buffer_size         = buffer_size
  @encoding            = encoding
  @strip_non_printable = strip_non_printable

  # Tag the delimiter as UTF-8 on a private copy. `force_encoding` mutates
  # its receiver, so applying it to the caller's string would change the
  # caller's object and raise FrozenError for a frozen delimiter.
  @delimiter =
    if delimiter && @encoding
      delimiter.dup.force_encoding(UTF8_ENCODING)
    else
      delimiter
    end
  @buffer = ''
end

Instance Attribute Details

#buffer_size ⇒ Object

Returns the value of attribute buffer_size.



4
5
6
# File 'lib/io_streams/delimited/reader.rb', line 4

# Returns the value of the @buffer_size instance variable, which
# #initialize assigns from its `buffer_size:` keyword argument.
def buffer_size
  @buffer_size
end

#delimiter ⇒ Object

Returns the value of attribute delimiter.



4
5
6
# File 'lib/io_streams/delimited/reader.rb', line 4

# Returns the value of the @delimiter instance variable.
# May be nil when no delimiter was passed to #initialize; #each assigns
# one via `self.delimiter ||= detect_delimiter` in that case.
def delimiter
  @delimiter
end

#encoding ⇒ Object

Returns the value of attribute encoding.



4
5
6
# File 'lib/io_streams/delimited/reader.rb', line 4

# Returns the value of the @encoding instance variable, which
# #initialize assigns from its `encoding:` keyword argument (may be nil).
def encoding
  @encoding
end

#strip_non_printable ⇒ Object

Returns the value of attribute strip_non_printable.



4
5
6
# File 'lib/io_streams/delimited/reader.rb', line 4

# Returns the value of the @strip_non_printable instance variable, which
# #initialize assigns from its `strip_non_printable:` keyword argument.
def strip_non_printable
  @strip_non_printable
end

Class Method Details

.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false) ⇒ Object

Read from a file or stream



7
8
9
10
11
12
13
14
15
# File 'lib/io_streams/delimited/reader.rb', line 7

# Build a reader and yield it to the caller's block.
#
# When `IOStreams.reader_stream?` accepts the argument it is handed to
# `new` directly; otherwise the argument is opened with ::File.open in
# 'rb' mode and the resulting IO is wrapped instead (the file is closed
# when the block returns). Returns whatever the block returns.
def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
  options = {
    delimiter:           delimiter,
    buffer_size:         buffer_size,
    encoding:            encoding,
    strip_non_printable: strip_non_printable
  }

  return yield(new(file_name_or_io, **options)) if IOStreams.reader_stream?(file_name_or_io)

  ::File.open(file_name_or_io, 'rb') { |io| yield new(io, **options) }
end

Instance Method Details

#each(&block) ⇒ Object Also known as: each_line

Returns each line, one at a time, to the supplied block



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/io_streams/delimited/reader.rb', line 62

# Yield each record of the stream, with its delimiter removed, to the block.
#
# Data is pulled in by repeated `read_chunk` calls (defined elsewhere; a
# return value of 0 is treated here as end of stream, and it is presumably
# what appends new data to @buffer). A record that is still missing its
# delimiter at the end of a chunk is carried over in @buffer and completed
# by the next chunk; at end of stream a trailing record without a
# delimiter is yielded as-is.
#
# When no delimiter has been set, `detect_delimiter` (defined elsewhere)
# supplies one the first time data is available.
#
# Returns an Enumerator when called without a block, otherwise nil.
def each(&block)
  # Without a block there is nothing to call; follow the usual Ruby
  # convention instead of failing with NoMethodError on nil.
  return to_enum(:each) unless block

  partial = nil
  loop do
    if read_chunk == 0
      # End of stream: flush the last record if it had no trailing delimiter.
      block.call(partial) if partial
      return
    end

    self.delimiter ||= detect_delimiter
    # Index of the last character before the trailing delimiter.
    end_index = -(delimiter.size + 1)

    @buffer.each_line(delimiter) do |line|
      if line.end_with?(delimiter)
        # Complete record: hand it over without its delimiter.
        block.call(line[0..end_index])
        partial = nil
      else
        # Only the final piece of the buffer can lack a delimiter.
        partial = line
      end
    end
    # Keep the incomplete tail so the next chunk can complete it.
    @buffer = partial || ''
  end
end

#read(length = nil, outbuf = nil) ⇒ Object

Reads length bytes from the I/O stream. Not recommended, but available if someone calls #read on this delimited reader



90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/io_streams/delimited/reader.rb', line 90

# IO#read-style access to the buffered stream. Not recommended, but
# available if someone calls #read on this delimited reader.
#
# Parameters
#   length: [Integer|nil]
#     Amount to return. Counted with String#size / String#slice! on the
#     internal buffer, i.e. characters of the buffer's encoding.
#     NOTE(review): the description says "bytes"; the two only coincide
#     for single-byte or binary data.
#     nil: read everything up to end of stream.
#   outbuf: [String|nil]
#     When given, its contents are replaced with the data read and it is
#     the object returned, as with IO#read.
#
# Returns
#   With a positive length: the data read, or nil at end of stream, so
#   that `while (data = read(n))` loops terminate.
#   With no length (or length 0): the data read, "" when nothing is left.
def read(length = nil, outbuf = nil)
  if length
    # Pull chunks until enough is buffered or the stream is exhausted.
    while (@buffer.size < length) && (read_chunk > 0)
    end
    data = @buffer.slice!(0, length)
  else
    while read_chunk > 0
    end
    # Hand the buffer over and start a fresh one, so the same data is not
    # returned again and the caller does not share our internal buffer.
    data    = @buffer
    @buffer = ''
  end

  outbuf.replace(data) if outbuf
  return nil if length && length > 0 && data.empty?

  outbuf || data
end