Class: CSV::Parser::InputsScanner

Inherits:
Object
  • Object
show all
Defined in:
lib/csv/parser.rb

Overview

CSV::Parser::InputsScanner receives IO inputs, an encoding, a row separator and a chunk size. It also manages “keeps” (remembered scan positions that can later be returned to or extracted as text) through its methods keep_start, keep_end, keep_back and keep_drop.

CSV::Parser::InputsScanner#scan tries to match the pattern at the current position. If there’s a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.

CSV::Parser::InputsScanner#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? = true), it returns “”.

Instance Method Summary collapse

Constructor Details

#initialize(inputs, encoding, row_separator, chunk_size: 8192) ⇒ InputsScanner

Returns a new instance of InputsScanner.



87
88
89
90
91
92
93
94
95
# File 'lib/csv/parser.rb', line 87

# Creates a scanner over +inputs+ and immediately loads the first chunk.
#
# inputs::        Collection of input sources. A copy is stored, so changes
#                 this object makes to its input list do not affect the
#                 caller's collection.
# encoding::      Kept in @encoding.
# row_separator:: Kept in @row_separator.
# chunk_size::    Kept in @chunk_size (defaults to 8192).
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  @inputs = inputs.dup
  # With no inputs at all, the scanner is flagged as the last one from
  # the start.
  @last_scanner = @inputs.empty?

  @encoding = encoding
  @row_separator = row_separator
  @chunk_size = chunk_size

  # Stack of keeps pushed by keep_start.
  @keeps = []

  read_chunk
end

Instance Method Details

#check(pattern) ⇒ Object



257
258
259
# File 'lib/csv/parser.rb', line 257

# Delegates to the current scanner's +check+ with +pattern+ and returns its
# result. Only the currently loaded scanner is consulted; no further chunk
# is read here.
def check(pattern)
  @scanner.check(pattern)
end

#each_line(row_separator) {|buffer| ... } ⇒ Object

Yields:

  • (buffer)


97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/csv/parser.rb', line 97

# Yields each line of the remaining input, split on +row_separator+, pulling
# in further chunks with read_chunk until it returns a falsy value. Returns
# an Enumerator when no block is given.
#
# A trailing piece that does not end with +row_separator+ is held back in
# +buffer+ and joined with the beginning of the following data; whatever is
# still buffered after the last chunk is yielded as the final line.
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?
  # Incomplete line (no trailing separator yet) waiting for more data.
  buffer = nil
  input = @scanner.rest
  # Byte position in the current scanner just after the last yielded line.
  position = @scanner.pos
  # Correction added to +position+ on the next yield; set to
  # -buffer.bytesize after a new chunk is loaded, apparently because the
  # buffered bytes are not part of the new scanner's string.
  offset = 0
  n_row_separator_chars = row_separator.size
  # trace(__method__, :start, input)
  while true
    input.each_line(row_separator) do |line|
      # Provisionally advance past this piece; the position is reset to
      # +position+ whenever a complete line is yielded.
      @scanner.pos += line.bytesize
      if buffer
        # A two-character separator may be split between the buffered tail
        # and this piece (presumably across a chunk boundary): complete the
        # buffered line with the separator's second character and yield it.
        if n_row_separator_chars == 2 and
          buffer.end_with?(row_separator[0]) and
          line.start_with?(row_separator[1])
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          # Nothing but the separator's second character was in this piece.
          next if line.empty?
        else
          # Otherwise glue the buffered tail in front of this piece and
          # treat the result as one line.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # Incomplete line: hold it until more data arrives.
        buffer = line
      end
    end
    # Stop once no further chunk can be loaded.
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  # Input ended without a trailing separator: emit the leftover as a line.
  yield(buffer) if buffer
end

#eos? ⇒ Boolean

Returns:

  • (Boolean)


170
171
172
# File 'lib/csv/parser.rb', line 170

# Returns whether the current scanner is at the end of its string
# (delegates to the scanner's +eos?+). No further chunk is read here.
def eos?
  @scanner.eos?
end

#keep_back ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/csv/parser.rb', line 197

# Pops the most recent keep and moves scanning back to it.
#
# When the keep carries buffered text, a new scanner is created over that
# text and part of the current scanner's string is pushed back onto the
# front of the pending inputs. Otherwise the current scanner's position is
# simply reset to the keep's start offset.
#
# Raises UnexpectedError when the keep has no buffer but was recorded for a
# scanner other than the current one.
def keep_back
  # trace(__method__, :start)
  scanner, start, buffer = @keeps.pop
  if buffer
    # trace(__method__, :rescan, start, buffer)
    string = @scanner.string
    if scanner == @scanner
      # NOTE(review): the slice length is bytesize - pos - start rather than
      # bytesize - start; confirm this is the intended span.
      keep = string.byteslice(start,
                              string.bytesize - @scanner.pos - start)
    else
      # The keep was recorded for a different scanner: requeue the whole
      # string of the current one.
      keep = string
    end
    if keep and not keep.empty?
      # Put the text back at the front of the pending inputs; with input
      # pending again, the current scanner is no longer flagged as last.
      @inputs.unshift(StringIO.new(keep))
      @last_scanner = false
    end
    # Continue scanning from the beginning of the buffered text.
    @scanner = StringScanner.new(buffer)
  else
    if @scanner != scanner
      message = "scanners are different but no buffer: "
      message += "#{@scanner.inspect}(#{@scanner.object_id}): "
      message += "#{scanner.inspect}(#{scanner.object_id})"
      raise UnexpectedError, message
    end
    # trace(__method__, :repos, start, buffer)
    @scanner.pos = start
    last_scanner, last_start, last_buffer = @keeps.last
    # Drop the last buffer when the last buffer is the same data
    # in the last keep. If we keep it, we have duplicated data
    # by the next keep_back.
    if last_scanner == @scanner and
      last_buffer and
      last_buffer == last_scanner.string.byteslice(last_start, start)
      @keeps.last[2] = nil
    end
  end
  # Make sure there is data to scan after going back.
  read_chunk if @scanner.eos?
end

#keep_drop ⇒ Object



236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/csv/parser.rb', line 236

# Discards the most recent keep.
#
# If the discarded keep had buffered text and another keep remains on the
# stack, that text is handed to the remaining keep: appended to its existing
# buffer, or installed as its buffer when it had none. Returns nil when
# there was nothing to hand over.
def keep_drop
  popped = @keeps.pop
  dropped_text = popped && popped[2]
  # trace(__method__, :done, :empty) unless dropped_text
  return unless dropped_text

  parent = @keeps.last
  # trace(__method__, :done, :no_last_keep) unless parent
  return unless parent

  parent_text = parent[2]
  if parent_text
    parent_text << dropped_text
  else
    parent[2] = dropped_text
  end
  # trace(__method__, :done)
end

#keep_end ⇒ Object



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/csv/parser.rb', line 181

# Finishes the most recent keep and returns the text it covered.
#
# The text runs from the keep's start offset (or from the beginning of the
# current scanner's string when the keep was recorded for a different
# scanner) up to the current scan position. Text already buffered on the
# keep is placed in front of it.
def keep_end
  # trace(__method__, :start)
  kept_scanner, kept_start, buffered = @keeps.pop
  # A keep recorded for another scanner covers the current string from
  # byte 0.
  from = kept_scanner == @scanner ? kept_start : 0
  tail = @scanner.string.byteslice(from, @scanner.pos - from)
  result = buffered ? (buffered << tail) : tail
  # trace(__method__, :done, result)
  result
end

#keep_start ⇒ Object



174
175
176
177
178
179
# File 'lib/csv/parser.rb', line 174

# Starts a new keep at the current scan position.
#
# adjust_last_keep is called first; then a new entry recording the current
# scanner, its position and an empty (nil) buffer is put on top of the keep
# stack.
def keep_start
  # trace(__method__, :start)
  adjust_last_keep
  @keeps << [@scanner, @scanner.pos, nil]
  # trace(__method__, :done)
end

#rest ⇒ Object



253
254
255
# File 'lib/csv/parser.rb', line 253

# Returns the unscanned remainder of the current scanner's string
# (delegates to the scanner's +rest+). No further chunk is read here.
def rest
  @scanner.rest
end

#scan(pattern) ⇒ Object



143
144
145
146
147
148
149
150
151
152
# File 'lib/csv/parser.rb', line 143

# Scans +pattern+ at the current position of the current scanner and returns
# the scanner's result (nil when there is no match).
#
# Unless the current scanner is flagged as the last one, a successful match
# that consumes the scanner's string to its end triggers read_chunk so that
# the next call has data to work with.
def scan(pattern)
  # trace(__method__, pattern, :start)
  matched = @scanner.scan(pattern)
  unless @last_scanner
    read_chunk if matched && @scanner.eos?
  end
  # trace(__method__, pattern, :done, matched)
  matched
end

#scan_all(pattern) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/csv/parser.rb', line 154

# Scans +pattern+ like #scan, but lets a match continue into the following
# chunks.
#
# When the current scanner is flagged as the last one, or nothing matches,
# the scanner's result is returned directly. Otherwise, for as long as the
# scanner is exhausted, another chunk can be read and +pattern+ matches at
# the start of it, the new match is appended to the result.
def scan_all(pattern)
  # trace(__method__, pattern, :start)
  matched = @scanner.scan(pattern)
  # trace(__method__, pattern, :done, :last, matched) if @last_scanner
  # trace(__method__, pattern, :done, :nil) if matched.nil?
  return matched if @last_scanner || matched.nil?

  while @scanner.eos? && read_chunk
    piece = @scanner.scan(pattern)
    break unless piece

    # trace(__method__, pattern, :sub, piece)
    matched << piece
  end
  # trace(__method__, pattern, :done, matched)
  matched
end