class CSV::Parser::InputsScanner

CSV::Parser::InputsScanner receives a list of IO inputs, an encoding and a chunk_size. It also manages the life cycle of kept (marked) scan positions through its methods keep_start, keep_end, keep_back and keep_drop.

CSV::Parser::InputsScanner#scan tries to match the given pattern at the current position. If there's a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.

CSV::Parser::InputsScanner#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? returns true), it returns an empty string ("").

Public Class Methods

new(inputs, encoding, chunk_size: 8192) click to toggle source
# File lib/csv/parser.rb, line 87
def initialize(inputs, encoding, chunk_size: 8192)
  @inputs = inputs.dup
  @encoding = encoding
  @chunk_size = chunk_size
  @last_scanner = @inputs.empty?
  @keeps = []
  read_chunk
end

Public Instance Methods

each_line(row_separator) { |buffer| ... } click to toggle source
# File lib/csv/parser.rb, line 96
# Yields the remaining input one line at a time, where a line ends with
# +row_separator+. Further chunks are pulled in with read_chunk until it
# returns false. Data left over without a trailing separator is yielded last.
#
# A line that straddles two chunks is accumulated in +buffer+ and yielded
# once its separator arrives. After every complete line the scanner position
# is set to just past that line.
#
# Returns an Enumerator when called without a block, so the scanner state is
# not touched before a consumer exists.
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?

  buffer = nil
  input = @scanner.rest
  position = @scanner.pos
  # Negative size of the carried-over buffer: those bytes came from the
  # previous chunk and must not count toward the position in the new one.
  offset = 0
  n_row_separator_chars = row_separator.size
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 and
          buffer.end_with?(row_separator[0]) and
          line.start_with?(row_separator[1])
          # A two-character separator was split across chunks: its first
          # character ends the buffer and its second one starts this line.
          # Move that character over so the buffer becomes a complete line.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          # Ordinary continuation of a partial line from the previous chunk.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # Incomplete line: hold it until more data (or the end) arrives.
        buffer = line
      end
    end
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  yield(buffer) if buffer
end
eos?() click to toggle source
# File lib/csv/parser.rb, line 163
# Tells whether the scanner for the currently loaded chunk has been fully
# consumed. Only the current scanner is asked; remaining inputs are not
# inspected here.
def eos?
  @scanner.eos?
end
keep_back() click to toggle source
# File lib/csv/parser.rb, line 181
# Pops the most recent keep and rewinds scanning to where that keep began.
#
# If no chunk was read since the keep started (no buffer was recorded), the
# scan pointer is simply moved back. Otherwise the current scanner's string,
# from the recorded start to its end, is pushed back onto the front of the
# inputs as a StringIO, and scanning restarts on the recorded buffer.
# Finally, if the resulting scanner is already exhausted, another chunk is
# read.
def keep_back
  saved_pos, saved_buffer = @keeps.pop
  if saved_buffer
    current = @scanner.string
    pending = current.byteslice(saved_pos, current.bytesize - saved_pos)
    unless pending.nil? || pending.empty?
      # Re-queue the chunk so it is read again after the buffered data.
      @inputs.unshift(StringIO.new(pending))
      @last_scanner = false
    end
    @scanner = StringScanner.new(saved_buffer)
  else
    @scanner.pos = saved_pos
  end
  read_chunk if @scanner.eos?
end
keep_drop() click to toggle source
# File lib/csv/parser.rb, line 197
# Discards the most recent keep without moving the scan pointer.
# Returns the removed entry, or nil when there was none.
def keep_drop
  @keeps.pop
end
keep_end() click to toggle source
# File lib/csv/parser.rb, line 171
# Pops the most recent keep and returns everything scanned since it began.
#
# The bytes between the recorded start and the current scan pointer are taken
# from the current scanner's string. If earlier chunks were buffered for this
# keep, those bytes are appended to that buffer and the buffer is returned.
def keep_end
  from, carried = @keeps.pop
  tail = @scanner.string.byteslice(from, @scanner.pos - from)
  return tail unless carried

  carried << tail
end
keep_start() click to toggle source
# File lib/csv/parser.rb, line 167
# Starts a keep: records the current scan position on the keep stack. The
# second slot of the entry starts as nil and holds buffered data once a chunk
# boundary is crossed.
def keep_start
  @keeps << [@scanner.pos, nil]
end
rest() click to toggle source
# File lib/csv/parser.rb, line 201
# Returns the part of the current scanner's string that lies after the scan
# pointer.
def rest
  @scanner.rest
end
scan(pattern) click to toggle source
# File lib/csv/parser.rb, line 140
# Tries to match +pattern+ at the scan pointer of the current chunk.
#
# Returns the matched string, or nil when there is no match. When the match
# consumes the rest of the current chunk and this is not the last scanner,
# the next chunk is loaded before returning.
def scan(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner

  read_chunk if matched && @scanner.eos?
  matched
end
scan_all(pattern) click to toggle source
# File lib/csv/parser.rb, line 152
# Like scan, but keeps matching +pattern+ across chunk boundaries.
#
# After a first match, for as long as the current chunk is exhausted and
# another chunk can be read, the pattern is tried again at the start of the
# new chunk and any match is appended to the result. Returns the accumulated
# string, or nil when the first attempt does not match.
def scan_all(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner || matched.nil?

  while @scanner.eos? && read_chunk
    piece = @scanner.scan(pattern)
    break unless piece

    matched << piece
  end
  matched
end

Private Instance Methods

read_chunk() click to toggle source
# File lib/csv/parser.rb, line 206
# Replaces the current scanner with one over the next piece of input.
#
# Returns false when the last scanner has already been created or when the
# inputs run out; returns true when a new scanner holding data was installed.
#
# Before switching, the data scanned so far for the most recent keep (if any)
# is saved into that keep's buffer and its start is reset to 0, so the keep
# continues from the beginning of the new chunk.
#
# A StringIO input is read in full and removed from the inputs. Any other
# input is read with gets(nil, @chunk_size) and removed once it reports eof?
# or returns no data. Raises InvalidEncoding when the data read is not valid
# in its encoding.
def read_chunk
  return false if @last_scanner

  unless @keeps.empty?
    entry = @keeps.last
    from = entry[0]
    consumed = @scanner.string.byteslice(from, @scanner.pos - from)
    if consumed
      if entry[1]
        entry[1] << consumed
      else
        entry[1] = consumed.dup
      end
    end
    entry[0] = 0
  end

  source = @inputs.first
  if source.is_a?(StringIO)
    text = source.read
    raise InvalidEncoding unless text.valid_encoding?
    @scanner = StringScanner.new(text)
    @inputs.shift
    @last_scanner = @inputs.empty?
    return true
  end

  chunk = source.gets(nil, @chunk_size)
  unless chunk
    # This input is exhausted: drop it and move on to the next one, if any.
    @scanner = StringScanner.new("".encode(@encoding))
    @inputs.shift
    @last_scanner = @inputs.empty?
    return @last_scanner ? false : read_chunk
  end

  raise InvalidEncoding unless chunk.valid_encoding?
  @scanner = StringScanner.new(chunk)
  if source.respond_to?(:eof?) && source.eof?
    @inputs.shift
    @last_scanner = @inputs.empty?
  end
  true
end