class CSV::Parser

Constants

SCANNER_TEST

Public Class Methods

new(input, options) click to toggle source
# File lib/csv/parser.rb, line 169
# Creates a parser for +input+ and immediately derives all parsing state.
#
# input   - the data source; kept as-is in @input.
# options - the option collection read by the preparation steps.
#
# @samples must exist before #prepare runs because row-separator
# detection may append the chunks it reads from the input to it.
def initialize(input, options)
  @input, @options = input, options
  @samples = []
  @parsed = false

  prepare
end

Public Instance Methods

column_separator() click to toggle source
# File lib/csv/parser.rb, line 178
# Returns the column separator in use: options[:column_separator]
# converted with to_s and encoded to the parser's encoding.
def column_separator
  @column_separator
end
field_size_limit() click to toggle source
# File lib/csv/parser.rb, line 190
# Returns options[:field_size_limit] as stored during preparation.
# #parse raises MalformedCSVError once a field's size is greater than or
# equal to this value; a nil limit disables that check.
def field_size_limit
  @field_size_limit
end
header_row?() click to toggle source
# File lib/csv/parser.rb, line 206
# Tells whether the next emitted row will be treated as the header row:
# headers are in use but have not been determined yet.
#
# Returns @use_headers itself when it is falsy, otherwise true or false.
def header_row?
  @use_headers && @headers.nil?
end
headers() click to toggle source
# File lib/csv/parser.rb, line 202
# Returns the converted headers, or nil while none are known. They are set
# during preparation when headers are supplied up front (Array or String),
# otherwise from the first emitted row when headers are in use.
def headers
  @headers
end
liberal_parsing?() click to toggle source
# File lib/csv/parser.rb, line 218
# Returns true when options[:liberal_parsing] was truthy, false otherwise
# (the option is normalized to a strict boolean during preparation).
def liberal_parsing?
  @liberal_parsing
end
line() click to toggle source
# File lib/csv/parser.rb, line 226
# Public reader for the value produced by the private #last_line helper.
def line
  last_line
end
lineno() click to toggle source
# File lib/csv/parser.rb, line 222
# Returns the row counter: 0 after preparation, incremented once for every
# row passed to #emit_row. Blank rows dropped because of :skip_blanks never
# reach #emit_row and therefore are not counted.
def lineno
  @lineno
end
parse() { |headers| ... } click to toggle source
# File lib/csv/parser.rb, line 230
# Parses the input and yields every resulting row to the block.
#
# Without a block an Enumerator over this method is returned. Once a parse
# has run to completion (@parsed), later calls return immediately. When the
# headers are already known and :return_headers is set, the header row is
# yielded before any data row.
#
# Raises MalformedCSVError, tagged with the 1-based number of the row being
# read (@lineno + 1), when:
# * a field's size reaches @field_size_limit,
# * anything other than a column/row end follows a quoted field,
# * a CR or LF that is not the row separator follows an unquoted field,
# * a quote character shows up where it is not allowed,
# * the input cannot be consumed by any rule, or
# * the scanner reports InvalidEncoding.
def parse(&block)
  return to_enum(__method__) unless block_given?

  return if @parsed

  if @return_headers and @headers
    headers = Row.new(@headers, @raw_headers, true)
    if @unconverted_fields
      headers = add_unconverted_fields(headers, [])
    end
    yield headers
  end

  row = []
  begin
    @scanner = build_scanner
    skip_needless_lines
    start_row
    while true
      # The value parsers flip these flags; they are only consulted below to
      # pick the right error message.
      @quoted_column_value = false
      @unquoted_column_value = false
      value = parse_column_value
      if value and @field_size_limit and value.size >= @field_size_limit
        raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
      end
      if parse_column_end
        row << value
      elsif parse_row_end
        if row.empty? and value.nil?
          # Completely blank line.
          emit_row([], &block) unless @skip_blanks
        else
          row << value
          emit_row(row, &block)
          row = []
        end
        skip_needless_lines
        start_row
      elsif @scanner.eos?
        # Input ended without a trailing row separator.
        break if row.empty? and value.nil?
        row << value
        emit_row(row, &block)
        break
      else
        if @quoted_column_value
          message = "Do not allow except col_sep_split_separator " +
            "after quoted fields"
          raise MalformedCSVError.new(message, @lineno + 1)
        elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
          message = "Unquoted fields do not allow \\r or \\n"
          raise MalformedCSVError.new(message, @lineno + 1)
        elsif @scanner.rest.start_with?(@quote_character)
          message = "Illegal quoting"
          raise MalformedCSVError.new(message, @lineno + 1)
        else
          # Nothing matched: show the start of the offending input instead of
          # leaking a placeholder message to the caller.
          unexpected = @scanner.rest[0, 16].inspect
          message = "Unexpected input #{unexpected}: expected a field, " +
            "a column separator or a row separator"
          raise MalformedCSVError.new(message, @lineno + 1)
        end
      end
    end
  rescue InvalidEncoding
    message = "Invalid byte sequence in #{@encoding}"
    raise MalformedCSVError.new(message, @lineno + 1)
  end

  @parsed = true
end
quote_character() click to toggle source
# File lib/csv/parser.rb, line 186
# Returns the quote character: options[:quote_character] converted with
# to_s and encoded to the parser's encoding. Preparation rejects any value
# whose length is not exactly 1.
def quote_character
  @quote_character
end
return_headers?() click to toggle source
# File lib/csv/parser.rb, line 210
# Returns options[:return_headers]. When truthy, the header row is yielded
# to the caller as well instead of only being consumed.
def return_headers?
  @return_headers
end
row_separator() click to toggle source
# File lib/csv/parser.rb, line 182
# Returns the row separator in use, encoded to the parser's encoding. When
# options[:row_separator] is :auto this is the separator resolved during
# preparation.
def row_separator
  @row_separator
end
skip_blanks?() click to toggle source
# File lib/csv/parser.rb, line 214
# Returns options[:skip_blanks]. When truthy, #parse does not emit rows for
# completely empty lines.
def skip_blanks?
  @skip_blanks
end
skip_lines() click to toggle source
# File lib/csv/parser.rb, line 194
# Returns the line-skipping matcher taken from options[:skip_lines]: nil, a
# Regexp, a String (encoded to the parser's encoding), or another object
# that responds to #match.
def skip_lines
  @skip_lines
end
unconverted_fields?() click to toggle source
# File lib/csv/parser.rb, line 198
# Returns options[:unconverted_fields]. When truthy, emitted rows are given
# an unconverted_fields reader holding the fields before conversion.
def unconverted_fields?
  @unconverted_fields
end
use_headers?() click to toggle source
# File lib/csv/parser.rb, line 297
# Returns true when options[:headers] was anything other than nil or false,
# i.e. when rows are to be associated with headers.
def use_headers?
  @use_headers
end

Private Instance Methods

add_unconverted_fields(row, fields) click to toggle source

This method injects an instance variable unconverted_fields into row and an accessor method for row called unconverted_fields(). The variable is set to the contents of fields.

# File lib/csv/parser.rb, line 754
# Gives +row+ (and only that object) an unconverted_fields reader and
# stores +fields+ behind it.
#
# Returns the same +row+ object.
def add_unconverted_fields(row, fields)
  # Define the reader on the singleton class so other rows stay untouched.
  row.singleton_class.send(:attr_reader, :unconverted_fields)
  row.instance_variable_set(:@unconverted_fields, fields)
  row
end
adjust_headers(headers) click to toggle source
# File lib/csv/parser.rb, line 506
# Runs +headers+ through the header fields converter and freezes every
# String in the converted result.
#
# Returns the converted collection.
def adjust_headers(headers)
  @header_fields_converter.convert(headers, nil, @lineno).tap do |converted|
    converted.each { |header| header.freeze if header.is_a?(String) }
  end
end
build_scanner() click to toggle source
# File lib/csv/parser.rb, line 542
# Builds the scanner used by #parse.
#
# The chunks already consumed while sampling the input (@samples) are
# replayed first, followed by the input itself. A StringIO input is
# re-wrapped from its full string rather than used directly.
def build_scanner
  inputs = @samples.map { |sample| UnoptimizedStringIO.new(sample) }
  inputs << (@input.is_a?(StringIO) ? UnoptimizedStringIO.new(@input.string) : @input)
  InputsScanner.new(inputs, @encoding, chunk_size: 1)
end
detect_row_separator(sample, cr, lf) click to toggle source
# File lib/csv/parser.rb, line 436
# Guesses the row separator from +sample+.
#
# sample - text to inspect.
# cr, lf - the carriage-return and line-feed strings to look for.
#
# Returns cr + lf when the first LF is immediately preceded by a CR, cr
# when a CR occurs before the first LF (or there is no LF at all), lf when
# only an LF is found, and :auto when neither occurs.
def detect_row_separator(sample, cr, lf)
  lf_at = sample.index(lf)
  # Only a CR located before the first LF is relevant.
  searched = lf_at ? sample[0, lf_at] : sample
  cr_at = searched.index(cr)

  return (lf_at ? lf : :auto) unless cr_at
  return cr unless lf_at

  cr_at + 1 == lf_at ? cr + lf : cr
end
emit_row(row) { |row| ... } click to toggle source
# File lib/csv/parser.rb, line 725
# Counts, converts and yields one parsed row.
#
# * Headers not in use: the converted fields are yielded.
# * Headers in use but unknown: +row+ becomes the headers; it is yielded
#   as a header Row only when :return_headers is set, otherwise nothing is
#   yielded.
# * Headers known: a Row pairing the headers with the converted fields is
#   yielded.
#
# When :unconverted_fields is set, the raw fields are attached to the
# yielded object unless it already responds to unconverted_fields.
def emit_row(row, &block)
  @lineno += 1

  raw_row = row
  if !@use_headers
    row = @fields_converter.convert(raw_row, nil, @lineno)
  elsif @headers.nil?
    @headers = adjust_headers(row)
    return unless @return_headers
    row = Row.new(@headers, row, true)
  else
    converted = @fields_converter.convert(raw_row, @headers, @lineno)
    row = Row.new(@headers, converted)
  end

  if @unconverted_fields && !row.respond_to?(:unconverted_fields)
    add_unconverted_fields(row, raw_row)
  end

  yield(row)
end
last_line() click to toggle source
# File lib/csv/parser.rb, line 466
# Returns the cached last line. Once a scanner exists, a missing value is
# filled in from the scanner's keep_end and memoized in @last_line.
def last_line
  return @last_line unless @scanner

  @last_line ||= @scanner.keep_end
end
may_quoted?() click to toggle source
# File lib/csv/parser.rb, line 516
# Cheap heuristic: does the quote character occur within the first 128
# characters of the available input?
#
# The text inspected is the whole string of a StringIO input, otherwise
# the first collected sample. Returns false when no sample is available,
# else the index of the first quote character, or nil when there is none.
def may_quoted?
  from_string_io = @input.is_a?(StringIO)
  return false if !from_string_io && @samples.empty?

  sample = from_string_io ? @input.string : @samples.first
  sample[0, 128].index(@quote_character)
end
parse_column_end() click to toggle source
# File lib/csv/parser.rb, line 689
# Consumes a column separator at the current scanner position.
#
# The whole separator pattern is tried first. For a multi-character
# separator (@column_ends set) the per-character patterns are then tried in
# sequence; if any of them fails, the scanner is moved back to where it
# started.
#
# Returns true when a separator was consumed, false otherwise.
def parse_column_end
  return true if @scanner.scan(@column_end)
  return false unless @column_ends

  @scanner.keep_start
  matched = @column_ends.all? { |pattern| @scanner.scan(pattern) }
  matched ? @scanner.keep_drop : @scanner.keep_back
  matched
end
parse_column_value() click to toggle source
# File lib/csv/parser.rb, line 604
# Parses one field and returns its value (nil when no field text is found).
#
# Strict mode tries the quoted and unquoted parsers in the order suggested
# by @may_quoted. Liberal mode additionally accepts unquoted text directly
# after a quoted part and re-assembles both, quotes included.
def parse_column_value
  unless @liberal_parsing
    return parse_quoted_column_value || parse_unquoted_column_value if @may_quoted
    return parse_unquoted_column_value || parse_quoted_column_value
  end

  quoted = parse_quoted_column_value
  return parse_unquoted_column_value unless quoted

  trailing = parse_unquoted_column_value
  return quoted unless trailing

  if @double_quote_outside_quote
    # Doubled quotes outside the quoted part collapse into single ones.
    trailing = trailing.gsub(@quote_character * 2, @quote_character)
    return @quote_character + trailing if quoted.empty? # %Q{""...} case
  end
  @quote_character + quoted + @quote_character + trailing
end
parse_headers(row) click to toggle source
# File lib/csv/parser.rb, line 499
# Splits a header String into fields by parsing it as a single CSV line
# with this parser's column separator, row separator and quote character.
def parse_headers(row)
  line_options = {
    col_sep:    @column_separator,
    row_sep:    @row_separator,
    quote_char: @quote_character,
  }
  CSV.parse_line(row, **line_options)
end
parse_quoted_column_value() click to toggle source
# File lib/csv/parser.rb, line 657
# Parses a quoted field starting at the current scanner position.
#
# Returns nil when the position does not start with a quote character,
# otherwise the field content with doubled quotes collapsed to single
# ones. Sets @quoted_column_value as soon as an opening quote is seen.
#
# Raises MalformedCSVError when the input ends before the closing quote.
def parse_quoted_column_value
  opening = @scanner.scan_all(@quotes)
  return nil unless opening

  @quoted_column_value = true
  opening_size = opening.size
  # An even run is an opening quote, escaped pairs, and the closing quote.
  return opening[0, (opening_size - 2) / 2] if opening_size.even?

  # Odd run: one opening quote followed by escaped pairs.
  value = opening[0, (opening_size - 1) / 2]
  while true
    text = @scanner.scan_all(@quoted_value)
    value << text if text
    run = @scanner.scan_all(@quotes)
    unless run
      raise MalformedCSVError.new("Unclosed quoted field", @lineno + 1)
    end
    run_size = run.size
    if run_size.odd?
      # Escaped pairs (if any) followed by the closing quote.
      value << run[0, (run_size - 1) / 2] unless run_size == 1
      break
    end
    # Only escaped pairs: still inside the quoted field.
    value << run[0, run_size / 2]
  end
  value
end
parse_row_end() click to toggle source
# File lib/csv/parser.rb, line 703
# Consumes a row separator at the current scanner position.
#
# The whole separator pattern is tried first. For a multi-character
# separator (@row_ends set) the per-character patterns are then tried in
# sequence; if any of them fails, the scanner is moved back to where it
# started.
#
# Returns true when a separator was consumed, false otherwise.
def parse_row_end
  return true if @scanner.scan(@row_end)
  return false unless @row_ends

  @scanner.keep_start
  matched = @row_ends.all? { |pattern| @scanner.scan(pattern) }
  matched ? @scanner.keep_drop : @scanner.keep_back
  matched
end
parse_unquoted_column_value() click to toggle source
# File lib/csv/parser.rb, line 633
# Parses an unquoted field starting at the current scanner position.
#
# Returns nil when no unquoted text is present; otherwise the text, with
# @unquoted_column_value set. With a multi-character column separator
# (@first_column_separators set) the unquoted pattern stops at the
# separator's first character, so runs of that character which do not
# start a complete separator are appended to the value and scanning
# continues.
def parse_unquoted_column_value
  value = @scanner.scan_all(@unquoted_value)
  return nil unless value

  @unquoted_column_value = true
  return value unless @first_column_separators

  while true
    # Look ahead for a complete separator without consuming it.
    @scanner.keep_start
    at_column_end = @column_ends.all? { |pattern| @scanner.scan(pattern) }
    @scanner.keep_back
    break if at_column_end

    separator_run = @scanner.scan_all(@first_column_separators)
    break if separator_run.nil?
    value << separator_run

    continuation = @scanner.scan_all(@unquoted_value)
    break if continuation.nil?
    value << continuation
  end
  value
end
prepare() click to toggle source
# File lib/csv/parser.rb, line 302
# Runs every preparation step. The order is significant:
# * prepare_variable sets @encoding, which prepare_regexp uses to encode
#   the separators and patterns;
# * prepare_line sets @lineno, which prepare_header reads (through
#   adjust_headers) when headers are supplied up front;
# * prepare_parser relies on @quote_character from prepare_regexp.
def prepare
  prepare_variable
  prepare_regexp
  prepare_line
  prepare_header
  prepare_parser
end
prepare_header() click to toggle source
# File lib/csv/parser.rb, line 474
# Derives the header state from options[:headers] and
# options[:return_headers].
#
# * Array  - used as the raw headers.
# * String - parsed into the raw headers with parse_headers.
# * nil or false - headers are not used.
# * anything else - headers are used but not known yet.
#
# @headers holds the adjusted raw headers, or nil when there are none.
def prepare_header
  @return_headers = @options[:return_headers]

  headers = @options[:headers]
  @raw_headers =
    case headers
    when Array then headers
    when String then parse_headers(headers)
    end
  # Only nil and false switch header handling off.
  @use_headers = headers ? true : false
  @headers = @raw_headers ? adjust_headers(@raw_headers) : nil
end
prepare_line() click to toggle source
# File lib/csv/parser.rb, line 460
# Resets the per-parse bookkeeping: the row counter goes back to 0 and
# both the cached last line and the scanner are cleared.
def prepare_line
  @lineno = 0
  @last_line = @scanner = nil
end
prepare_parser() click to toggle source
# File lib/csv/parser.rb, line 512
# Caches the result of may_quoted? in @may_quoted. parse_column_value uses
# it (outside liberal parsing) to decide whether the quoted or the unquoted
# parser is tried first.
def prepare_parser
  @may_quoted = may_quoted?
end
prepare_regexp() click to toggle source
# File lib/csv/parser.rb, line 331
# Resolves the separators, quote character and :skip_lines matcher, and
# compiles every pattern the parser scans with. All strings and patterns
# are encoded to @encoding.
#
# Raises ArgumentError when the column separator is empty, when the quote
# character is not exactly one character long, or when :skip_lines is
# neither nil, a String, a Regexp nor an object responding to #match.
def prepare_regexp
  column_separator = @options[:column_separator]
  @column_separator = column_separator.to_s.encode(@encoding)
  # An empty separator has no first character, so the patterns below could
  # not be built; fail early with a message that names the option.
  if @column_separator.size < 1
    message = ":col_sep must be 1 or more characters: " +
      column_separator.inspect
    raise ArgumentError, message
  end
  @row_separator =
    resolve_row_separator(@options[:row_separator]).encode(@encoding)
  @quote_character = @options[:quote_character].to_s.encode(@encoding)
  if @quote_character.length != 1
    raise ArgumentError, ":quote_char has to be a single character String"
  end

  escaped_column_separator = Regexp.escape(@column_separator)
  escaped_first_column_separator = Regexp.escape(@column_separator[0])
  escaped_row_separator = Regexp.escape(@row_separator)
  escaped_quote_character = Regexp.escape(@quote_character)

  skip_lines = @options[:skip_lines]
  case skip_lines
  when String
    @skip_lines = skip_lines.encode(@encoding)
  when Regexp, nil
    @skip_lines = skip_lines
  else
    unless skip_lines.respond_to?(:match)
      message =
        ":skip_lines has to respond to \#match: #{skip_lines.inspect}"
      raise ArgumentError, message
    end
    @skip_lines = skip_lines
  end

  @column_end = Regexp.new(escaped_column_separator)
  if @column_separator.size > 1
    # Per-character patterns let a multi-character separator be matched
    # piece by piece.
    @column_ends = @column_separator.each_char.collect do |char|
      Regexp.new(Regexp.escape(char))
    end
    @first_column_separators = Regexp.new(escaped_first_column_separator +
                                          "+".encode(@encoding))
  else
    @column_ends = nil
    @first_column_separators = nil
  end
  @row_end = Regexp.new(escaped_row_separator)
  if @row_separator.size > 1
    @row_ends = @row_separator.each_char.collect do |char|
      Regexp.new(Regexp.escape(char))
    end
  else
    @row_ends = nil
  end
  @quotes = Regexp.new(escaped_quote_character +
                       "+".encode(@encoding))
  @quoted_value = Regexp.new("[^".encode(@encoding) +
                             escaped_quote_character +
                             "]+".encode(@encoding))
  if @liberal_parsing
    # Liberal parsing tolerates quote characters inside unquoted values.
    @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                 escaped_first_column_separator +
                                 "\r\n]+".encode(@encoding))
  else
    @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                 escaped_quote_character +
                                 escaped_first_column_separator +
                                 "\r\n]+".encode(@encoding))
  end
  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
end
prepare_variable() click to toggle source
# File lib/csv/parser.rb, line 310
# Copies the plain option values into instance variables.
#
# :liberal_parsing is normalized to true/false. While it is enabled,
# @double_quote_outside_quote is taken from the option Hash, or set to
# false when the option is not a Hash; while it is disabled that variable
# is left untouched.
def prepare_variable
  @encoding = @options[:encoding]

  liberal_parsing = @options[:liberal_parsing]
  @liberal_parsing = liberal_parsing ? true : false
  if liberal_parsing
    @double_quote_outside_quote =
      if liberal_parsing.is_a?(Hash)
        liberal_parsing[:double_quote_outside_quote]
      else
        false
      end
  end

  @unconverted_fields = @options[:unconverted_fields]
  @field_size_limit = @options[:field_size_limit]
  @skip_blanks = @options[:skip_blanks]
  @fields_converter = @options[:fields_converter]
  @header_fields_converter = @options[:header_fields_converter]
end
resolve_row_separator(separator) click to toggle source
# File lib/csv/parser.rb, line 398
# Turns the :row_separator option into a concrete, encoded String.
#
# Anything but :auto is simply converted with to_s and encoded. For :auto
# the separator is detected from the data: directly from the string of a
# StringIO input, otherwise by reading chunks from an input that responds
# to #gets. Every chunk read this way is remembered in @samples. When
# nothing can be detected, $INPUT_RECORD_SEPARATOR is used.
def resolve_row_separator(separator)
  return separator.to_s.encode(@encoding) unless separator == :auto

  cr = "\r".encode(@encoding)
  lf = "\n".encode(@encoding)
  if @input.is_a?(StringIO)
    separator = detect_row_separator(@input.string, cr, lf)
  elsif @input.respond_to?(:gets)
    chunk_size = @input.is_a?(File) ? 32 * 1024 : 1024
    begin
      while separator == :auto
        # Running out of data most likely means a single-line input; the
        # fallback after this block supplies the default separator.
        sample = @input.gets(nil, chunk_size)
        break unless sample

        # A chunk ending in CR may be the first half of CR+LF: read one
        # more character before deciding.
        sample << (@input.gets(nil, 1) || "") if sample.end_with?(cr)

        @samples << sample

        separator = detect_row_separator(sample, cr, lf)
      end
    rescue IOError
      # Ignored on purpose: the fallback below supplies the default.
    end
  end
  separator = $INPUT_RECORD_SEPARATOR if separator == :auto
  separator.to_s.encode(@encoding)
end
skip_line?(line) click to toggle source
# File lib/csv/parser.rb, line 593
# Decides whether +line+ is to be skipped according to @skip_lines.
#
# A String matcher skips lines that contain it, a Regexp matcher lines it
# matches; any other matcher is asked through #match and its result is
# returned as-is.
def skip_line?(line)
  matcher = @skip_lines
  if matcher.is_a?(String)
    line.include?(matcher)
  elsif matcher.is_a?(Regexp)
    matcher.match?(line)
  else
    matcher.match(line)
  end
end
skip_needless_lines() click to toggle source
# File lib/csv/parser.rb, line 577
# Consumes every leading line that skip_line? accepts and leaves the
# scanner at the start of the first line that must be parsed. Does nothing
# when no :skip_lines matcher is configured.
#
# The loop stops at end of input. Without that bound, a matcher that
# accepts the empty line (for example /\A\s*\z/) would match the empty
# remainder over and over and never return.
def skip_needless_lines
  return unless @skip_lines

  until @scanner.eos?
    @scanner.keep_start
    line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
    # The matcher sees the line including its row separator, if present.
    line << @row_separator if parse_row_end
    if skip_line?(line)
      @scanner.keep_drop
    else
      # Not skippable: rewind so the parser sees the whole line.
      @scanner.keep_back
      return
    end
  end
end
start_row() click to toggle source
# File lib/csv/parser.rb, line 716
# Marks the beginning of a new row in the scanner.
#
# If a last line was cached it is only cleared; otherwise the scanner's
# current keep region is dropped. In both cases a new keep region is
# started afterwards.
def start_row
  cached_line = @last_line
  @last_line = nil if cached_line
  @scanner.keep_drop unless cached_line
  @scanner.keep_start
end