|
class CSV
  #
  # Reads one row from <tt>@io</tt> and returns its parsed fields.
  #
  # Physical lines are fetched with <tt>@io.gets(@row_sep)</tt>. A quoted
  # field may contain row separators, so several reads can be needed before
  # a complete row has been assembled.
  #
  # Returns:
  # * +nil+ when <tt>@io.gets</tt> has nothing more to return,
  # * an Array of fields (empty unquoted fields become +nil+, a blank line
  #   becomes an empty Array),
  # * whatever +parse_headers+ returns when <tt>@use_headers</tt> is set
  #   (presumably a CSV::Row -- +parse_headers+ is defined elsewhere).
  #
  # When <tt>@unconverted_fields</tt> is set the result is additionally passed
  # through +add_unconverted_fields+.
  #
  # Raises MalformedCSVError for stray or unclosed quotes, for \r or \n in
  # unquoted fields, and when an unfinished quoted field reaches
  # <tt>@field_size_limit</tt>.
  #
  def shift
    #########################################################################
    ### This method is purposefully kept a bit long as simple conditional ###
    ### checks are faster than numerous (expensive) method calls.         ###
    #########################################################################

    # handle headers not based on document content
    if header_row? and @return_headers and
       [Array, String].include?(@use_headers.class)
      if @unconverted_fields
        return add_unconverted_fields(parse_headers, Array.new)
      else
        return parse_headers
      end
    end

    #
    # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
    # because of \r and/or \n characters embedded in quoted fields
    #
    in_extended_col = false
    csv             = Array.new

    loop do
      # add another read to the line
      unless parse = @io.gets(@row_sep)
        return nil
      end

      # drop whatever @parsers[:line_end] matches (the pattern is built
      # elsewhere; presumably the trailing row separator)
      parse.sub!(@parsers[:line_end], "")

      if csv.empty?
        #
        # I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
        # CSV's <tt>[nil]</tt>
        #
        if parse.empty?
          @lineno += 1
          if @skip_blanks
            next
          elsif @unconverted_fields
            return add_unconverted_fields(Array.new, Array.new)
          elsif @use_headers
            return self.class::Row.new(Array.new, Array.new)
          else
            return Array.new
          end
        end
      end

      next if @skip_lines and @skip_lines.match parse

      parts = parse.split(@col_sep, -1)
      if parts.empty?
        if in_extended_col
          # blank physical line inside a quoted field: tack on a @col_sep so
          # the swap after the parts.each loop turns it into a @row_sep
          csv[-1] << @col_sep
        else
          csv << nil
        end
      end

      # This loop is the hot path of csv parsing. Some things may be non-dry
      # for a reason. Make sure to benchmark when refactoring.
      parts.each do |part|
        if in_extended_col
          # If we are continuing a previous column
          if part[-1] == @quote_char && part.count(@quote_char) % 2 != 0
            # extended column ends
            csv.last << part[0..-2]
            if csv.last =~ @parsers[:stray_quote]
              raise MalformedCSVError,
                    "Missing or stray quote in line #{lineno + 1}"
            end
            csv.last.gsub!(@quote_char * 2, @quote_char)
            in_extended_col = false
          else
            csv.last << part
            csv.last << @col_sep
          end
        elsif part[0] == @quote_char
          # If we are starting a new quoted column
          if part.count(@quote_char) % 2 != 0
            # start an extended column
            csv             << part[1..-1]
            csv.last        << @col_sep
            in_extended_col =  true
          elsif part[-1] == @quote_char
            # regular quoted column
            csv << part[1..-2]
            if csv.last =~ @parsers[:stray_quote]
              raise MalformedCSVError,
                    "Missing or stray quote in line #{lineno + 1}"
            end
            csv.last.gsub!(@quote_char * 2, @quote_char)
          elsif @liberal_parsing
            csv << part
          else
            raise MalformedCSVError,
                  "Missing or stray quote in line #{lineno + 1}"
          end
        elsif part =~ @parsers[:quote_or_nl]
          # Unquoted field with bad characters.
          if part =~ @parsers[:nl_or_lf]
            raise MalformedCSVError, "Unquoted fields do not allow " +
                                     "\\r or \\n (line #{lineno + 1})."
          else
            if @liberal_parsing
              csv << part
            else
              raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
            end
          end
        else
          # Regular ole unquoted field.
          csv << (part.empty? ? nil : part)
        end
      end

      if in_extended_col
        # Every path that leaves us inside an extended column has just
        # appended one full @col_sep to the unfinished field. Swap that whole
        # separator (not merely its last character, which would corrupt the
        # field when @col_sep is longer than one character) for the @row_sep
        # that actually ended this physical line.
        csv.last[-@col_sep.size..-1] = @row_sep

        # if we're at eof?(), a quoted field wasn't closed...
        if @io.eof?
          raise MalformedCSVError,
                "Unclosed quoted field on line #{lineno + 1}."
        elsif @field_size_limit and csv.last.size >= @field_size_limit
          raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
        end
        # otherwise, we need to loop and pull some more data to complete the row
      else
        @lineno += 1

        # save fields unconverted fields, if needed...
        unconverted = csv.dup if @unconverted_fields

        # convert fields, if needed...
        csv = convert_fields(csv) unless @use_headers or @converters.empty?
        # parse out header rows and handle CSV::Row conversions...
        csv = parse_headers(csv)  if     @use_headers

        # inject unconverted fields and accessor, if requested...
        if @unconverted_fields and not csv.respond_to? :unconverted_fields
          add_unconverted_fields(csv, unconverted)
        end

        # return the results
        break csv
      end
    end
  end
end
|