Feature #11839 » csv_liberal_parsing.diff
| lib/csv.rb | ||
|---|---|---|
|
# <b><tt>:skip_blanks</tt></b>:: +false+
|
||
|
# <b><tt>:force_quotes</tt></b>:: +false+
|
||
|
# <b><tt>:skip_lines</tt></b>:: +nil+
|
||
|
# <b><tt>:liberal_parsing</tt></b>:: +false+
|
||
|
#
|
||
|
DEFAULT_OPTIONS = {
|
||
|
col_sep: ",",
|
||
| ... | ... | |
|
skip_blanks: false,
|
||
|
force_quotes: false,
|
||
|
skip_lines: nil,
|
||
|
liberal_parsing: false,
|
||
|
}.freeze
|
||
|
#
|
||
| ... | ... | |
|
# a comment. If the passed object does
|
||
|
# not respond to <tt>match</tt>,
|
||
|
# <tt>ArgumentError</tt> is raised.
|
||
|
# <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will
|
||
|
# attempt to parse input not conformant
|
||
|
# with RFC 4180, such as double quotes
|
||
|
# in unquoted fields.
|
||
|
#
|
||
|
# See CSV::DEFAULT_OPTIONS for the default settings.
|
||
|
#
|
||
| ... | ... | |
|
def skip_blanks?
  @skip_blanks
end
|
||
|
# Returns +true+ if all output fields are quoted. See CSV::new for details.
|
||
|
def force_quotes?
  @force_quotes
end
|
||
|
# Returns +true+ if CSV attempts to parse input not conformant with RFC 4180. See CSV::new for details.
|
||
|
def liberal_parsing?
  @liberal_parsing
end
|
||
|
#
|
||
|
# The Encoding CSV is parsing or writing in. This will be the Encoding you
|
||
| ... | ... | |
|
end
|
||
|
elsif part[0] == @quote_char
|
||
|
# If we are starting a new quoted column
|
||
|
if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
|
||
|
if part.count(@quote_char) % 2 != 0
|
||
|
# start an extended column
|
||
|
csv << part[1..-1]
|
||
|
csv.last << @col_sep
|
||
|
in_extended_col = true
|
||
|
else
|
||
|
elsif part[-1] == @quote_char
|
||
|
# regular quoted column
|
||
|
csv << part[1..-2]
|
||
|
if csv.last =~ @parsers[:stray_quote]
|
||
| ... | ... | |
|
"Missing or stray quote in line #{lineno + 1}"
|
||
|
end
|
||
|
csv.last.gsub!(@quote_char * 2, @quote_char)
|
||
|
elsif @liberal_parsing
|
||
|
csv << part
|
||
|
else
|
||
|
raise MalformedCSVError,
|
||
|
"Missing or stray quote in line #{lineno + 1}"
|
||
|
end
|
||
|
elsif part =~ @parsers[:quote_or_nl]
|
||
|
# Unquoted field with bad characters.
|
||
| ... | ... | |
|
raise MalformedCSVError, "Unquoted fields do not allow " +
|
||
|
"\\r or \\n (line #{lineno + 1})."
|
||
|
else
|
||
|
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
|
||
|
if @liberal_parsing
|
||
|
csv << part
|
||
|
else
|
||
|
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
|
||
|
end
|
||
|
end
|
||
|
else
|
||
|
# Regular ole unquoted field.
|
||
| ... | ... | |
|
str << " encoding:" << @encoding.name
|
||
|
# show other attributes
|
||
|
%w[ lineno col_sep row_sep
|
||
|
quote_char skip_blanks ].each do |attr_name|
|
||
|
quote_char skip_blanks liberal_parsing ].each do |attr_name|
|
||
|
if a = instance_variable_get("@#{attr_name}")
|
||
|
str << " " << attr_name << ":" << a.inspect
|
||
|
end
|
||
| ... | ... | |
|
# store the parser behaviors
|
||
|
@skip_blanks = options.delete(:skip_blanks)
|
||
|
@field_size_limit = options.delete(:field_size_limit)
|
||
|
@liberal_parsing = options.delete(:liberal_parsing)
|
||
|
# prebuild Regexps for faster parsing
|
||
|
esc_row_sep = escape_re(@row_sep)
|
||
| test/csv/test_features.rb | ||
|---|---|---|
|
assert_equal(3, count)
|
||
|
end
|
||
|
# Checks that the :liberal_parsing option accepts some quote usage that the
# default parser rejects with CSV::MalformedCSVError, and that it still
# rejects at least one malformed line.
def test_liberal_parsing
|
||
|
# Second field is unquoted but contains a double-quoted phrase.
input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
|
||
|
# With default options the line is rejected.
assert_raise(CSV::MalformedCSVError) do
|
||
|
CSV.parse_line(input)
|
||
|
end
|
||
|
# With liberal_parsing: true the quotes are kept as literal field content.
assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
|
||
|
CSV.parse_line(input, liberal_parsing: true))
|
||
|
# A field that opens with a quoted section and continues with unquoted text.
input = '"quoted" field'
|
||
|
assert_raise(CSV::MalformedCSVError) do
|
||
|
CSV.parse_line(input)
|
||
|
end
|
||
|
# In liberal mode the whole field, quotes included, is returned unchanged.
assert_equal(['"quoted" field'],
|
||
|
CSV.parse_line(input, liberal_parsing: true))
|
||
|
# This line is expected to be rejected even with liberal_parsing: true.
assert_raise(CSV::MalformedCSVError) do
|
||
|
CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
|
||
|
end
|
||
|
# Here the comma between the two quote characters still separates fields,
# and the quote characters stay in the returned values.
assert_equal(["is", 'this "three', ' or four"', "fields"],
|
||
|
CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true))
|
||
|
end
|
||
|
def test_csv_behavior_readers
|
||
|
%w[ unconverted_fields return_headers write_headers
|
||
|
skip_blanks force_quotes ].each do |behavior|
|
||
- « Previous
- 1
- 2
- 3
- Next »