Bug #15908 » 0001-Enable-BOM-detection-with-non-UTF-encodings.patch
| io.c | ||
|---|---|---|
|
}
|
||
|
static const char bom_prefix[] = "bom|";
|
||
|
static const char utf_prefix[] = "utf-";
|
||
|
enum {bom_prefix_len = (int)sizeof(bom_prefix) - 1};
|
||
|
enum {utf_prefix_len = (int)sizeof(utf_prefix) - 1};
|
||
|
static int
|
||
|
io_encname_bom_p(const char *name, long len)
|
||
| ... | ... | |
|
if ((fmode & FMODE_SETENC_BY_BOM) || io_encname_bom_p(estr, len)) {
|
||
|
estr += bom_prefix_len;
|
||
|
len -= bom_prefix_len;
|
||
|
if (!STRNCASECMP(estr, utf_prefix, utf_prefix_len)) {
|
||
|
fmode |= FMODE_SETENC_BY_BOM;
|
||
|
}
|
||
|
else {
|
||
|
rb_enc_warn(estr_enc, "BOM with non-UTF encoding %s is nonsense", estr);
|
||
|
fmode &= ~FMODE_SETENC_BY_BOM;
|
||
|
}
|
||
|
fmode |= FMODE_SETENC_BY_BOM;
|
||
|
}
|
||
|
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
||
|
idx = -1;
|
||
| test/ruby/test_io_m17n.rb | ||
|---|---|---|
|
with_tmpdir {
|
||
|
text = "\uFEFFa"
|
||
|
stripped = "a"
|
||
|
%w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
|
||
|
%w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.product(%w[UTF-8 CP932]) do |name, enc|
|
||
|
path = '%s-bom.txt' % name
|
||
|
content = text.encode(name)
|
||
|
generate_file(path, content)
|
||
|
result = File.read(path, mode: 'rb:BOM|UTF-8')
|
||
|
assert_equal(content[1].force_encoding("ascii-8bit"),
|
||
|
result.force_encoding("ascii-8bit"))
|
||
|
result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8')
|
||
|
assert_equal(Encoding::UTF_8, result.encoding)
|
||
|
assert_equal(stripped, result)
|
||
|
result = File.read(path, mode: "rb:BOM|#{enc}")
|
||
|
assert_equal(Encoding.find(name), result.encoding, name)
|
||
|
assert_equal(stripped.encode(name), result, name)
|
||
|
result = File.read(path, mode: "rb:BOM|#{enc}:UTF-8")
|
||
|
assert_equal(Encoding::UTF_8, result.encoding, name)
|
||
|
assert_equal(stripped, result, name)
|
||
|
end
|
||
|
bug3407 = '[ruby-core:30641]'
|
||
|
path = 'UTF-8-bom.txt'
|
||
|
result = File.read(path, encoding: 'BOM|UTF-8')
|
||
|
assert_equal("a", result.force_encoding("ascii-8bit"), bug3407)
|
||
|
assert_equal("a", result, bug3407)
|
||
|
bug8323 = '[ruby-core:54563] [Bug #8323]'
|
||
|
expected = "a\xff".force_encoding("utf-8")
|
||
|
open(path, 'ab') {|f| f.write("\xff")}
|
||
|
result = File.read(path, encoding: 'BOM|UTF-8')
|
||
|
result = File.read(path, encoding: 'BOM|CP932')
|
||
|
assert_not_predicate(result, :valid_encoding?, bug8323)
|
||
|
assert_equal(expected, result, bug8323)
|
||
|
result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
|
||
|
result = File.read(path, encoding: 'BOM|CP932:UTF-8')
|
||
|
assert_not_predicate(result, :valid_encoding?, bug8323)
|
||
|
assert_equal(expected, result, bug8323)
|
||
| ... | ... | |
|
def test_bom_non_utf
|
||
|
enc = nil
|
||
|
assert_warn(/BOM/) {
|
||
|
assert_warn('') {
|
||
|
open(__FILE__, "r:bom|us-ascii") {|f| enc = f.external_encoding}
|
||
|
}
|
||
|
assert_equal(Encoding::US_ASCII, enc)
|
||
|
enc = nil
|
||
|
assert_warn(/BOM/) {
|
||
|
assert_warn('') {
|
||
|
open(__FILE__, "r", encoding: "bom|us-ascii") {|f| enc = f.external_encoding}
|
||
|
}
|
||
|
assert_equal(Encoding::US_ASCII, enc)
|
||
|
enc = nil
|
||
|
assert_warn(/BOM/) {
|
||
|
assert_warn('') {
|
||
|
open(IO::NULL, "w:bom|us-ascii") {|f| enc = f.external_encoding}
|
||
|
}
|
||
|
assert_equal(Encoding::US_ASCII, enc)
|
||
|
enc = nil
|
||
|
assert_warn(/BOM/) {
|
||
|
assert_warn('') {
|
||
|
open(IO::NULL, "w", encoding: "bom|us-ascii") {|f| enc = f.external_encoding}
|
||
|
}
|
||
|
assert_equal(Encoding::US_ASCII, enc)
|
||