Project

General

Profile

Feature #11094 » 0001-enc-utf_8.c-pack.c-limit-UTF-8.patch

nobu (Nobuyoshi Nakada), 04/25/2015 04:42 AM

View differences:

enc/utf_8.c
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
#define VALID_CODE_LIMIT 0x7fffffff
#endif
#define VALID_CODE_LIMIT 0x0010ffff
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
......
if ((code & 0xffffff80) == 0) return 1;
else if ((code & 0xfffff800) == 0) return 2;
else if ((code & 0xffff0000) == 0) return 3;
else if ((code & 0xffe00000) == 0) return 4;
else if ((code & 0xfc000000) == 0) return 5;
else if ((code & 0x80000000) == 0) return 6;
else if (code <= VALID_CODE_LIMIT) return 4;
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) return 1;
else if (code == INVALID_CODE_FF) return 1;
......
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xffe00000) == 0) {
else if (code <= VALID_CODE_LIMIT) {
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xfc000000) == 0) {
*p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0x80000000) == 0) {
*p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
*p++ = UTF8_TRAILS(code, 24);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) {
*p = 0xfe;
pack.c
buf[2] = castchar((uv&0x3f)|0x80);
return 3;
}
if (uv <= 0x1fffff) {
if (uv <= 0x10ffff) {
buf[0] = castchar(((uv>>18)&0xff)|0xf0);
buf[1] = castchar(((uv>>12)&0x3f)|0x80);
buf[2] = castchar(((uv>>6)&0x3f)|0x80);
buf[3] = castchar((uv&0x3f)|0x80);
return 4;
}
if (uv <= 0x3ffffff) {
buf[0] = castchar(((uv>>24)&0xff)|0xf8);
buf[1] = castchar(((uv>>18)&0x3f)|0x80);
buf[2] = castchar(((uv>>12)&0x3f)|0x80);
buf[3] = castchar(((uv>>6)&0x3f)|0x80);
buf[4] = castchar((uv&0x3f)|0x80);
return 5;
}
if (uv <= 0x7fffffff) {
buf[0] = castchar(((uv>>30)&0xff)|0xfc);
buf[1] = castchar(((uv>>24)&0x3f)|0x80);
buf[2] = castchar(((uv>>18)&0x3f)|0x80);
buf[3] = castchar(((uv>>12)&0x3f)|0x80);
buf[4] = castchar(((uv>>6)&0x3f)|0x80);
buf[5] = castchar((uv&0x3f)|0x80);
return 6;
}
rb_raise(rb_eRangeError, "pack(U): value out of range");
UNREACHABLE;
......
0x80, /* 2 */
0x800, /* 3 */
0x10000, /* 4 */
0x200000, /* 5 */
0x4000000, /* 6 */
0x80000000, /* 7 */
};
static unsigned long
......
if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
else {
*lenp = 1;
rb_raise(rb_eArgError, "malformed UTF-8 character");
......
if (uv < utf8_limits[n]) {
rb_raise(rb_eArgError, "redundant UTF-8 sequence");
}
else if (uv > 0x10ffff) {
rb_raise(rb_eRangeError, "unpack(U): value out of range");
}
return uv;
}
test/ruby/test_integer_comb.rb
def test_pack_utf8
template = "U"
VS.reverse_each {|a|
if a < 0 || 0x7fffffff < a
if a < 0 || 0x10ffff < a
assert_raise(RangeError) { [a].pack(template) }
else
s = [a].pack(template)
test/ruby/test_pack.rb
assert_raise(RangeError) { [-0x40000000].pack("U") }
assert_raise(RangeError) { [-1].pack("U") }
assert_equal "\000", [0].pack("U")
assert_equal "\374\277\277\277\277\277".force_encoding(Encoding::UTF_8), [0x3fffffff].pack("U")
assert_equal "\375\200\200\200\200\200".force_encoding(Encoding::UTF_8), [0x40000000].pack("U")
assert_equal "\375\277\277\277\277\277".force_encoding(Encoding::UTF_8), [0x7fffffff].pack("U")
assert_equal "\364\217\277\277".force_encoding(Encoding::UTF_8), [0x10ffff].pack("U")
assert_raise(RangeError) { [0x3fffffff].pack("U") }
assert_raise(RangeError) { [0x40000000].pack("U") }
assert_raise(RangeError) { [0x110000].pack("U") }
assert_raise(RangeError) { [0x80000000].pack("U") }
assert_raise(RangeError) { [0x100000000].pack("U") }
end
......
assert_equal([0x80], [0x80].pack("U").unpack("U"))
assert_equal([0x800], [0x800].pack("U").unpack("U"))
assert_equal([0x10000], [0x10000].pack("U").unpack("U"))
assert_equal([0x400000], [0x400000].pack("U").unpack("U"))
assert_equal([0x10ffff], [0x10ffff].pack("U").unpack("U"))
assert_raise(ArgumentError) { "\x80".unpack("U") }
assert_raise(ArgumentError) { "\xff".unpack("U") }
assert_raise(ArgumentError) { "\xfc\x00".unpack("U") }
assert_raise(ArgumentError) { "\xc0\xc0".unpack("U") }
assert_raise(ArgumentError) { "\xe0\x80\x80".unpack("U") }
assert_raise(RangeError) { "\xf4\x90\x80\x80".unpack("U") }
end
def test_pack_unpack_u
    (1-1/1)