Project

General

Profile

Feature #6670 » string_bytes_to_array.patch

zzak (zzak _), 11/19/2012 11:59 AM

View differences:

string.c
return str;
}
/*
 * Returns a new array holding every byte of +str+ as a Fixnum.
 *
 * Each byte is masked with 0xff so the stored value is in 0..255 even when
 * plain `char` is signed on the target platform.
 *
 * The array is created with room for RSTRING_LEN(str) elements because the
 * final size is known before the loop starts; this avoids growing the array
 * repeatedly while pushing.
 */
static VALUE
rb_str_bytes(VALUE str)
{
    long i;
    long len = RSTRING_LEN(str);
    VALUE ary = rb_ary_new2(len);

    /* Nothing in the loop runs user code, so the length read above stays valid. */
    for (i = 0; i < len; i++) {
	rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
    }
    return ary;
}
/*
* call-seq:
......
rb_define_method(rb_cString, "oct", rb_str_oct, 0);
rb_define_method(rb_cString, "split", rb_str_split_m, -1);
rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0);
rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
rb_define_method(rb_cString, "chars", rb_str_each_char, 0);
rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
-
string.c
*/
static VALUE
rb_str_each_byte(VALUE str)
rb_str_enumerate_bytes(VALUE str, VALUE return_enumerator_p)
{
long i;
VALUE ary, yieldp;
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, 0, 0);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
yieldp = Qfalse;
ary = rb_ary_new2(RSTRING_LEN(str));
}
RETURN_ENUMERATOR(str, 0, 0);
for (i=0; i<RSTRING_LEN(str); i++) {
rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
if (yieldp) {
rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
}
else {
rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
}
}
return str;
return ary;
}
static VALUE
rb_str_bytes(VALUE str)
rb_str_each_byte(VALUE str)
{
long i;
VALUE ary = rb_ary_new();
return rb_str_enumerate_bytes(str, Qtrue);
}
for (i=0; i<RSTRING_LEN(str); i++) {
rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
}
return ary;
/*
 * Entry point registered for String#bytes.
 *
 * Delegates to rb_str_enumerate_bytes() with return_enumerator_p = Qfalse.
 * With that flag the helper does not enter its RETURN_ENUMERATOR guard; it
 * yields each byte when a block is given and otherwise pushes each byte onto
 * an array.
 */
static VALUE
rb_str_bytes(VALUE str)
{
return rb_str_enumerate_bytes(str, Qfalse);
}
/*
test/ruby/test_string.rb
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
assert_equal 65, S("ABC").each_byte.next
end
# Checks String#bytes in both call forms:
# - with a block, the bytes of "ABC" are yielded in order as 65, 66, 67;
# - without a block, the call returns the array [65, 66, 67].
def test_bytes
res = []
S("ABC").bytes {|x| res << x }
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
assert_equal [65, 66, 67], S("ABC").bytes
end
def test_each_line
-
string.c
rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
}
}
return ary;
if (yieldp) {
return str;
}
else {
return ary;
}
}
static VALUE
test/ruby/test_string.rb
def test_each_byte
res = []
S("ABC").each_byte {|x| res << x }
s = S("ABC")
assert_equal s.object_id, s.each_byte {|x| res << x }.object_id
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
......
def test_bytes
res = []
S("ABC").bytes {|x| res << x }
s = S("ABC")
assert_equal s.object_id, s.bytes {|x| res << x }.object_id
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
-
string.c
*/
static VALUE
rb_str_each_char(VALUE str)
rb_str_enumerate_chars(VALUE str, VALUE return_enumerator_p)
{
VALUE orig = str;
long i, len, n;
const char *ptr;
rb_encoding *enc;
VALUE ary, yieldp;
RETURN_ENUMERATOR(str, 0, 0);
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, 0, 0);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
yieldp = Qfalse;
ary = rb_ary_new();
}
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
......
case ENC_CODERANGE_7BIT:
for (i = 0; i < len; i += n) {
n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
rb_yield(rb_str_subseq(str, i, n));
if (yieldp) {
rb_yield(rb_str_subseq(str, i, n));
}
else {
rb_ary_push(ary, rb_str_subseq(str, i, n));
}
}
break;
default:
for (i = 0; i < len; i += n) {
n = rb_enc_mbclen(ptr + i, ptr + len, enc);
rb_yield(rb_str_subseq(str, i, n));
if (yieldp) {
rb_yield(rb_str_subseq(str, i, n));
}
else {
rb_ary_push(ary, rb_str_subseq(str, i, n));
}
}
}
return orig;
if (yieldp) {
return orig;
}
else {
return ary;
}
}
/*
 * Thin wrapper around rb_str_enumerate_chars() that passes
 * return_enumerator_p = Qtrue, so the helper runs RETURN_ENUMERATOR before
 * doing any iteration.
 */
static VALUE
rb_str_each_char(VALUE str)
{
return rb_str_enumerate_chars(str, Qtrue);
}
/*
 * Entry point registered for String#chars.
 *
 * Thin wrapper around rb_str_enumerate_chars() that passes
 * return_enumerator_p = Qfalse, so the helper skips its RETURN_ENUMERATOR
 * guard. The helper then yields each character when a block is given, or
 * collects the characters into an array and returns that array otherwise.
 */
static VALUE
rb_str_chars(VALUE str)
{
return rb_str_enumerate_chars(str, Qfalse);
}
/*
......
rb_define_method(rb_cString, "split", rb_str_split_m, -1);
rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
rb_define_method(rb_cString, "chars", rb_str_each_char, 0);
rb_define_method(rb_cString, "chars", rb_str_chars, 0);
rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
test/ruby/test_string.rb
assert_equal [65, 66, 67], S("ABC").bytes
end
# Checks String#each_char:
# - with a block, the call's result has the receiver's object_id and the block
#   sees "A", "B", "C" in order;
# - without a block, the result responds to #next and produces "A" first.
def test_each_char
res = []
s = S("ABC")
assert_equal s.object_id, s.each_char {|x| res << x }.object_id
assert_equal("A", res[0])
assert_equal("B", res[1])
assert_equal("C", res[2])
assert_equal "A", S("ABC").each_char.next
end
# Checks String#chars:
# - with a block, the call's result has the receiver's object_id and the block
#   sees "A", "B", "C" in order;
# - without a block, the call returns the array ["A", "B", "C"].
def test_chars
res = []
s = S("ABC")
assert_equal s.object_id, s.chars {|x| res << x }.object_id
assert_equal("A", res[0])
assert_equal("B", res[1])
assert_equal("C", res[2])
assert_equal ["A", "B", "C"], S("ABC").chars
end
def test_each_line
save = $/
$/ = "\n"
-
string.c
return rb_str_enumerate_chars(str, Qfalse);
}
/*
 * Shared worker for the codepoint-iterating String methods.
 *
 * str                 - the receiver.
 * return_enumerator_p - when non-zero, RETURN_ENUMERATOR(str, 0, 0) is
 *                       invoked before any iteration.
 *
 * If single_byte_optimizable(str) holds, the work is handed to
 * rb_str_enumerate_bytes() with the same flag.  Otherwise each codepoint is
 * decoded with rb_enc_codepoint_len() and either yielded to the block (the
 * original receiver is returned) or pushed onto a fresh array (the array is
 * returned).
 *
 * NOTE(review): decoding walks the buffer of the value returned by
 * rb_str_new4(str), presumably a snapshot that is safe against mutation of
 * the receiver from inside the block; confirm that this value stays
 * reachable for the whole loop.
 */
static VALUE
rb_str_enumerate_codepoints(VALUE str, VALUE return_enumerator_p)
{
    VALUE receiver = str;
    VALUE collected;
    const char *cursor, *limit;
    rb_encoding *encoding;
    unsigned int codepoint;
    int width;
    int use_block;

    if (single_byte_optimizable(str)) {
	return rb_str_enumerate_bytes(str, return_enumerator_p);
    }

    if (return_enumerator_p) {
	RETURN_ENUMERATOR(str, 0, 0);
    }

    /* The result array is only needed when there is no block to yield to. */
    use_block = rb_block_given_p();
    if (!use_block) {
	collected = rb_ary_new();
    }

    str = rb_str_new4(str);
    cursor = RSTRING_PTR(str);
    limit = RSTRING_END(str);
    encoding = STR_ENC_GET(str);

    /* width is filled in by rb_enc_codepoint_len() on every iteration. */
    for (; cursor < limit; cursor += width) {
	codepoint = rb_enc_codepoint_len(cursor, limit, &width, encoding);
	if (use_block) {
	    rb_yield(UINT2NUM(codepoint));
	}
	else {
	    rb_ary_push(collected, UINT2NUM(codepoint));
	}
    }

    return use_block ? receiver : collected;
}
/*
* call-seq:
* str.codepoints {|integer| block } -> str
......
static VALUE
rb_str_each_codepoint(VALUE str)
{
VALUE orig = str;
int n;
unsigned int c;
const char *ptr, *end;
rb_encoding *enc;
return rb_str_enumerate_codepoints(str, Qtrue);
}
if (single_byte_optimizable(str)) return rb_str_each_byte(str);
RETURN_ENUMERATOR(str, 0, 0);
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
enc = STR_ENC_GET(str);
while (ptr < end) {
c = rb_enc_codepoint_len(ptr, end, &n, enc);
rb_yield(UINT2NUM(c));
ptr += n;
}
return orig;
/*
 * Entry point registered for String#codepoints.
 *
 * Thin wrapper around rb_str_enumerate_codepoints() that passes
 * return_enumerator_p = Qfalse, so the helper skips its RETURN_ENUMERATOR
 * guard. The helper then yields each codepoint when a block is given, or
 * returns an array of the codepoints otherwise.
 */
static VALUE
rb_str_codepoints(VALUE str)
{
return rb_str_enumerate_codepoints(str, Qfalse);
}
static long
......
rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
rb_define_method(rb_cString, "chars", rb_str_chars, 0);
rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
rb_define_method(rb_cString, "concat", rb_str_concat, 1);
test/ruby/test_string.rb
assert_equal ["A", "B", "C"], S("ABC").chars
end
# Checks String#each_codepoint on an ASCII-only string and on a string of
# three \u-escaped characters:
# - without a block, the result responds to #next and produces the first
#   codepoint (65 for "ABC", 0x3042 for the multibyte string);
# - with a block, the call's result has the receiver's object_id and the block
#   sees 0x3042, 0x3044, 0x3046 in order.
def test_each_codepoint
# Single byte optimization
assert_equal 65, S("ABC").each_codepoint.next
res = []
s = S("\u3042\u3044\u3046")
assert_equal s.object_id, s.each_codepoint {|x| res << x }.object_id
assert_equal(0x3042, res[0])
assert_equal(0x3044, res[1])
assert_equal(0x3046, res[2])
assert_equal 0x3042, S("\u3042\u3044\u3046").each_codepoint.next
end
# Checks String#codepoints on an ASCII-only string and on a string of three
# \u-escaped characters:
# - without a block, the call returns an array of the codepoints
#   ([65, 66, 67] and [0x3042, 0x3044, 0x3046]);
# - with a block, the call's result has the receiver's object_id and the block
#   sees 0x3042, 0x3044, 0x3046 in order.
def test_codepoints
# Single byte optimization
assert_equal [65, 66, 67], S("ABC").codepoints
res = []
s = S("\u3042\u3044\u3046")
assert_equal s.object_id, s.codepoints {|x| res << x }.object_id
assert_equal(0x3042, res[0])
assert_equal(0x3044, res[1])
assert_equal(0x3046, res[2])
assert_equal [0x3042, 0x3044, 0x3046], S("\u3042\u3044\u3046").codepoints
end
def test_each_line
save = $/
$/ = "\n"
-
string.c
}
/*
* call-seq:
* str.bytes {|fixnum| block } -> str
* str.bytes -> an_enumerator
*
* str.each_byte {|fixnum| block } -> str
* str.each_byte -> an_enumerator
*
* Passes each byte in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
*
* "hello".each_byte {|c| print c, ' ' }
*
* <em>produces:</em>
*
* 104 101 108 108 111
*/
static VALUE
rb_str_enumerate_bytes(VALUE str, VALUE return_enumerator_p)
{
......
}
}
/*
* call-seq:
* str.bytes {|fixnum| block } -> str
* str.bytes -> an_enumerator
*
* str.each_byte {|fixnum| block } -> str
* str.each_byte -> an_enumerator
*
* Passes each byte in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
*
* "hello".each_byte {|c| print c, ' ' }
*
* <em>produces:</em>
*
* 104 101 108 108 111
*/
static VALUE
rb_str_each_byte(VALUE str)
{
......
return rb_str_enumerate_bytes(str, Qfalse);
}
/*
* call-seq:
* str.chars {|cstr| block } -> str
* str.chars -> an_enumerator
*
* str.each_char {|cstr| block } -> str
* str.each_char -> an_enumerator
*
* Passes each character in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
*
* "hello".each_char {|c| print c, ' ' }
*
* <em>produces:</em>
*
* h e l l o
*/
static VALUE
rb_str_enumerate_chars(VALUE str, VALUE return_enumerator_p)
......
}
}
/*
* call-seq:
* str.chars {|cstr| block } -> str
* str.chars -> an_enumerator
*
* str.each_char {|cstr| block } -> str
* str.each_char -> an_enumerator
*
* Passes each character in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
*
* "hello".each_char {|c| print c, ' ' }
*
* <em>produces:</em>
*
* h e l l o
*/
static VALUE
rb_str_each_char(VALUE str)
{
......
return rb_str_enumerate_chars(str, Qfalse);
}
static VALUE
rb_str_enumerate_codepoints(VALUE str, VALUE return_enumerator_p)
{
-
string.c
}
/*
* call-seq:
* str.each_line(separator=$/) {|substr| block } -> str
* str.each_line(separator=$/) -> an_enumerator
*
* str.lines(separator=$/) {|substr| block } -> str
* str.lines(separator=$/) -> an_enumerator
*
* Splits <i>str</i> using the supplied parameter as the record separator
* (<code>$/</code> by default), passing each substring in turn to the supplied
* block. If a zero-length record separator is supplied, the string is split
* into paragraphs delimited by multiple successive newlines.
*
* If no block is given, an enumerator is returned instead.
*
* print "Example one\n"
* "hello\nworld".each_line {|s| p s}
* print "Example two\n"
* "hello\nworld".each_line('l') {|s| p s}
* print "Example three\n"
* "hello\n\n\nworld".each_line('') {|s| p s}
*
* <em>produces:</em>
*
* Example one
* "hello\n"
* "world"
* Example two
* "hel"
* "l"
* "o\nworl"
* "d"
* Example three
* "hello\n\n\n"
* "world"
*/
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE return_enumerator_p)
{
rb_encoding *enc;
VALUE rs;
......
VALUE line;
int n;
VALUE orig = str;
VALUE ary, yieldp;
if (argc == 0) {
rs = rb_rs;
......
else {
rb_scan_args(argc, argv, "01", &rs);
}
RETURN_ENUMERATOR(str, argc, argv);
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, argc, argv);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
yieldp = Qfalse;
ary = rb_ary_new2(RSTRING_LEN(str));
}
if (NIL_P(rs)) {
rb_yield(str);
return orig;
if (yieldp) {
rb_yield(str);
return orig;
}
else {
rb_ary_push(ary, str);
return ary;
}
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
......
line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (yieldp) {
rb_yield(line);
}
else {
rb_ary_push(ary, line);
}
str_mod_check(str, ptr, len);
s = p;
}
......
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (yieldp) {
rb_yield(line);
}
else {
rb_ary_push(ary, line);
}
str_mod_check(str, ptr, len);
s = p + (rslen ? rslen : n);
}
......
line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (yieldp) {
rb_yield(line);
}
else {
rb_ary_push(ary, line);
}
}
if (yieldp) {
return orig;
}
else {
return ary;
}
}
/*
* call-seq:
* str.each_line(separator=$/) {|substr| block } -> str
* str.each_line(separator=$/) -> an_enumerator
*
* str.lines(separator=$/) {|substr| block } -> str
* str.lines(separator=$/) -> an_enumerator
*
* Splits <i>str</i> using the supplied parameter as the record separator
* (<code>$/</code> by default), passing each substring in turn to the supplied
* block. If a zero-length record separator is supplied, the string is split
* into paragraphs delimited by multiple successive newlines.
*
* If no block is given, an enumerator is returned instead.
*
* print "Example one\n"
* "hello\nworld".each_line {|s| p s}
* print "Example two\n"
* "hello\nworld".each_line('l') {|s| p s}
* print "Example three\n"
* "hello\n\n\nworld".each_line('') {|s| p s}
*
* <em>produces:</em>
*
* Example one
* "hello\n"
* "world"
* Example two
* "hel"
* "l"
* "o\nworl"
* "d"
* Example three
* "hello\n\n\n"
* "world"
*/
return orig;
/*
 * Thin wrapper around rb_str_enumerate_lines(): forwards argc/argv/str
 * unchanged and passes return_enumerator_p = Qtrue, so the helper runs
 * RETURN_ENUMERATOR(str, argc, argv) before doing any iteration.
 */
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
return rb_str_enumerate_lines(argc, argv, str, Qtrue);
}
/*
 * Entry point registered for String#lines (variable arity, -1).
 *
 * Thin wrapper around rb_str_enumerate_lines(): forwards argc/argv/str
 * unchanged and passes return_enumerator_p = Qfalse, so the helper skips its
 * RETURN_ENUMERATOR guard. The helper then yields each line when a block is
 * given, or collects the lines into an array and returns it otherwise.
 */
static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
return rb_str_enumerate_lines(argc, argv, str, Qfalse);
}
......
rb_define_method(rb_cString, "hex", rb_str_hex, 0);
rb_define_method(rb_cString, "oct", rb_str_oct, 0);
rb_define_method(rb_cString, "split", rb_str_split_m, -1);
rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
rb_define_method(rb_cString, "lines", rb_str_lines, -1);
rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
rb_define_method(rb_cString, "chars", rb_str_chars, 0);
rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
test/ruby/test_string.rb
save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
S("hello\nworld").each_line {|x| res << x}
assert_equal(S("hello\n"), res[0])
assert_equal(S("world"), res[1])
res=[]
S("hello\n\n\nworld").lines(S('')).each {|x| res << x}
S("hello\n\n\nworld").each_line(S('')) {|x| res << x}
assert_equal(S("hello\n\n\n"), res[0])
assert_equal(S("world"), res[1])
$/ = "!"
res=[]
S("hello!world").lines.each {|x| res << x}
S("hello!world").each_line {|x| res << x}
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
......
s = nil
"foo\nbar".each_line(nil) {|s2| s = s2 }
assert_equal("foo\nbar", s)
assert_equal "hello\n", S("hello\nworld").each_line.next
assert_equal "hello\nworld", S("hello\nworld").each_line(nil).next
end
# Checks String#lines:
# - with a block, "hello\nworld" is yielded as "hello\n" then "world";
# - without a block, the call returns the array ["hello\n", "world"];
# - with a nil separator and no block, the whole string comes back as the
#   single element of the array.
def test_lines
res=[]
S("hello\nworld").lines {|x| res << x}
assert_equal(S("hello\n"), res[0])
assert_equal(S("world"), res[1])
assert_equal ["hello\n", "world"], S("hello\nworld").lines
assert_equal ["hello\nworld"], S("hello\nworld").lines(nil)
end
def test_empty?
-
string.c
}
else {
yieldp = Qfalse;
ary = rb_ary_new2(RSTRING_LEN(str));
ary = rb_ary_new();
}
if (NIL_P(rs)) {
-
string.c
rb_scan_args(argc, argv, "01", &rs);
}
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, argc, argv);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
if (return_enumerator_p)
RETURN_ENUMERATOR(str, argc, argv);
yieldp = Qfalse;
ary = rb_ary_new();
}
......
long i;
VALUE ary, yieldp;
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, 0, 0);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
if (return_enumerator_p)
RETURN_ENUMERATOR(str, 0, 0);
yieldp = Qfalse;
ary = rb_ary_new2(RSTRING_LEN(str));
}
......
rb_encoding *enc;
VALUE ary, yieldp;
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, 0, 0);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
if (return_enumerator_p)
RETURN_ENUMERATOR(str, 0, 0);
yieldp = Qfalse;
ary = rb_ary_new();
}
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
......
if (single_byte_optimizable(str))
return rb_str_enumerate_bytes(str, return_enumerator_p);
if (return_enumerator_p) {
RETURN_ENUMERATOR(str, 0, 0);
}
if (rb_block_given_p()) {
yieldp = Qtrue;
}
else {
if (return_enumerator_p)
RETURN_ENUMERATOR(str, 0, 0);
yieldp = Qfalse;
ary = rb_ary_new();
}
......
return rb_str_enumerate_codepoints(str, Qfalse);
}
static long
chopped_length(VALUE str)
{
-
string.c
/*
* call-seq:
* str.lines(separator=$/) {|substr| block } -> str
* str.lines(separator=$/) -> an_array
*
* str.each_line(separator=$/) {|substr| block } -> str
* str.each_line(separator=$/) -> an_enumerator
*
* str.lines(separator=$/) {|substr| block } -> str
* str.lines(separator=$/) -> an_enumerator
*
* Splits <i>str</i> using the supplied parameter as the record separator
* (<code>$/</code> by default), passing each substring in turn to the supplied
* block. If a zero-length record separator is supplied, the string is split
* into paragraphs delimited by multiple successive newlines.
*
* If no block is given, an enumerator is returned instead.
* If no block is given, an array or enumerator is returned instead.
*
* print "Example one\n"
* "hello\nworld".each_line {|s| p s}
......
/*
* call-seq:
* str.bytes {|fixnum| block } -> str
* str.bytes -> an_enumerator
* str.bytes -> an_array
*
* str.each_byte {|fixnum| block } -> str
* str.each_byte -> an_enumerator
*
* Passes each byte in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
* an array or enumerator if no block is given.
*
* "hello".each_byte {|c| print c, ' ' }
*
......
/*
* call-seq:
* str.chars {|cstr| block } -> str
* str.chars -> an_enumerator
* str.chars -> an_array
*
* str.each_char {|cstr| block } -> str
* str.each_char -> an_enumerator
*
* Passes each character in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
* an array or enumerator if no block is given.
*
* "hello".each_char {|c| print c, ' ' }
*
......
/*
* call-seq:
* str.codepoints {|integer| block } -> str
* str.codepoints -> an_enumerator
* str.codepoints -> an_array
*
* str.each_codepoint {|integer| block } -> str
* str.each_codepoint -> an_enumerator
......
* also known as a <i>codepoint</i> when applied to Unicode strings to the
* given block.
*
* If no block is given, an enumerator is returned instead.
* If no block is given, an array or enumerator is returned instead.
*
* "hello\u0639".each_codepoint {|c| print c, ' ' }
*
(2-2/3)