Feature #758
closed
speed up continuation in 1.9
Description
=begin
遠藤です。
1.9 の継続は 1.8 に比べて極端に遅いようです。
$ time ruby18 -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'
real 0m1.060s
user 0m1.050s
sys 0m0.010s
$ time ruby19 -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'
real 1m57.022s
user 1m56.780s
sys 0m0.180s
capture や call の際、VM のスタックを常に丸ごとコピーしているのが
原因で、必要なところ (先頭の sp 部分と終端の cfp 部分) だけコピー
するようにしたら、1.8 並に速くなりました。
$ time ./ruby.fast-cont -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'
real 0m0.660s
user 0m0.660s
sys 0m0.000s
私の環境で test-all が通ることは確認しています。
とくに異論がなければコミットしようと思います。
Index: cont.c
--- cont.c (revision 20241)
+++ cont.c (working copy)
@@ -14,6 +14,8 @@
#include "gc.h"
#include "eval_intern.h"
+#define CAPTURE_JUST_VALID_VM_STACK 1
+
enum context_type {
CONTINUATION_CONTEXT = 0,
FIBER_CONTEXT = 1,
@@ -25,6 +27,10 @@
VALUE self;
VALUE value;
VALUE *vm_stack;
+#ifdef CAPTURE_JUST_VALID_VM_STACK
+ int vm_stack_slen; /* length of stack (head of th->stack) */
+ int vm_stack_clen; /* length of control frames (tail of th->stack) */
+#endif
VALUE *machine_stack;
VALUE *machine_stack_src;
#ifdef __ia64
@@ -75,8 +81,13 @@
rb_thread_mark(&cont->saved_thread);
if (cont->vm_stack) {
+#ifdef CAPTURE_JUST_VALID_VM_STACK
rb_gc_mark_locations(cont->vm_stack,
- cont->vm_stack + cont->saved_thread.stack_size);
+ cont->vm_stack + cont->vm_stack_slen + cont->vm_stack_clen);
+#elif
+ rb_gc_mark_localtion(cont->vm_stack,
+ cont->vm_stack, cont->saved_thread.stack_size);
+#endif
}
if (cont->machine_stack) {
@@ -247,8 +258,16 @@
contval = cont->self;
sth = &cont->saved_thread;
+#ifdef CAPTURE_JUST_VALID_VM_STACK
+ cont->vm_stack_slen = th->cfp->sp + th->mark_stack_len - th->stack;
+ cont->vm_stack_clen = th->stack + th->stack_size - (VALUE*)th->cfp;
+ cont->vm_stack = ALLOC_N(VALUE, cont->vm_stack_slen + cont->vm_stack_clen);
+ MEMCPY(cont->vm_stack, th->stack, VALUE, cont->vm_stack_slen);
+ MEMCPY(cont->vm_stack + cont->vm_stack_slen, (VALUE*)th->cfp, VALUE, cont->vm_stack_clen);
+#elif
cont->vm_stack = ALLOC_N(VALUE, th->stack_size);
MEMCPY(cont->vm_stack, th->stack, VALUE, th->stack_size);
+#endif
sth->stack = 0;
cont_save_machine_stack(th, cont);
@@ -288,7 +307,13 @@
th->stack_size = fcont->saved_thread.stack_size;
th->stack = fcont->saved_thread.stack;
}
+#ifdef CAPTURE_JUST_VALID_VM_STACK
+ MEMCPY(th->stack, cont->vm_stack, VALUE, cont->vm_stack_slen);
+ MEMCPY(th->stack + sth->stack_size - cont->vm_stack_clen,
+ cont->vm_stack + cont->vm_stack_slen, VALUE, cont->vm_stack_clen);
+#elif
MEMCPY(th->stack, cont->vm_stack, VALUE, sth->stack_size);
+#endif
}
else {
/* fiber */
--
Yusuke ENDOH mame@tsg.ne.jp
=end