Project

General

Profile

Actions

Feature #758

closed

speed up continuation in 1.9

Added by mame (Yusuke Endoh) almost 17 years ago. Updated over 14 years ago.

Status:
Closed
Assignee:
-
Target version:
-
[ruby-dev:37106]

Description

=begin
遠藤です。

1.9 の継続は 1.8 に比べて極端に遅いようです。

$ time ruby18 -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'
real 0m1.060s
user 0m1.050s
sys 0m0.010s

$ time ruby19 -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'
real 1m57.022s
user 1m56.780s
sys 0m0.180s

capture や call の際、VM のスタックを常に丸ごとコピーしているのが
原因で、必要なところ (先頭の sp 部分と終端の cfp 部分) だけコピー
するようにしたら、1.8 並に速くなりました。

$ time ./ruby.fast-cont -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000'

real 0m0.660s
user 0m0.660s
sys 0m0.000s

私の環境で test-all が通ることは確認しています。
とくに異論がなければコミットしようと思います。

Index: cont.c

--- cont.c (revision 20241)
+++ cont.c (working copy)
@@ -14,6 +14,8 @@
#include "gc.h"
#include "eval_intern.h"

+#define CAPTURE_JUST_VALID_VM_STACK 1
+
enum context_type {
CONTINUATION_CONTEXT = 0,
FIBER_CONTEXT = 1,
@@ -25,6 +27,10 @@
VALUE self;
VALUE value;
VALUE *vm_stack;
+#ifdef CAPTURE_JUST_VALID_VM_STACK
+    int vm_stack_slen;  /* length of stack (head of th->stack) */
+    int vm_stack_clen;  /* length of control frames (tail of th->stack) */
+#endif
VALUE *machine_stack;
VALUE *machine_stack_src;
#ifdef __ia64
@@ -75,8 +81,13 @@
rb_thread_mark(&cont->saved_thread);
if (cont->vm_stack) {

+#ifdef CAPTURE_JUST_VALID_VM_STACK
rb_gc_mark_locations(cont->vm_stack,
-			 cont->vm_stack + cont->saved_thread.stack_size);
+			 cont->vm_stack + cont->vm_stack_slen + cont->vm_stack_clen);
+#elif
+	rb_gc_mark_localtion(cont->vm_stack,
+			 cont->vm_stack, cont->saved_thread.stack_size);
+#endif
}

if (cont->machine_stack) {

@@ -247,8 +258,16 @@
contval = cont->self;
sth = &cont->saved_thread;

+#ifdef CAPTURE_JUST_VALID_VM_STACK
+    cont->vm_stack_slen = th->cfp->sp + th->mark_stack_len - th->stack;
+    cont->vm_stack_clen = th->stack + th->stack_size - (VALUE*)th->cfp;
+    cont->vm_stack = ALLOC_N(VALUE, cont->vm_stack_slen + cont->vm_stack_clen);
+    MEMCPY(cont->vm_stack, th->stack, VALUE, cont->vm_stack_slen);
+    MEMCPY(cont->vm_stack + cont->vm_stack_slen, (VALUE*)th->cfp, VALUE, cont->vm_stack_clen);
+#elif
cont->vm_stack = ALLOC_N(VALUE, th->stack_size);
MEMCPY(cont->vm_stack, th->stack, VALUE, th->stack_size);
+#endif
sth->stack = 0;

cont_save_machine_stack(th, cont);
@@ -288,7 +307,13 @@
th->stack_size = fcont->saved_thread.stack_size;
th->stack = fcont->saved_thread.stack;
}
+#ifdef CAPTURE_JUST_VALID_VM_STACK
+    MEMCPY(th->stack, cont->vm_stack, VALUE, cont->vm_stack_slen);
+    MEMCPY(th->stack + sth->stack_size - cont->vm_stack_clen,
+           cont->vm_stack + cont->vm_stack_slen, VALUE, cont->vm_stack_clen);
+#elif
MEMCPY(th->stack, cont->vm_stack, VALUE, sth->stack_size);
+#endif
}
else {
/* fiber */

--
Yusuke ENDOH
=end

Actions

Also available in: Atom PDF

Like0
Like0Like0