Project

General

Profile

Actions

Feature #8214

closed

デッドロックチェックに全スレッドのバックトレースダンプの追加

Added by kosaki (Motohiro KOSAKI) about 11 years ago. Updated almost 8 years ago.

Status:
Closed
Target version:
-
[ruby-dev:47217]

Description

さいきん、くくぱど方面のデッドロックバグの調査を手伝っていて気づいたのですが、
現在のRubyのdeadlock detectorは開発者に優しくないです。

例外があがるとスタックが巻戻ってしまって証拠が消えてしまうので、例外をあげるまえに
全スレッドのバックトレースをダンプするオプションがあるべきではないでしょうか。
デッドロックが起きたときは99%、直さないといけない状況になるはずで、証拠を残す機能は
有意義なはずです。

TBD: ruby level だけで十分か、Cレベルも含めるか

Updated by nobu (Nobuyoshi Nakada) about 11 years ago

  • Subject changed from デッドロックチェックに前スレッドのバックトレースダンプの追加 to デッドロックチェックに全スレッドのバックトレースダンプの追加
  • Description updated (diff)

Updated by naruse (Yui NARUSE) almost 8 years ago

とりあえず以下のような感じで作ってみました。
やっぱりバックトレースも欲しいかな

% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
`miniruby' is up to date.
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..359dada 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }
 
 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
 	    return "aborting";
 	else
 	    return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+	if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
 	return "sleep";
       case THREAD_KILLED:
 	return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
 	}
 	return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }
 
 
@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;
 
     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, FALSE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
 	rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4728,26 +4729,36 @@ ruby_native_thread_p(void)
 }
 
 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;
 
-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+	    vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-	printf("th:%p %d %d", th, th->status, th->interrupt_flag);
 	if (th->locking_mutex) {
 	    rb_mutex_t *mutex;
+	    struct rb_thread_struct volatile *mth;
+	    int waiting;
 	    GetMutexPtr(th->locking_mutex, mutex);
 
 	    native_mutex_lock(&mutex->lock);
-	    printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+	    mth = mutex->th;
+	    waiting = mutex->cond_waiting;
 	    native_mutex_unlock(&mutex->lock);
+	    rb_str_catf(msg, "  rb_thread_t:%p %s %u waiting:%p cond_waiting:%d", th, thread_status_name(th, TRUE), th->interrupt_flag, mth, waiting);
+	}
+	else {
+	    rb_str_catf(msg, "  rb_thread_t:%p %s %u\n", th, thread_status_name(th, TRUE), th->interrupt_flag);
+	}
+	{
+	    rb_thread_list_t *list = th->join_list;
+	    while (list) {
+		rb_str_catf(msg, "    depended by: %p\n", list->th);
+		list = list->next;
+	    }
 	}
-	else
-	    puts("");
     }
-#endif
 }
 
 static void
@@ -4782,7 +4793,7 @@ rb_check_deadlock(rb_vm_t *vm)
 	VALUE argv[2];
 	argv[0] = rb_eFatal;
 	argv[1] = rb_str_new2("No live threads left. Deadlock?");
-	debug_deadlock_check(vm);
+	debug_deadlock_check(vm, argv[1]);
 	vm->sleeper--;
 	rb_threadptr_raise(vm->main_thread, 2, argv);
     }

Updated by naruse (Yui NARUSE) almost 8 years ago

バックトレースも欲しいらしいので

% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
--- thread.o ---
compiling ../../ruby/thread.c
--- miniruby ---
linking miniruby
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
   -e:1:in `join'
   -e:1:in `<main>'
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
   -e:1:in `stop'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
   -e:1:in `join'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
   -e:1:in `join'
   -e:1:in `block in <main>'
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..84f76d1 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }
 
 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
 	    return "aborting";
 	else
 	    return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+	if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
 	return "sleep";
       case THREAD_KILLED:
 	return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
 	}
 	return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }
 
 
@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;
 
     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, TRUE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
 	rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4727,27 +4728,46 @@ ruby_native_thread_p(void)
     return th != 0;
 }
 
+VALUE rb_vm_backtrace_str_ary(rb_thread_t *th, long lev, long n);
 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;
+    VALUE sep = rb_str_new_cstr("\n   ");
 
-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+	    vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-	printf("th:%p %d %d", th, th->status, th->interrupt_flag);
 	if (th->locking_mutex) {
 	    rb_mutex_t *mutex;
+	    struct rb_thread_struct volatile *mth;
+	    int waiting;
 	    GetMutexPtr(th->locking_mutex, mutex);
 
 	    native_mutex_lock(&mutex->lock);
-	    printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+	    mth = mutex->th;
+	    waiting = mutex->cond_waiting;
 	    native_mutex_unlock(&mutex->lock);
+	    rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u mutex:%p cond:%d\n",
+		    th->self, th, th->thread_id,
+		    th->interrupt_flag, mth, waiting);
 	}
-	else
-	    puts("");
+	else {
+	    rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u\n",
+		    th->self, th, th->thread_id,
+		    th->interrupt_flag);
+	}
+	{
+	    rb_thread_list_t *list = th->join_list;
+	    while (list) {
+		rb_str_catf(msg, "    depended by: tb_thread_id:%p\n", list->th);
+		list = list->next;
+	    }
+	}
+	rb_str_catf(msg, "   ");
+	rb_str_concat(msg, rb_ary_join(rb_vm_backtrace_str_ary(th, 0, 0), sep));
+	rb_str_catf(msg, "\n");
     }
-#endif
 }
 
 static void
@@ -4782,7 +4802,7 @@ rb_check_deadlock(rb_vm_t *vm)
 	VALUE argv[2];
 	argv[0] = rb_eFatal;
 	argv[1] = rb_str_new2("No live threads left. Deadlock?");
-	debug_deadlock_check(vm);
+	debug_deadlock_check(vm, argv[1]);
 	vm->sleeper--;
 	rb_threadptr_raise(vm->main_thread, 2, argv);
     }

Updated by tagomoris (Satoshi Tagomori) almost 8 years ago

デッドロック関連のデバッグであれば A -> B の順にロックを取ろうとしているスレッドと B -> A の順に
ロックを取ろうとしているスレッドが両方いる、という事実が重要だと思うので、バックトレースは極めて重要だと考えます。

Updated by shyouhei (Shyouhei Urabe) almost 8 years ago

  • Status changed from Open to Assigned
  • Assignee set to naruse (Yui NARUSE)
Actions #6

Updated by naruse (Yui NARUSE) almost 8 years ago

  • Status changed from Assigned to Closed

Applied in changeset r55397.


  • thread.c (debug_deadlock_check): show thread lock dependency and
    backtrace [Feature #8214] [ruby-dev:47217]

  • thread.c (thread_status_name): show "sleep_forever" instead of
    "sleep" if called from inspect.

Actions

Also available in: Atom PDF

Like0
Like0Like0Like0Like0Like0Like0