Bug #2724
closed
fork from other than the main thread causes wrong pthread condition on NetBSD
Description
NetBSD 5.0.[01] において、main thread 以外の pthread から fork すると、
pthread とカーネルスレッド (lwp) との関連が壊れるという現象が確認されています。
後述のパッチがあまりにアレなのでこの問題は Third Party's Issue とし、
Ruby 側では修正を入れない事としますが、情報の共有と記録のために
ここにチケットを切っておきます。
なお、この workaround の作成には @_enamiさんの助けがありました。
追記:
NetBSD 側では kern/42772 として報告、修正されています。
http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=42772
Index: thread_pthread.c
--- thread_pthread.c (revision 26615)
+++ thread_pthread.c (working copy)
@@ -17,6 +17,93 @@
#include <sys/resource.h>
#endif
+#if defined(__NetBSD_Version__) && __NetBSD_Version__ >= 500000000
+/* Hack for NetBSD 5.0.x's broken pthread->pt_lid */
+/* Copied from /src/lib/libpthread/pthread_int.h */
+#define BROKEN_PTHREAD_T_PT_LID
+#include <lwp.h>
+#include <pthread_queue.h>
+#include <sys/tree.h>
+
+#define PTHREAD_KEYS_MAX 256
+#define PTHREAD__UNPARK_MAX 32
+
+/*
+ * The size of this structure needs to be no larger than struct
+ * __pthread_cleanup_store, defined in pthread.h.
+ */
+struct pt_clean_t {
+	PTQ_ENTRY(pt_clean_t) ptc_next;
+	void (*ptc_cleanup)(void *);
+	void *ptc_arg;
+};
+
+struct pthread_lock_ops {
+	void (*plo_init)(__cpu_simple_lock_t *);
+	int (*plo_try)(__cpu_simple_lock_t *);
+	void (*plo_unlock)(__cpu_simple_lock_t *);
+	void (*plo_lock)(__cpu_simple_lock_t *);
+};
+
+struct __pthread_st {
+	pthread_t pt_self; /* Must be first. */
+	unsigned int pt_magic; /* Magic number */
+	int pt_state; /* running, blocked, etc. */
+	pthread_mutex_t pt_lock; /* lock on state */
+	int pt_flags; /* see PT_FLAG_* below */
+	int pt_cancel; /* Deferred cancellation */
+	int pt_errno; /* Thread-specific errno. */
+	stack_t pt_stack; /* Our stack */
+	void *pt_exitval; /* Read by pthread_join() */
+	char *pt_name; /* Thread's name, set by the app. */
+	int pt_willpark; /* About to park */
+	lwpid_t pt_unpark; /* Unpark this when parking */
+	struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
+	pthread_mutex_t *pt_droplock; /* Drop this lock if cancelled */
+	pthread_cond_t pt_joiners; /* Threads waiting to join. */
+	/* Threads to defer waking, usually until pthread_mutex_unlock(). */
+	lwpid_t pt_waiters[PTHREAD__UNPARK_MAX];
+	size_t pt_nwaiters;
+	/* Stack of cancellation cleanup handlers and their arguments */
+	PTQ_HEAD(, pt_clean_t) pt_cleanup_stack;
+	/* LWP ID and entry on the list of all threads. */
+	lwpid_t pt_lid;
+	RB_ENTRY(__pthread_st) pt_alltree;
+	PTQ_ENTRY(__pthread_st) pt_allq;
+	PTQ_ENTRY(__pthread_st) pt_deadq;
+	/*
+	 * General synchronization data. We try to align, as threads
+	 * on other CPUs will access this data frequently.
+	 */
+	int pt_dummy1 __aligned(128);
+	struct lwpctl *pt_lwpctl; /* Kernel/user comms area */
+	volatile int pt_blocking; /* Blocking in userspace */
+	volatile int pt_rwlocked; /* Handed rwlock successfully */
+	volatile int pt_signalled; /* Received pthread_cond_signal() */
+	volatile int pt_mutexwait; /* Waiting to acquire mutex */
+	void * volatile pt_mutexnext; /* Next thread in chain */
+	void * volatile pt_sleepobj; /* Object slept on */
+	PTQ_ENTRY(__pthread_st) pt_sleep;
+	void (*pt_early)(void *);
+	int pt_dummy2 __aligned(128);
+	/* Thread-specific data. Large so it sits close to the end. */
+	int pt_havespecific;
+	void *pt_specific[PTHREAD_KEYS_MAX];
+	/*
+	 * Context for thread creation. At the end as it's cached
+	 * and then only ever passed to _lwp_create().
+	 */
+	ucontext_t pt_uc;
+};
+#endif /* NetBSD */
+
+
static void native_mutex_lock(pthread_mutex_t *lock);
static void native_mutex_unlock(pthread_mutex_t *lock);
static int native_mutex_trylock(pthread_mutex_t *lock);
@@ -833,6 +920,9 @@
native_reset_timer_thread(void)
{
timer_thread_id = 0;
+#ifdef BROKEN_PTHREAD_T_PT_LID
+    ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self();
+#endif
}
#ifdef HAVE_SIGALTSTACK
Updated by skandragon (Michael Graff) over 14 years ago
=begin
I made a patch (based upon this problem report) to NetBSD-current two weeks ago. I do not recommend that this patch be accepted; instead, the issue should be marked as an "OS vendor problem" and fixed as such.
I have also submitted a pull-up request to the NetBSD release engineers for NetBSD 5.x, so the fix should make it into the next NetBSD 5.x release on that branch.
=end
Updated by usa (Usaku NAKAMURA) over 14 years ago
=begin
Original reporter Naruse says that
"This is a problem of OS itself.
Following patch is only a sample of the workaround.
We shouldn't apply this patch to ruby.
I leave it only the record for the sharing of knowledge."
So, this ticket has already been marked as "Third Party's Issue".
BTW, it's very good news that this problem has been fixed on the NetBSD side.
Thank you for providing this useful information!
=end
Updated by naruse (Yui NARUSE) over 12 years ago
- Description updated (diff)
Updated by naruse (Yui NARUSE) over 12 years ago
- Description updated (diff)