Bug #2724
closed
fork from other than the main thread causes wrong pthread condition on NetBSD
Description
NetBSD 5.0.[01] において、main thread 以外の pthread から fork すると、
pthread とカーネルスレッド (lwp) との関連が壊れるという現象が確認されています。
後述のパッチがあまりにアレなのでこの問題は Third Party's Issue とし、
Ruby 側では修正を入れない事としますが、情報の共有と記録のために
ここにチケットを切っておきます。
なお、この workaround の作成には @_enamiさんの助けがありました。
追記:
NetBSD 側では kern/42772 として報告、修正されています。
http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=42772
Index: thread_pthread.c
--- thread_pthread.c    (revision 26615)
+++ thread_pthread.c    (working copy)
@@ -17,6 +17,93 @@
#include <sys/resource.h>
#endif
+#if defined(__NetBSD_Version__) && __NetBSD_Version__ >= 500000000
+/* Hack for NetBSD 5.0.x's broken pthread->pt_lid */
+/* Copied from /src/lib/libpthread/pthread_int.h */
+#define BROKEN_PTHREAD_T_PT_LID
+#include <lwp.h>
+#include <pthread_queue.h>
+#include <sys/tree.h>
+
+#define PTHREAD_KEYS_MAX       256
+#define        PTHREAD__UNPARK_MAX     32
+
+/*
+ * The size of this structure needs to be no larger than struct
+ * __pthread_cleanup_store, defined in pthread.h.
+ */
+struct pt_clean_t {
+	PTQ_ENTRY(pt_clean_t) ptc_next;
+	void (*ptc_cleanup)(void *);
+	void *ptc_arg;
+};
+
+struct pthread_lock_ops {
+	void (*plo_init)(__cpu_simple_lock_t *);
+	int (*plo_try)(__cpu_simple_lock_t *);
+	void (*plo_unlock)(__cpu_simple_lock_t *);
+	void (*plo_lock)(__cpu_simple_lock_t *);
+};
+
+struct __pthread_st {
+	pthread_t pt_self; /* Must be first. */
+	unsigned int pt_magic; /* Magic number */
+	int pt_state; /* running, blocked, etc. */
+	pthread_mutex_t pt_lock; /* lock on state */
+	int pt_flags; /* see PT_FLAG_* below */
+	int pt_cancel; /* Deferred cancellation */
+	int pt_errno; /* Thread-specific errno. */
+	stack_t pt_stack; /* Our stack */
+	void *pt_exitval; /* Read by pthread_join() */
+	char *pt_name; /* Thread's name, set by the app. */
+	int pt_willpark; /* About to park */
+	lwpid_t pt_unpark; /* Unpark this when parking */
+	struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
+	pthread_mutex_t *pt_droplock; /* Drop this lock if cancelled */
+	pthread_cond_t pt_joiners; /* Threads waiting to join. */
+	/* Threads to defer waking, usually until pthread_mutex_unlock(). */
+	lwpid_t pt_waiters[PTHREAD__UNPARK_MAX];
+	size_t pt_nwaiters;
+	/* Stack of cancellation cleanup handlers and their arguments */
+	PTQ_HEAD(, pt_clean_t) pt_cleanup_stack;
+	/* LWP ID and entry on the list of all threads. */
+	lwpid_t pt_lid;
+	RB_ENTRY(__pthread_st) pt_alltree;
+	PTQ_ENTRY(__pthread_st) pt_allq;
+	PTQ_ENTRY(__pthread_st) pt_deadq;
+	/*
+	 * General synchronization data. We try to align, as threads
+	 * on other CPUs will access this data frequently.
+	 */
+	int pt_dummy1 __aligned(128);
+	struct lwpctl *pt_lwpctl; /* Kernel/user comms area */
+	volatile int pt_blocking; /* Blocking in userspace */
+	volatile int pt_rwlocked; /* Handed rwlock successfully */
+	volatile int pt_signalled; /* Received pthread_cond_signal() */
+	volatile int pt_mutexwait; /* Waiting to acquire mutex */
+	void * volatile pt_mutexnext; /* Next thread in chain */
+	void * volatile pt_sleepobj; /* Object slept on */
+	PTQ_ENTRY(__pthread_st) pt_sleep;
+	void (*pt_early)(void *);
+	int pt_dummy2 __aligned(128);
+	/* Thread-specific data. Large so it sits close to the end. */
+	int pt_havespecific;
+	void *pt_specific[PTHREAD_KEYS_MAX];
+	/*
+	 * Context for thread creation. At the end as it's cached
+	 * and then only ever passed to _lwp_create().
+	 */
+	ucontext_t pt_uc;
+};
+#endif /* NetBSD */
+
+
static void native_mutex_lock(pthread_mutex_t *lock);
static void native_mutex_unlock(pthread_mutex_t *lock);
static int native_mutex_trylock(pthread_mutex_t *lock);
@@ -833,6 +920,9 @@
native_reset_timer_thread(void)
{
timer_thread_id = 0;
+#ifdef BROKEN_PTHREAD_T_PT_LID
+    ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self();
+#endif
 }
#ifdef HAVE_SIGALTSTACK