Project

General

Profile

Actions

Bug #2724

closed

fork from other than the main thread causes wrong pthread condition on NetBSD

Added by naruse (Yui NARUSE) over 12 years ago. Updated about 10 years ago.

Status:
Third Party's Issue
Priority:
Normal
Assignee:
-
Target version:
-
ruby -v:
ruby 1.9.2dev (2010-02-07 trunk 26615) [i386-netbsdelf5.0.1]
Backport:
[ruby-dev:40354]

Description

NetBSD 5.0.[01] において、main thread 以外の pthread から fork すると、
pthread とカーネルスレッド (lwp) との関連が壊れるという現象が確認されています。

後述のパッチがあまりにアレなのでこの問題は Third Party's Issue とし、
Ruby 側では修正を入れない事としますが、情報の共有と記録のために
ここにチケットを切っておきます。

なお、この workaround の作成には @_enamiさんの助けがありました。

追記:
NetBSD 側では kern/42772 として報告、修正されています。
http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=42772

Index: thread_pthread.c

--- thread_pthread.c (revision 26615)
+++ thread_pthread.c (working copy)
@@ -17,6 +17,93 @@
#include <sys/resource.h>
#endif

+#if defined(__NetBSD_Version__) && __NetBSD_Version__ >= 500000000
+/* Hack for NetBSD 5.0.x's broken pthread->pt_lid */
+/* Copied from /src/lib/libpthread/pthread_int.h */
+#define BROKEN_PTHREAD_T_PT_LID
+#include <lwp.h>
+#include <pthread_queue.h>
+#include <sys/tree.h>
+
+#define PTHREAD_KEYS_MAX 256
+#define PTHREAD__UNPARK_MAX 32
+
+/*
+ * The size of this structure needs to be no larger than struct
+ * __pthread_cleanup_store, defined in pthread.h.
+ */
+struct pt_clean_t {
+    PTQ_ENTRY(pt_clean_t)   ptc_next;
+    void    (*ptc_cleanup)(void *);
+    void    *ptc_arg;
+};
+
+struct pthread_lock_ops {
+    void    (*plo_init)(__cpu_simple_lock_t *);
+    int     (*plo_try)(__cpu_simple_lock_t *);
+    void    (*plo_unlock)(__cpu_simple_lock_t *);
+    void    (*plo_lock)(__cpu_simple_lock_t *);
+};
+
+struct __pthread_st {
+    pthread_t       pt_self;        /* Must be first. */
+    unsigned int    pt_magic;       /* Magic number */
+    int             pt_state;       /* running, blocked, etc. */
+    pthread_mutex_t pt_lock;        /* lock on state */
+    int             pt_flags;       /* see PT_FLAG_* below */
+    int             pt_cancel;      /* Deferred cancellation */
+    int             pt_errno;       /* Thread-specific errno. */
+    stack_t         pt_stack;       /* Our stack */
+    void            *pt_exitval;    /* Read by pthread_join() */
+    char            *pt_name;       /* Thread's name, set by the app. */
+    int             pt_willpark;    /* About to park */
+    lwpid_t         pt_unpark;      /* Unpark this when parking */
+    struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
+    pthread_mutex_t *pt_droplock;   /* Drop this lock if cancelled */
+    pthread_cond_t  pt_joiners;     /* Threads waiting to join. */
+    /* Threads to defer waking, usually until pthread_mutex_unlock(). */
+    lwpid_t         pt_waiters[PTHREAD__UNPARK_MAX];
+    size_t          pt_nwaiters;
+    /* Stack of cancellation cleanup handlers and their arguments */
+    PTQ_HEAD(, pt_clean_t)  pt_cleanup_stack;
+    /* LWP ID and entry on the list of all threads. */
+    lwpid_t         pt_lid;
+    RB_ENTRY(__pthread_st) pt_alltree;
+    PTQ_ENTRY(__pthread_st) pt_allq;
+    PTQ_ENTRY(__pthread_st) pt_deadq;
+    /*
+     * General synchronization data.  We try to align, as threads
+     * on other CPUs will access this data frequently.
+     */
+    int             pt_dummy1 __aligned(128);
+    struct lwpctl   *pt_lwpctl;     /* Kernel/user comms area */
+    volatile int    pt_blocking;    /* Blocking in userspace */
+    volatile int    pt_rwlocked;    /* Handed rwlock successfully */
+    volatile int    pt_signalled;   /* Received pthread_cond_signal() */
+    volatile int    pt_mutexwait;   /* Waiting to acquire mutex */
+    void * volatile pt_mutexnext;   /* Next thread in chain */
+    void * volatile pt_sleepobj;    /* Object slept on */
+    PTQ_ENTRY(__pthread_st) pt_sleep;
+    void            (*pt_early)(void *);
+    int             pt_dummy2 __aligned(128);
+    /* Thread-specific data.  Large so it sits close to the end. */
+    int             pt_havespecific;
+    void            *pt_specific[PTHREAD_KEYS_MAX];
+    /*
+     * Context for thread creation.  At the end as it's cached
+     * and then only ever passed to _lwp_create().
+     */
+    ucontext_t      pt_uc;
+};
+#endif /* NetBSD */
+
+
static void native_mutex_lock(pthread_mutex_t *lock);
static void native_mutex_unlock(pthread_mutex_t *lock);
static int native_mutex_trylock(pthread_mutex_t *lock);
@@ -833,6 +920,9 @@
native_reset_timer_thread(void)
{
timer_thread_id = 0;
+#ifdef BROKEN_PTHREAD_T_PT_LID
+    ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self();
+#endif
}

#ifdef HAVE_SIGALTSTACK


Related issues 3 (0 open, 3 closed)

Related to Backport187 - Backport #2603: NetBSD 5.0以降でpthreadの処理に由来する不具合 | Closed | shyouhei (Shyouhei Urabe) | 01/14/2010 | Actions
Related to Ruby master - Bug #270: lazy timer thread creation | Closed | ko1 (Koichi Sasada) | Actions
Related to Ruby master - Bug #6341: SIGSEGV: Thread.new { fork { GC.start } }.join | Third Party's Issue | 04/22/2012 | Actions
Actions #1

Updated by skandragon (Michael Graff) about 12 years ago

=begin
I made a patch (based upon this problem report) to NetBSD-current two weeks ago. I do not recommend that this patch be accepted; instead, the ticket should be marked as an "OS vendor problem" and fixed as such.

I have also submitted a pull-up request to the NetBSD release engineers for NetBSD 5.x, so the fix should make it into the next NetBSD 5.x release on that branch.
=end

Actions #2

Updated by usa (Usaku NAKAMURA) about 12 years ago

=begin
The original reporter, Naruse, says:
"This is a problem in the OS itself.
The following patch is only a sample workaround.
We shouldn't apply this patch to Ruby.
I am leaving it here only as a record, for the sake of sharing knowledge."

So, this ticket has already been marked as "Third Party's Issue".

BTW, it's very good news that this problem has been fixed on the NetBSD side.
Thank you for the useful information!
=end

Updated by naruse (Yui NARUSE) about 10 years ago

  • Description updated (diff)

Updated by naruse (Yui NARUSE) about 10 years ago

  • Description updated (diff)
Actions

Also available in: Atom PDF