commit 85ee11e317058d44d5c6c29bb4c53acc6c0e22c9
Author: Florian Weimer <fweimer@redhat.com>
Date:   Wed Oct 28 19:32:46 2015 +0100

    malloc: Prevent arena free_list from turning cyclic [BZ #19048]

        [BZ# 19048]
        * malloc/malloc.c (struct malloc_state): Update comment.  Add
        attached_threads member.
        (main_arena): Initialize attached_threads.
        * malloc/arena.c (list_lock): Update comment.
        (ptmalloc_lock_all, ptmalloc_unlock_all): Likewise.
        (ptmalloc_unlock_all2): Reinitialize arena reference counts.
        (detach_arena): New function.
        (_int_new_arena): Initialize arena reference count and detach
        replaced arena.
        (get_free_list, reused_arena): Update reference count and detach
        replaced arena.
        (arena_thread_freeres): Update arena reference count and only put
        unreferenced arenas on the free list.

(Backport reintroduces tsd_getspecific, tsd_setspecific.)
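
The fix introduces a per-arena reference count, attached_threads (serialized
by free_list_lock in the combined patch below).  A thread drops its reference
when it switches arenas or exits, and an arena goes back onto free_list only
when the count reaches zero, so an arena that is still attached elsewhere can
never be inserted a second time (double insertion is what previously made the
free list cyclic).  A minimal standalone sketch of the idea follows; it
roughly mirrors detach_arena and arena_thread_freeres in the patch below, but
detach_and_maybe_free is a hypothetical helper and the locking is elided:

    #include <assert.h>
    #include <stddef.h>

    struct arena
    {
      struct arena *next_free;      /* free list link */
      size_t attached_threads;      /* threads currently using this arena */
    };

    static struct arena *free_list;

    /* Sketch of the detach logic: called (with the free-list lock held)
       when a thread stops using ARENA, e.g. at thread exit.  Only the
       last detaching thread pushes the arena onto free_list, so an arena
       cannot appear on the list twice and the list cannot turn cyclic.  */
    static void
    detach_and_maybe_free (struct arena *arena)
    {
      if (arena == NULL)
        return;
      assert (arena->attached_threads > 0);
      if (--arena->attached_threads == 0)
        {
          arena->next_free = free_list;
          free_list = arena;
        }
    }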

commit 3da825ce483903e3a881a016113b3e59fd4041de
Author: Florian Weimer <fweimer@redhat.com>
Date:   Wed Dec 16 12:39:48 2015 +0100

    malloc: Fix attached thread reference count handling [BZ #19243]
    
    reused_arena can increase the attached thread count of arenas on the
    free list.  This means that the assertion that the reference count is
    zero is incorrect.  In this case, the reference count initialization
    is incorrect as well and could cause arenas to be put on the free
    list too early (while they still have attached threads).
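
In other words, there are two attach paths: get_free_list pops an arena off
free_list and increments its count, while reused_arena walks the global
main_arena.next list and may attach to an arena that is still sitting on
free_list.  A free-list arena can therefore have a nonzero reference count,
which is why the count is incremented rather than asserted to be zero and
reset when the arena is taken off the list.  A simplified standalone sketch
of the two paths (locking elided; the helper names are illustrative, not the
patch's functions):

    #include <stddef.h>

    struct arena
    {
      struct arena *next;           /* global arena list, never shrinks */
      struct arena *next_free;      /* free list link */
      size_t attached_threads;
    };

    static struct arena *free_list;

    /* Sketch, path 1: take an arena off the free list (cf. get_free_list)
       and attach the calling thread to it.  */
    static struct arena *
    attach_from_free_list (void)
    {
      struct arena *a = free_list;
      if (a != NULL)
        {
          free_list = a->next_free;
          ++a->attached_threads;
        }
      return a;
    }

    /* Sketch, path 2: attach to an arena found by walking the global list
       (cf. reused_arena).  The arena may still be on free_list, so its
       count can become nonzero while it is listed there.  */
    static void
    attach_reused (struct arena *a)
    {
      ++a->attached_threads;
    }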

commit 90c400bd4904b0240a148f0b357a5cbc36179239
Author: Florian Weimer <fweimer@redhat.com>
Date:   Mon Dec 21 16:42:46 2015 +0100

    malloc: Fix list_lock/arena lock deadlock [BZ #19182]

commit 7962541a32eff5597bc4207e781cfac8d1bb0d87
Author: Florian Weimer <fweimer@redhat.com>
Date:   Wed Dec 23 17:23:33 2015 +0100

    malloc: Update comment for list_lock

Index: b/malloc/arena.c
===================================================================
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -67,10 +67,30 @@ extern int sanity_check_heap_info_alignm
 /* Thread specific data */
 
 static tsd_key_t arena_key;
-static mutex_t list_lock = MUTEX_INITIALIZER;
+
+/* Arena free list.  free_list_lock synchronizes access to the
+   free_list variable below, and the next_free and attached_threads
+   members of struct malloc_state objects.  No other locks must be
+   acquired after free_list_lock has been acquired.  */
+
+static mutex_t free_list_lock = MUTEX_INITIALIZER;
 static size_t narenas = 1;
 static mstate free_list;
 
+/* list_lock prevents concurrent writes to the next member of struct
+   malloc_state objects.
+
+   Read access to the next member is supposed to synchronize with the
+   atomic_write_barrier and the write to the next member in
+   _int_new_arena.  This suffers from data races; see the FIXME
+   comments in _int_new_arena and reused_arena.
+
+   list_lock also prevents concurrent forks.  At the time list_lock is
+   acquired, no arena lock must have been acquired, but it is
+   permitted to acquire arena locks subsequently, while list_lock is
+   acquired.  */
+static mutex_t list_lock = MUTEX_INITIALIZER;
+
 /* Mapped memory in non-main arenas (reliable only for NO_THREADS). */
 static unsigned long arena_mem;
 
@@ -210,6 +230,9 @@ ptmalloc_lock_all (void)
   if (__malloc_initialized < 1)
     return;
 
+  /* We do not acquire free_list_lock here because we completely
+     reconstruct free_list in ptmalloc_unlock_all2.  */
+
   if (mutex_trylock (&list_lock))
     {
       void *my_arena;
@@ -233,7 +256,10 @@ ptmalloc_lock_all (void)
   save_free_hook = __free_hook;
   __malloc_hook = malloc_atfork;
   __free_hook = free_atfork;
-  /* Only the current thread may perform malloc/free calls now. */
+  /* Only the current thread may perform malloc/free calls now.
+     save_arena will be reattached to the current thread, in
+     ptmalloc_unlock_all, so save_arena->attached_threads is not
+     updated.  */
   tsd_getspecific (arena_key, save_arena);
   tsd_setspecific (arena_key, ATFORK_ARENA_PTR);
 out:
@@ -251,6 +277,9 @@ ptmalloc_unlock_all (void)
   if (--atfork_recursive_cntr != 0)
     return;
 
+  /* Replace ATFORK_ARENA_PTR with save_arena.
+     save_arena->attached_threads was not changed in ptmalloc_lock_all
+     and is still correct.  */
   tsd_setspecific (arena_key, save_arena);
   __malloc_hook = save_malloc_hook;
   __free_hook = save_free_hook;
@@ -282,12 +311,20 @@ ptmalloc_unlock_all2 (void)
   tsd_setspecific (arena_key, save_arena);
   __malloc_hook = save_malloc_hook;
   __free_hook = save_free_hook;
+
+  /* Push all arenas to the free list, except save_arena, which is
+     attached to the current thread.  */
+  mutex_init (&free_list_lock);
+  if (save_arena != NULL)
+    ((mstate) save_arena)->attached_threads = 1;
   free_list = NULL;
   for (ar_ptr = &main_arena;; )
     {
       mutex_init (&ar_ptr->mutex);
       if (ar_ptr != save_arena)
         {
+	  /* This arena is no longer attached to any thread.  */
+	  ar_ptr->attached_threads = 0;
           ar_ptr->next_free = free_list;
           free_list = ar_ptr;
         }
@@ -295,6 +332,7 @@ ptmalloc_unlock_all2 (void)
       if (ar_ptr == &main_arena)
         break;
     }
+
   mutex_init (&list_lock);
   atfork_recursive_cntr = 0;
 }
@@ -721,6 +759,22 @@ heap_trim (heap_info *heap, size_t pad)
 
 /* Create a new arena with initial size "size".  */
 
+/* If REPLACED_ARENA is not NULL, detach it from this thread.  Must be
+   called while free_list_lock is held.  */
+static void
+detach_arena (mstate replaced_arena)
+{
+  if (replaced_arena != NULL)
+    {
+      assert (replaced_arena->attached_threads > 0);
+      /* The current implementation only detaches from main_arena in
+	 case of allocation failure.  This means that it is likely not
+	 beneficial to put the arena on free_list even if the
+	 reference count reaches zero.  */
+      --replaced_arena->attached_threads;
+    }
+}
+
 static mstate
 _int_new_arena (size_t size)
 {
@@ -742,6 +796,7 @@ _int_new_arena (size_t size)
     }
   a = h->ar_ptr = (mstate) (h + 1);
   malloc_init_state (a);
+  a->attached_threads = 1;
   /*a->next = NULL;*/
   a->system_mem = a->max_system_mem = h->size;
   arena_mem += h->size;
@@ -755,34 +810,67 @@ _int_new_arena (size_t size)
   set_head (top (a), (((char *) h + h->size) - ptr) | PREV_INUSE);
 
   LIBC_PROBE (memory_arena_new, 2, a, size);
+  mstate replaced_arena;
+  {
+    void *vptr = NULL;
+    replaced_arena = tsd_getspecific (arena_key, vptr);
+  }
   tsd_setspecific (arena_key, (void *) a);
   mutex_init (&a->mutex);
-  (void) mutex_lock (&a->mutex);
 
   (void) mutex_lock (&list_lock);
 
   /* Add the new arena to the global list.  */
   a->next = main_arena.next;
+  /* FIXME: The barrier is an attempt to synchronize with read access
+     in reused_arena, which does not acquire list_lock while
+     traversing the list.  */
   atomic_write_barrier ();
   main_arena.next = a;
 
   (void) mutex_unlock (&list_lock);
 
+  (void) mutex_lock (&free_list_lock);
+  detach_arena (replaced_arena);
+  (void) mutex_unlock (&free_list_lock);
+
+  /* Lock this arena.  NB: Another thread may have been attached to
+     this arena because the arena is now accessible from the
+     main_arena.next list and could have been picked by reused_arena.
+     This can only happen for the last arena created (before the arena
+     limit is reached).  At this point, some arena has to be attached
+     to two threads.  We could acquire the arena lock before list_lock
+     to make it less likely that reused_arena picks this new arena,
+     but this could result in a deadlock with ptmalloc_lock_all.  */
+
+  (void) mutex_lock (&a->mutex);
+
   return a;
 }
 
 
+/* Remove an arena from free_list.  The arena may be in use because it
+   was attached concurrently to a thread by reused_arena below.  */
 static mstate
 get_free_list (void)
 {
+  void *vptr = NULL;
+  mstate replaced_arena = tsd_getspecific (arena_key, vptr);
   mstate result = free_list;
   if (result != NULL)
     {
-      (void) mutex_lock (&list_lock);
+      (void) mutex_lock (&free_list_lock);
       result = free_list;
       if (result != NULL)
-        free_list = result->next_free;
-      (void) mutex_unlock (&list_lock);
+	{
+	  free_list = result->next_free;
+
+	  /* The arena will be attached to this thread.  */
+	  ++result->attached_threads;
+
+	  detach_arena (replaced_arena);
+	}
+      (void) mutex_unlock (&free_list_lock);
 
       if (result != NULL)
         {
@@ -802,16 +890,20 @@ static mstate
 reused_arena (mstate avoid_arena)
 {
   mstate result;
+  /* FIXME: Access to next_to_use suffers from data races.  */
   static mstate next_to_use;
   if (next_to_use == NULL)
     next_to_use = &main_arena;
 
+  /* Iterate over all arenas (including those linked from
+     free_list).  */
   result = next_to_use;
   do
     {
       if (!arena_is_corrupt (result) && !mutex_trylock (&result->mutex))
         goto out;
 
+      /* FIXME: This is a data race, see _int_new_arena.  */
       result = result->next;
     }
   while (result != next_to_use);
@@ -840,6 +932,18 @@ reused_arena (mstate avoid_arena)
   (void) mutex_lock (&result->mutex);
 
 out:
+  /* Attach the arena to the current thread.  Note that we may have
+     selected an arena which was on free_list.  */
+  {
+    /* Update the arena thread attachment counters.  */
+    void *vptr = NULL;
+    mstate replaced_arena = tsd_getspecific (arena_key, vptr);
+    (void) mutex_lock (&free_list_lock);
+    detach_arena (replaced_arena);
+    ++result->attached_threads;
+    (void) mutex_unlock (&free_list_lock);
+  }
+
   LIBC_PROBE (memory_arena_reuse, 2, result, avoid_arena);
   tsd_setspecific (arena_key, (void *) result);
   next_to_use = result->next;
@@ -932,10 +1036,16 @@ arena_thread_freeres (void)
 
   if (a != NULL)
     {
-      (void) mutex_lock (&list_lock);
-      a->next_free = free_list;
-      free_list = a;
-      (void) mutex_unlock (&list_lock);
+      (void) mutex_lock (&free_list_lock);
+      /* If this was the last attached thread for this arena, put the
+	 arena on the free list.  */
+      assert (a->attached_threads > 0);
+      if (--a->attached_threads == 0)
+	{
+	  a->next_free = free_list;
+	  free_list = a;
+	}
+      (void) mutex_unlock (&free_list_lock);
     }
 }
 text_set_element (__libc_thread_subfreeres, arena_thread_freeres);
Index: b/malloc/malloc.c
===================================================================
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1709,9 +1709,15 @@ struct malloc_state
   /* Linked list */
   struct malloc_state *next;
 
-  /* Linked list for free arenas.  */
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
   struct malloc_state *next_free;
 
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
   /* Memory allocated from the system in this arena.  */
   INTERNAL_SIZE_T system_mem;
   INTERNAL_SIZE_T max_system_mem;
@@ -1755,7 +1761,8 @@ struct malloc_par
 static struct malloc_state main_arena =
 {
   .mutex = MUTEX_INITIALIZER,
-  .next = &main_arena
+  .next = &main_arena,
+  .attached_threads = 1
 };
 
 /* There is only one instance of the malloc parameters.  */
Index: b/malloc/Makefile
===================================================================
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -28,7 +28,7 @@ tests := mallocbug tst-malloc tst-valloc
 	 tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \
 	 tst-malloc-usable tst-realloc tst-posix_memalign \
 	 tst-pvalloc tst-memalign tst-mallopt tst-scratch_buffer \
-	 tst-malloc-backtrace
+	 tst-malloc-backtrace tst-malloc-thread-exit
 test-srcs = tst-mtrace
 
 routines = malloc morecore mcheck mtrace obstack \
@@ -47,6 +47,8 @@ libmemusage-inhibit-o = $(filter-out .os
 
 $(objpfx)tst-malloc-backtrace: $(common-objpfx)nptl/libpthread.so \
 			       $(common-objpfx)nptl/libpthread_nonshared.a
+$(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \
+			       $(common-objpfx)nptl/libpthread_nonshared.a
 
 # These should be removed by `make clean'.
 extra-objs = mcheck-init.o libmcheck.a
Index: b/malloc/tst-malloc-thread-exit.c
===================================================================
--- /dev/null
+++ b/malloc/tst-malloc-thread-exit.c
@@ -0,0 +1,217 @@
+/* Test malloc with concurrent thread termination.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This test spawns a number of outer threads, equal to the arena
+   limit.  The outer threads run a loop which starts and joins two
+   different kinds of threads: the first kind allocates (attaching an
+   arena to the thread; malloc_first_thread) and waits, the second
+   kind waits and allocates (wait_first_thread).  Both kinds of
+   threads exit immediately after waiting.  The hope is that this will
+   exhibit races in thread termination and arena management,
+   particularly related to the arena free list.  */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define TIMEOUT 7
+
+static bool termination_requested;
+static int inner_thread_count = 4;
+static size_t malloc_size = 32;
+
+static void
+__attribute__ ((noinline, noclone))
+unoptimized_free (void *ptr)
+{
+  free (ptr);
+}
+
+static void *
+malloc_first_thread (void * closure)
+{
+  pthread_barrier_t *barrier = closure;
+  void *ptr = malloc (malloc_size);
+  if (ptr == NULL)
+    {
+      printf ("error: malloc: %m\n");
+      abort ();
+    }
+  int ret = pthread_barrier_wait (barrier);
+  if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+    {
+      errno = ret;
+      printf ("error: pthread_barrier_wait: %m\n");
+      abort ();
+    }
+  unoptimized_free (ptr);
+  return NULL;
+}
+
+static void *
+wait_first_thread (void * closure)
+{
+  pthread_barrier_t *barrier = closure;
+  int ret = pthread_barrier_wait (barrier);
+  if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+    {
+      errno = ret;
+      printf ("error: pthread_barrier_wait: %m\n");
+      abort ();
+    }
+  void *ptr = malloc (malloc_size);
+  if (ptr == NULL)
+    {
+      printf ("error: malloc: %m\n");
+      abort ();
+    }
+  unoptimized_free (ptr);
+  return NULL;
+}
+
+static void *
+outer_thread (void *closure)
+{
+  pthread_t *threads = calloc (sizeof (*threads), inner_thread_count);
+  if (threads == NULL)
+    {
+      printf ("error: calloc: %m\n");
+      abort ();
+    }
+
+  while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED))
+    {
+      pthread_barrier_t barrier;
+      int ret = pthread_barrier_init (&barrier, NULL, inner_thread_count + 1);
+      if (ret != 0)
+        {
+          errno = ret;
+          printf ("pthread_barrier_init: %m\n");
+          abort ();
+        }
+      for (int i = 0; i < inner_thread_count; ++i)
+        {
+          void *(*func) (void *);
+          if ((i  % 2) == 0)
+            func = malloc_first_thread;
+          else
+            func = wait_first_thread;
+          ret = pthread_create (threads + i, NULL, func, &barrier);
+          if (ret != 0)
+            {
+              errno = ret;
+              printf ("error: pthread_create: %m\n");
+              abort ();
+            }
+        }
+      ret = pthread_barrier_wait (&barrier);
+      if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+        {
+          errno = ret;
+          printf ("pthread_wait: %m\n");
+          abort ();
+        }
+      for (int i = 0; i < inner_thread_count; ++i)
+        {
+          ret = pthread_join (threads[i], NULL);
+          if (ret != 0)
+            {
+              errno = ret;
+              printf ("error: pthread_join: %m\n");
+              abort ();
+            }
+        }
+      ret = pthread_barrier_destroy (&barrier);
+      if (ret != 0)
+        {
+          errno = ret;
+          printf ("pthread_barrier_destroy: %m\n");
+          abort ();
+        }
+    }
+
+  free (threads);
+
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  /* The number of top-level threads should be equal to the number of
+     arenas.  See arena_get2.  */
+  long outer_thread_count = sysconf (_SC_NPROCESSORS_ONLN);
+  if (outer_thread_count >= 1)
+    {
+      /* See NARENAS_FROM_NCORES in malloc.c.  */
+      if (sizeof (long) == 4)
+        outer_thread_count *= 2;
+      else
+        outer_thread_count *= 8;
+    }
+
+  /* Leave some room for shutting down all threads gracefully.  */
+  int timeout = TIMEOUT - 2;
+
+  pthread_t *threads = calloc (sizeof (*threads), outer_thread_count);
+  if (threads == NULL)
+    {
+      printf ("error: calloc: %m\n");
+      abort ();
+    }
+
+  for (long i = 0; i < outer_thread_count; ++i)
+    {
+      int ret = pthread_create (threads + i, NULL, outer_thread, NULL);
+      if (ret != 0)
+        {
+          errno = ret;
+          printf ("error: pthread_create: %m\n");
+          abort ();
+        }
+    }
+
+  struct timespec ts = {timeout, 0};
+  if (nanosleep (&ts, NULL))
+    {
+      printf ("error: error: nanosleep: %m\n");
+      abort ();
+    }
+
+  __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED);
+
+  for (long i = 0; i < outer_thread_count; ++i)
+    {
+      int ret = pthread_join (threads[i], NULL);
+      if (ret != 0)
+        {
+          errno = ret;
+          printf ("error: pthread_join: %m\n");
+          abort ();
+        }
+    }
+  free (threads);
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"