coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: bug#7489: [coreutils] over aggressive threads in sort


From: Chen Guo
Subject: Re: bug#7489: [coreutils] over aggressive threads in sort
Date: Mon, 6 Dec 2010 00:25:26 -0800

Hi Professor Eggert,
On Sun, Dec 5, 2010 at 11:01 PM, Paul Eggert <address@hidden> wrote:
> On 12/05/2010 09:16 PM, Chen Guo wrote:
>> Before saying anything else, I should note that for mutexes, on 4
>> threads 20% of the time there's a segfault on a seemingly innocuous
>> line in queue_insert ():
>>   node->queued = true
>
> It does sound like mutexes are the way to go, and that this bug
> needs to be fixed.  I assume that this is the call to queue_insert
> from queue_check_insert_parent?  What's the backtrace?  (And
> what patch are you using, to get mutexes?)
>

    I've attached the patch (inlined at the bottom). Here's the GDB
crash, with backtrace. I also printed node->queued in GDB, so it's
evident that its accessible.

(gdb) run --parallel 2 rec_1M > /dev/null
Starting program: /data/chen/Coding/Coreutils/test/sort-mutex
--parallel 2 rec_1M > /dev/null
[Thread debugging using libthread_db enabled]
[New Thread 0x7fffcbb95710 (LWP 19850)]

Program received signal SIGSEGV, Segmentation fault.
queue_insert (queue=0x7fffffffe0c0, node=0x7ffff7ddc560) at sort.c:3202
3202      node->queued = true;
(gdb) bt
#0  queue_insert (queue=0x7fffffffe0c0, node=0x7ffff7ddc560) at sort.c:3202
#1  0x0000000000405844 in queue_check_insert_parent (lines=<value
optimized out>, dest=<value optimized out>, nthreads=<value optimized
out>, total_lines=<value optimized out>, parent=<value optimized out>,
    lo_child=<value optimized out>, queue=0x7fffffffe0c0,
tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3340
#2  merge_loop (lines=<value optimized out>, dest=<value optimized
out>, nthreads=<value optimized out>, total_lines=<value optimized
out>, parent=<value optimized out>, lo_child=<value optimized out>,
queue=0x7fffffffe0c0,
    tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3374
#3  sortlines (lines=<value optimized out>, dest=<value optimized
out>, nthreads=<value optimized out>, total_lines=<value optimized
out>, parent=<value optimized out>, lo_child=<value optimized out>,
queue=0x7fffffffe0c0,
    tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3515
#4  0x0000000000405c2b in sortlines (lines=0x7ffff7344030, dest=<value
optimized out>, nthreads=<value optimized out>, total_lines=<value
optimized out>, parent=<value optimized out>, lo_child=<value
optimized out>,
    queue=0x7fffffffe0c0, tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3494
#5  0x00000000004095e8 in sort (argc=<value optimized out>,
argv=<value optimized out>) at sort.c:3804
#6  main (argc=<value optimized out>, argv=<value optimized out>) at sort.c:4576
(gdb) print node
$1 = (struct merge_node *) 0x7ffff7ddc560
(gdb) print node->queued
$2 = false



And here's the patch:

>From 5a1d0b8f1a4c6d7cd99da7376f8c03f8a4cc2be9 Mon Sep 17 00:00:00 2001
From: Chen Guo <address@hidden>
Date: Mon, 6 Dec 2010 00:15:42 -0800
Subject: [PATCH] sort: change spinlocks to mutexes.

* src/sort.c: (struct merge_node) Change member lock to mutex. All
uses changed.
---
 src/sort.c |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/sort.c b/src/sort.c
index a4c2cbf..9cfc814 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -241,7 +241,7 @@ struct merge_node
   struct merge_node *parent;    /* Parent node. */
   unsigned int level;           /* Level in merge tree. */
   bool queued;                  /* Node is already in heap. */
-  pthread_spinlock_t lock;      /* Lock for node operations. */
+  pthread_mutex_t lock;         /* Lock for node operations. */
 };

 /* Priority queue of merge nodes. */
@@ -3157,7 +3157,7 @@ compare_nodes (void const *a, void const *b)
 static inline void
 lock_node (struct merge_node *node)
 {
-  pthread_spin_lock (&node->lock);
+  pthread_mutex_lock (&node->lock);
 }

 /* Unlock a merge tree NODE. */
@@ -3165,7 +3165,7 @@ lock_node (struct merge_node *node)
 static inline void
 unlock_node (struct merge_node *node)
 {
-  pthread_spin_unlock (&node->lock);
+  pthread_mutex_unlock (&node->lock);
 }

 /* Destroy merge QUEUE. */
@@ -3479,7 +3479,7 @@ sortlines (struct line *restrict lines, struct
line *restrict dest,
   node.parent = parent;
   node.level = parent->level + 1;
   node.queued = false;
-  pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE);
+  pthread_mutex_init (&node.lock, NULL);

   /* Calculate thread arguments. */
   unsigned long int lo_threads = nthreads / 2;
@@ -3515,7 +3515,7 @@ sortlines (struct line *restrict lines, struct
line *restrict dest,
       merge_loop (queue, total_lines, tfp, temp_output);
     }

-  pthread_spin_destroy (&node.lock);
+  pthread_mutex_destroy (&node.lock);
 }

 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
@@ -3799,12 +3799,12 @@ sort (char * const *files, size_t nfiles, char
const *output_file,
               node.parent = NULL;
               node.level = MERGE_END;
               node.queued = false;
-              pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE);
+              pthread_mutex_init (&node.lock, NULL);

               sortlines (line, line, nthreads, buf.nlines, &node, true,
                          &queue, tfp, temp_output);
               queue_destroy (&queue);
-              pthread_spin_destroy (&node.lock);
+              pthread_mutex_destroy (&node.lock);
             }
           else
             write_unique (line - 1, tfp, temp_output);
-- 
1.7.1

Attachment: mutex.patch
Description: Text Data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]