>From 1572a822c722452fcf3279780c92679b10b40a0b Mon Sep 17 00:00:00 2001 From: Shayan Pooya Date: Sat, 25 Jan 2014 00:37:12 +0000 Subject: [PATCH 1/2] sort: fix two threading issues reported by valgrind Neither issue impacts on the correct operation of sort. The issues were detected by both valgrind 3.8.1 and 3.9.0 using: seq 200000 > file.sort valgrind --tool=drd src/sort file.sort -o file.sort For tool usage and error details see: http://valgrind.org/docs/manual/drd-manual.html * src/sort.c (queue_insert): Unlock mutex _after_ signalling the associated condition variable. Valgrind flags this with: "Probably a race condition: condition variable 0xffeffffb0 has been signaled but the associated mutex 0xffeffff88 is not locked by the signalling thread." The explanation at the above URL is: "Sending a signal to a condition variable while no lock is held on the mutex associated with the condition variable. This is a common programming error which can cause subtle race conditions and unpredictable behavior." This should at least give more defined scheduling behavior. (merge_tree_destroy): Make symmetrical with merge_tree_init() thus destroying the correct mutex. Valgrind flags this with: "The object at address 0x5476cf8 is not a mutex." --- src/sort.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/sort.c b/src/sort.c index 49caae5..af95f71 100644 --- a/src/sort.c +++ b/src/sort.c @@ -3239,6 +3239,8 @@ merge_tree_init (size_t nthreads, size_t nlines, struct line *dest) static void merge_tree_destroy (struct merge_node *merge_tree) { + struct merge_node *root = merge_tree; + pthread_mutex_destroy (&root->lock); free (merge_tree); } @@ -3354,8 +3356,8 @@ queue_insert (struct merge_node_queue *queue, struct merge_node *node) pthread_mutex_lock (&queue->mutex); heap_insert (queue->priority_queue, node); node->queued = true; - pthread_mutex_unlock (&queue->mutex); pthread_cond_signal (&queue->cond); + pthread_mutex_unlock (&queue->mutex); } /* Pop the top node off the priority QUEUE, lock the node, return it. */ @@ -3950,7 +3952,6 @@ sort (char *const *files, size_t nfiles, char const *output_file, sortlines (line, nthreads, buf.nlines, root, &queue, tfp, temp_output); queue_destroy (&queue); - pthread_mutex_destroy (&root->lock); merge_tree_destroy (merge_tree); } else -- 1.7.7.6 >From e1818c5c263fde8f189cd1a1b1e51bba1a34a97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Fri, 11 Jul 2014 16:11:22 +0100 Subject: [PATCH 2/2] sort: avoid undefined operation with destroying locked mutex This didn't seem to cause any invalid operation on GNU/Linux at least, but depending on the implementation, mutex deadlocks could occur. For example this might be the cause of lockups seen on Solaris: http://lists.gnu.org/archive/html/coreutils/2013-03/msg00048.html This was identified with valgrind 3.9.0 with this setup: seq 200000 > file.sort valgrind --tool=drd src/sort file.sort -o file.sort With that, valgrind would _intermittently_ report the following: Destroying locked mutex: mutex 0x5419548, recursion count 1, owner 2. at 0x4C2E3F0: pthread_mutex_destroy(in vgpreload_drd-amd64-linux.so) by 0x409FA2: sortlines (sort.c:3649) by 0x409E26: sortlines (sort.c:3621) by 0x40AA9E: sort (sort.c:3955) by 0x40C5D9: main (sort.c:4739) mutex 0x5419548 was first observed at: at 0x4C2DE82: pthread_mutex_init(in vgpreload_drd-amd64-linux.so) by 0x409266: init_node (sort.c:3276) by 0x4092F4: init_node (sort.c:3286) by 0x4090DD: merge_tree_init (sort.c:3234) by 0x40AA5A: sort (sort.c:3951) by 0x40C5D9: main (sort.c:4739) Thread 2: The object at address 0x5419548 is not a mutex. at 0x4C2F4A4: pthread_mutex_unlock(in vgpreload_drd-amd64-linux.so) by 0x4093CA: unlock_node (sort.c:3323) by 0x409C85: merge_loop (sort.c:3531) by 0x409F8F: sortlines (sort.c:3644) by 0x409CE3: sortlines_thread (sort.c:3574) by 0x4E44F32: start_thread (in /usr/lib64/libpthread-2.18.so) by 0x514EEAC: clone (in /usr/lib64/libc-2.18.so) * src/sort.c (sortlines): Move pthread_mutex_destroy() out to merge_tree_destroy(), so that we don't overlap mutex destruction with threads still operating on the nodes. (sort): Call the destructors only with "lint" defined, as the memory used will be deallocated implicitly at process end. * NEWS: Mention the bug fix. --- NEWS | 4 ++++ src/sort.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 4e90b79..f2c2e18 100644 --- a/NEWS +++ b/NEWS @@ -93,6 +93,10 @@ GNU coreutils NEWS -*- outline -*- shuf --repeat no longer dumps core if the input is empty. [bug introduced with the --repeat feature in coreutils-8.22] + sort when using multiple threads now avoids undefined behavior with mutex + destruction, which could cause deadlocks on some implementations. + [bug introduced in coreutils-8.6] + tail -f now uses polling mode for VXFS to cater for its clustered mode. [bug introduced with inotify support added in coreutils-7.5] diff --git a/src/sort.c b/src/sort.c index af95f71..c249319 100644 --- a/src/sort.c +++ b/src/sort.c @@ -3237,10 +3237,17 @@ merge_tree_init (size_t nthreads, size_t nlines, struct line *dest) /* Destroy the merge tree. */ static void -merge_tree_destroy (struct merge_node *merge_tree) +merge_tree_destroy (size_t nthreads, struct merge_node *merge_tree) { - struct merge_node *root = merge_tree; - pthread_mutex_destroy (&root->lock); + size_t n_nodes = nthreads * 2; + struct merge_node *node = merge_tree; + + while (n_nodes--) + { + pthread_mutex_destroy (&node->lock); + node++; + } + free (merge_tree); } @@ -3642,8 +3649,6 @@ sortlines (struct line *restrict lines, size_t nthreads, queue_insert (queue, node); merge_loop (queue, total_lines, tfp, temp_output); } - - pthread_mutex_destroy (&node->lock); } /* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are @@ -3947,12 +3952,14 @@ sort (char *const *files, size_t nfiles, char const *output_file, queue_init (&queue, nthreads); struct merge_node *merge_tree = merge_tree_init (nthreads, buf.nlines, line); - struct merge_node *root = merge_tree + 1; - sortlines (line, nthreads, buf.nlines, root, + sortlines (line, nthreads, buf.nlines, merge_tree + 1, &queue, tfp, temp_output); + +#ifdef lint + merge_tree_destroy (nthreads, merge_tree); queue_destroy (&queue); - merge_tree_destroy (merge_tree); +#endif } else write_unique (line - 1, tfp, temp_output); -- 1.7.7.6