bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#9780: sort -u throws out non-duplicates


From: Paul Eggert
Subject: bug#9780: sort -u throws out non-duplicates
Date: Fri, 17 Aug 2012 13:34:55 -0700
User-agent: Mozilla/5.0 (X11; Linux i686; rv:14.0) Gecko/20120714 Thunderbird/14.0

OK, I scratched my head for a bit and came up with the following
further patch, which addresses the issues that I mentioned.

From ac405d343c379096c7ed51b481d5ed08ee18d6e0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Fri, 17 Aug 2012 13:26:00 -0700
Subject: [PATCH] sort: simpler fix for sort -u data-loss bug

* src/sort.c (overlap): Remove.
(fillbuf): Do not try to copy saved lines, as that is too risky
in the presence of parallelism, reallocated buffers, etc.
(sort): Invalidate any saved line before sorting a new batch.
---
 src/sort.c |   36 +-----------------------------------
 1 files changed, 1 insertions(+), 35 deletions(-)

diff --git a/src/sort.c b/src/sort.c
index c2d2d49..9dbfee1 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1705,14 +1705,6 @@ limfield (struct line const *line, struct keyfield const *key)
   return ptr;
 }
 
-/* Return true if LINE and the buffer BUF of length LEN overlap.  */
-static inline bool
-overlap (char const *buf, size_t len, struct line const *line)
-{
-  char const *line_end = line->text + line->length;
-  return !(line_end <= buf || buf + len <= line->text);
-}
-
 /* Fill BUF reading from FP, moving buf->left bytes from the end
    of buf->buf to the beginning first.  If EOF is reached and the
    file wasn't terminated by a newline, supply one.  Set up BUF's line
@@ -1753,33 +1745,6 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
              rest of the input file consists entirely of newlines,
              except that the last byte is not a newline.  */
           size_t readsize = (avail - 1) / (line_bytes + 1);
-
-          /* With --unique, when we're about to read into a buffer that
-             overlaps the saved "preceding" line (saved_line), copy the line's
-             .text member to a realloc'd-as-needed temporary buffer and adjust
-             the line's key-defining members if they're set.  */
-          if (unique && overlap (ptr, readsize, &saved_line))
-            {
-              /* Copy saved_line.text into a buffer where it won't be clobbered
-                 and if KEY is non-NULL, adjust saved_line.key* to match.  */
-              static char *safe_text;
-              static size_t safe_text_n_alloc;
-              if (safe_text_n_alloc < saved_line.length)
-                {
-                  safe_text_n_alloc = saved_line.length;
-                  safe_text = x2nrealloc (safe_text, &safe_text_n_alloc, 1);
-                }
-              memcpy (safe_text, saved_line.text, saved_line.length);
-              if (key)
-                {
-                  #define s saved_line
-                  s.keybeg = safe_text + (s.keybeg - s.text);
-                  s.keylim = safe_text + (s.keylim - s.text);
-                  #undef s
-                }
-              saved_line.text = safe_text;
-            }
-
           size_t bytes_read = fread (ptr, 1, readsize, fp);
           char *ptrlim = ptr + bytes_read;
           char *p;
@@ -3928,6 +3893,7 @@ sort (char *const *files, size_t nfiles, char const *output_file,
               break;
             }
 
+          saved_line.text = NULL;
           line = buffer_linelim (&buf);
           if (buf.eof && !nfiles && !ntemps && !buf.left)
             {
-- 
1.7.6.5






reply via email to

[Prev in Thread] Current Thread [Next in Thread]