>From b22891eea88a333387a678fcccabc731327680af Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <address@hidden>
Date: Tue, 23 Feb 2010 08:43:04 +0000
Subject: [PATCH 1/2] sort: add a --debug option to highlight key extents

* src/sort.c (usage): Add description for --debug.
(write_bytes): Pass a line structure so it can subsequently
be passed to compare to highlight the keys when in debug mode.
Also transform TAB and NUL characters written to stdout so
that the highlighting in debug mode aligns correctly.
(human_numcompare): Pass an "endptr" so we can record the extent
of the number matched.
(general_numcompare): Likewise.
(find_unit_order): Likewise.
(getmonth): Likewise.
(numcompare): Likewise.  Note we reuse find_unit_order() for this,
which is a good enough approximation, and means we don't need to
change the strnumcmp() interface.
(check_mixed_SI_IEC): Return whether iec_present, so that can be
used to set the "endptr" in find_unit_order.  Also make the key
parameter optional, which will be the case from numcompare().
(count_tabs): A new function to determine how much to adjust
the mbswidth() values by (TABs don't have a width).
(mark_key): A new function to output the key highlighting to stdout.
(debug_key): A new function to determine the offset and width
of the key highlighting.
(keycompare): Pass the show_debug parameter so the key highlighting
is only displayed when explicitly called.  For each key type, set
the length (lena) and whether leading blanks are auto skipped (skipb)
which are then used by debug_key() to highlight the portion of the
key used in the comparison.
(compare): Pass the show_debug parameter so the key highlighting
is only displayed when explicitly called.  Call debug_key() to
highlight the last resort comparison.
(check): Output highlighting for disorder line to stdout.
(main): Process the --debug option and make it mutually exclusive
with the -o option as I don't see it useful there, even potentially
harmful if someone left a --debug in by mistake when updating a file.
Also, restricting debug output to stdout simplifies the logic
for dealing with temporary files.
* doc/coreutils.texi (sort invocation): Describe the --debug option,
and reference it from the --key description.
* tests/misc/sort-debug-keys: A new test for highlighting keys.
* tests/Makefile.am: Reference the new test.
* NEWS: Mention the new feature.
---
 NEWS                       |    5 +
 doc/coreutils.texi         |    5 +
 src/sort.c                 |  289 ++++++++++++++++++++++++++++++++---------
 tests/Makefile.am          |    1 +
 tests/misc/sort-debug-keys |  317 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 557 insertions(+), 60 deletions(-)
 create mode 100755 tests/misc/sort-debug-keys

diff --git a/NEWS b/NEWS
index 070f338..4c2da67 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,11 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** New features
+
+  sort now accepts the --debug option, to highlight the part of the
+  line significant in the sort.
+
 ** Changes in behavior
 
   sort -g now uses long doubles for greater range and precision.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index c8ba53c..6714ada 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3939,6 +3939,11 @@ multiple fields.
 
 Example:  To sort on the second field, use @option{--key=2,2}
 (@option{-k 2,2}).  See below for more notes on keys and more examples.
+See also the @option{--debug} option to help determine the part
+of the line being used in the sort.
+
+@item --debug
+Highlight the portion of each line used for sorting.
 
 @item --batch-size=@var{nmerge}
 @opindex --batch-size
diff --git a/src/sort.c b/src/sort.c
index 54b97e2..65866b9 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -34,6 +34,7 @@
 #include "hash.h"
 #include "ignore-value.h"
 #include "md5.h"
+#include "mbswidth.h"
 #include "physmem.h"
 #include "posixver.h"
 #include "quote.h"
@@ -291,6 +292,9 @@ static struct keyfield *keylist;
 /* Program used to (de)compress temp files.  Must accept -d.  */
 static char const *compress_program;
 
+/* Annotate the output with extra info to aid the user.  */
+static bool debug;
+
 /* Maximum number of files to merge in one go.  If more than this
    number are present, temp files will be used. */
 static unsigned int nmerge = NMERGE_DEFAULT;
@@ -371,6 +375,7 @@ Other options:\n\
   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
       --compress-program=PROG  compress temporaries with PROG;\n\
                               decompress them with PROG -d\n\
+      --debug               annotate the part of the line used to sort\n\
       --files0-from=F       read input from the files specified by\n\
                             NUL-terminated names in file F;\n\
                             If F is - then read names from standard input\n\
@@ -429,6 +434,7 @@ enum
 {
   CHECK_OPTION = CHAR_MAX + 1,
   COMPRESS_PROGRAM_OPTION,
+  DEBUG_PROGRAM_OPTION,
   FILES0_FROM_OPTION,
   NMERGE_OPTION,
   RANDOM_SOURCE_OPTION,
@@ -442,6 +448,7 @@ static struct option const long_options[] =
   {"ignore-leading-blanks", no_argument, NULL, 'b'},
   {"check", optional_argument, NULL, CHECK_OPTION},
   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
+  {"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
   {"dictionary-order", no_argument, NULL, 'd'},
   {"ignore-case", no_argument, NULL, 'f'},
   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
@@ -1111,13 +1118,6 @@ open_temp (const char *name, pid_t pid)
   return fp;
 }
 
-static void
-write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
-{
-  if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
-    die (_("write failed"), output_file);
-}
-
 /* Append DIR to the array of temporary directory names.  */
 static void
 add_temp_dir (char const *dir)
@@ -1734,30 +1734,19 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
     }
 }
 
-/* Compare strings A and B as numbers without explicitly converting them to
-   machine numbers.  Comparatively slow for short strings, but asymptotically
-   hideously fast. */
-
-static int
-numcompare (const char *a, const char *b)
-{
-  while (blanks[to_uchar (*a)])
-    a++;
-  while (blanks[to_uchar (*b)])
-    b++;
-
-  return strnumcmp (a, b, decimal_point, thousands_sep);
-}
-
 /* Exit with an error if a mixture of SI and IEC units detected.  */
 
-static void
+static bool
 check_mixed_SI_IEC (char prefix, struct keyfield *key)
 {
   int iec_present = prefix == 'i';
-  if (key->iec_present != -1 && iec_present != key->iec_present)
-    error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
-  key->iec_present = iec_present;
+  if (key)
+    {
+      if (key->iec_present != -1 && iec_present != key->iec_present)
+        error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
+      key->iec_present = iec_present;
+    }
+  return iec_present;
 }
 
 /* Return an integer which represents the order of magnitude of
@@ -1766,7 +1755,7 @@ check_mixed_SI_IEC (char prefix, struct keyfield *key)
    Negative numbers return a negative unit order.  */
 
 static int
-find_unit_order (const char *number, struct keyfield *key)
+find_unit_order (const char *number, struct keyfield *key, char const **endptr)
 {
   static const char orders [UCHAR_LIM] =
     {
@@ -1822,7 +1811,13 @@ find_unit_order (const char *number, struct keyfield *key)
 
   /* For valid units check for MiB vs MB etc.  */
   if (order)
-    check_mixed_SI_IEC (*(p + 1), key);
+    {
+      p++;
+      p += check_mixed_SI_IEC (*p, key);
+    }
+
+  if (endptr)
+    *endptr = p;
 
   return sign * order;
 }
@@ -1833,25 +1828,50 @@ find_unit_order (const char *number, struct keyfield *key)
    i.e. input will never have both 6000K and 5M.  */
 
 static int
-human_numcompare (const char *a, const char *b, struct keyfield *key)
+human_numcompare (const char *a, const char *b, struct keyfield *key,
+                  char const **ea)
 {
   while (blanks[to_uchar (*a)])
     a++;
   while (blanks[to_uchar (*b)])
     b++;
 
-  int order_a = find_unit_order (a, key);
-  int order_b = find_unit_order (b, key);
+  int order_a = find_unit_order (a, key, ea);
+  int order_b = find_unit_order (b, key, NULL);
 
   return (order_a > order_b ? 1
           : order_a < order_b ? -1
           : strnumcmp (a, b, decimal_point, thousands_sep));
 }
 
+/* Compare strings A and B as numbers without explicitly converting them to
+   machine numbers.  Comparatively slow for short strings, but asymptotically
+   hideously fast. */
+
 static int
-general_numcompare (const char *sa, const char *sb)
+numcompare (const char *a, const char *b, char const **ea)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  if (debug)
+    {
+      /* Approximate strnumcmp extents with find_unit_order.  */
+      if (find_unit_order (a, NULL, ea))
+        {
+          *ea -= 1; /* ignore the order letter */
+          *ea -= (**ea == 'i'); /* and IEC prefix */
+        }
+    }
+
+  return strnumcmp (a, b, decimal_point, thousands_sep);
+}
+
+static int
+general_numcompare (const char *sa, const char *sb, char const **ea)
 {
-  /* FIXME: add option to warn about failed conversions.  */
   /* FIXME: maybe add option to try expensive FP conversion
      only if A and B can't be compared more cheaply/accurately.  */
 
@@ -1863,13 +1883,12 @@ general_numcompare (const char *sa, const char *sb)
 # define strtold strtod
 #endif
 
-  char *ea;
   char *eb;
-  long_double a = strtold (sa, &ea);
+  long_double a = strtold (sa, (char **) ea);
   long_double b = strtold (sb, &eb);
 
   /* Put conversion errors at the start of the collating sequence.  */
-  if (sa == ea)
+  if (sa == *ea)
     return sb == eb ? 0 : -1;
   if (sb == eb)
     return 1;
@@ -1889,7 +1908,7 @@ general_numcompare (const char *sa, const char *sb)
    Return 0 if the name in S is not recognized.  */
 
 static int
-getmonth (char const *month, size_t len)
+getmonth (char const *month, size_t len, char const **ea)
 {
   size_t lo = 0;
   size_t hi = MONTHS_PER_YEAR;
@@ -1913,7 +1932,11 @@ getmonth (char const *month, size_t len)
       for (;; m++, n++)
         {
           if (!*n)
-            return monthtab[ix].val;
+            {
+              if (ea)
+                *ea = m;
+              return monthtab[ix].val;
+            }
           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
             {
               hi = ix;
@@ -2070,11 +2093,76 @@ compare_version (char *restrict texta, size_t lena,
   return diff;
 }
 
+/* For debug mode, count the TABs in the first LEN bytes of TEXT, so that
+   callers can add that count to the widths returned by mbsnwidth.
+   FIXME: Should we generally be counting non printable chars?  */
+
+static size_t
+count_tabs (char const *text, const size_t len)
+{
+  size_t tabs = 0;
+  size_t tlen = strnlen (text, len);  /* Scan stops early at a NUL byte.  */
+
+  while (tlen--)
+    {
+      if (*text++ == '\t')
+        tabs++;
+    }
+
+  return tabs;
+}
+
+/* For debug mode, write OFFSET spaces to stdout followed by WIDTH '_'
+   chars and a newline; if WIDTH is 0 write "^ no match for key" instead.  */
+
+static void
+mark_key (size_t offset, size_t width)
+{
+  while (offset--)  /* Not printf "%*s": '*' needs an int, not a size_t.  */
+    putchar (' ');
+  if (!width)
+    printf (_("^ no match for key\n"));
+  else
+    {
+      while (width--)
+        putchar ('_');
+      putchar ('\n');
+    }
+}
+
+/* For debug mode, highlight via mark_key FLEN bytes of the key at SFIELD in
+   the line at SLINE.  If SKIPB, first skip leading blanks up to EFIELD.  */
+
+static void
+debug_key (char const *sline, char const *sfield, char const *efield,
+           size_t flen, bool skipb)
+{
+  char const *sa = sfield;  /* Where the highlighted text starts.  */
+
+  if (skipb) /* This key type implicitly skips leading blanks.  */
+    {
+      while (sa < efield && blanks[to_uchar (*sa)])
+        {
+          sa++;
+          if (flen)
+            flen--; /* This assumes TABs same width as SPACEs.  */
+        }
+    }
+  /* Columns before the highlight: line prefix width plus skipped blanks.  */
+  size_t offset = mbsnwidth (sline, sfield - sline, 0) + (sa - sfield);
+  offset += count_tabs (sline, sfield - sline);
+  /* Columns of the highlight itself; add one more for each TAB in it.  */
+  size_t width = mbsnwidth (sa, flen, 0);
+  width += count_tabs (sa, flen);
+
+  mark_key (offset, width);
+}
+
 /* Compare two lines A and B trying every key in sequence until there
    are no more keys or a difference is found. */
 
 static int
-keycompare (const struct line *a, const struct line *b)
+keycompare (const struct line *a, const struct line *b, bool show_debug)
 {
   struct keyfield *key = keylist;
 
@@ -2091,6 +2179,7 @@ keycompare (const struct line *a, const struct line *b)
     {
       char const *translate = key->translate;
       bool const *ignore = key->ignore;
+      bool skipb = false; /* Whether key type auto skips leading blanks.  */
 
       /* Treat field ends before field starts as empty fields.  */
       lima = MAX (texta, lima);
@@ -2107,21 +2196,42 @@ keycompare (const struct line *a, const struct line *b)
       else if (key->numeric || key->general_numeric || key->human_numeric)
         {
           char savea = *lima, saveb = *limb;
+          char const* ea = lima;
 
           *lima = *limb = '\0';
-          diff = (key->numeric ? numcompare (texta, textb)
-                  : key->general_numeric ? general_numcompare (texta, textb)
-                  : human_numcompare (texta, textb, key));
+          diff = (key->numeric ? numcompare (texta, textb, &ea)
+                  : key->general_numeric ? general_numcompare (texta, textb,
+                                                               &ea)
+                  : human_numcompare (texta, textb, key, &ea));
+          if (show_debug)
+            {
+              lena = ea - texta;
+              skipb = true;
+            }
           *lima = savea, *limb = saveb;
         }
       else if (key->version)
         diff = compare_version (texta, lena, textb, lenb);
       else if (key->month)
-        diff = getmonth (texta, lena) - getmonth (textb, lenb);
+        {
+          char const *ea = lima;
+
+          int amon = getmonth (texta, lena, &ea);
+          diff = amon - getmonth (textb, lenb, NULL);
+
+          if (show_debug)
+            {
+              lena = amon ? ea - texta : 0;
+              skipb = true;
+            }
+        }
       /* Sorting like this may become slow, so in a simple locale the user
          can select a faster sort that is similar to ascii sort.  */
       else if (hard_LC_COLLATE)
         {
+          /* FIXME: for debug, account for skipped chars, while handling mb chars.
+             Generally perhaps xmemxfrm could be used to determine chars that are
+             excluded from the collating order?  */
           if (ignore || translate)
             {
               char buf[4000];
@@ -2165,6 +2275,8 @@ keycompare (const struct line *a, const struct line *b)
         }
       else if (ignore)
         {
+          char *savea = texta;
+
 #define CMP_WITH_IGNORE(A, B)						\
   do									\
     {									\
@@ -2192,6 +2304,10 @@ keycompare (const struct line *a, const struct line *b)
                              translate[to_uchar (*textb)]);
           else
             CMP_WITH_IGNORE (*texta, *textb);
+
+          /* We only need to restore this for debug_key
+             in which case the keys being compared are equal.  */
+          texta = savea;
         }
       else if (lena == 0)
         diff = - NONZERO (lenb);
@@ -2201,6 +2317,8 @@ keycompare (const struct line *a, const struct line *b)
         {
           if (translate)
             {
+              char *savea = texta;
+
               while (texta < lima && textb < limb)
                 {
                   diff = (to_uchar (translate[to_uchar (*texta++)])
@@ -2208,6 +2326,10 @@ keycompare (const struct line *a, const struct line *b)
                   if (diff)
                     goto not_equal;
                 }
+
+              /* We only need to restore this for debug_key
+                 in which case the keys being compared are equal.  */
+              texta = savea;
             }
           else
             {
@@ -2221,6 +2343,9 @@ keycompare (const struct line *a, const struct line *b)
       if (diff)
         goto not_equal;
 
+      if (show_debug)
+        debug_key (a->text, texta, lima, lena, skipb);
+
       key = key->next;
       if (! key)
         break;
@@ -2258,7 +2383,7 @@ keycompare (const struct line *a, const struct line *b)
    depending on whether A compares less than, equal to, or greater than B. */
 
 static int
-compare (const struct line *a, const struct line *b)
+compare (const struct line *a, const struct line *b, bool show_debug)
 {
   int diff;
   size_t alen, blen;
@@ -2268,7 +2393,7 @@ compare (const struct line *a, const struct line *b)
      and unadorned sort -r. */
   if (keylist)
     {
-      diff = keycompare (a, b);
+      diff = keycompare (a, b, show_debug);
       if (diff || unique || stable)
         return diff;
     }
@@ -2277,6 +2402,9 @@ compare (const struct line *a, const struct line *b)
      fall through to the default comparison.  */
   alen = a->length - 1, blen = b->length - 1;
 
+  if (show_debug)
+    debug_key (a->text, a->text, a->text + alen, alen, false);
+
   if (alen == 0)
     diff = - NONZERO (blen);
   else if (blen == 0)
@@ -2289,6 +2417,38 @@ compare (const struct line *a, const struct line *b)
   return reverse ? -diff : diff;
 }
 
+static void
+write_bytes (const struct line *line, FILE *fp, char const *output_file)
+{
+  char const *buf = line->text;
+  size_t n_bytes = line->length;
+  /* Write LINE to FP, calling die with OUTPUT_FILE if a write fails.  With
+     --debug on stdout, convert TABs to '>' and \0 to \n when -z specified.  */
+  if (debug && fp == stdout)
+    {
+      char const *ebuf = buf + n_bytes;
+      char const *c = buf;
+
+      while (c < ebuf)
+        {
+          char wc = *c++;
+          if (wc == '\t')
+            wc = '>';
+          else if (wc == 0 && eolchar == 0)
+            wc = '\n';
+          if (fputc (wc, fp) == EOF)
+            die (_("write failed"), output_file);
+        }
+
+      compare (line, line, true);  /* Print the key highlighting.  */
+    }
+  else /* Not debugging to stdout: write the bytes unmodified.  */
+    {
+      if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
+        die (_("write failed"), output_file);
+    }
+}
+
 /* Check that the lines read from FILE_NAME come in order.  Return
    true if they are in order.  If CHECKONLY == 'c', also print a
    diagnostic (FILE_NAME, line number, contents of line) to stderr if
@@ -2317,7 +2477,7 @@ check (char const *file_name, char checkonly)
 
       /* Make sure the line saved from the old buffer contents is
          less than or equal to the first line of the new buffer. */
-      if (alloc && nonunique <= compare (&temp, line - 1))
+      if (alloc && nonunique <= compare (&temp, line - 1, false))
         {
         found_disorder:
           {
@@ -2330,8 +2490,10 @@ check (char const *file_name, char checkonly)
                 fprintf (stderr, _("%s: %s:%s: disorder: "),
                          program_name, file_name,
                          umaxtostr (disorder_line_number, hr_buf));
-                write_bytes (disorder_line->text, disorder_line->length,
-                             stderr, _("standard error"));
+                if (debug)
+                  fputc ('\n', stderr);
+                write_bytes (disorder_line, debug ? stdout : stderr,
+                             debug ? _("standard out") : _("standard error"));
               }
 
             ordered = false;
@@ -2341,7 +2503,7 @@ check (char const *file_name, char checkonly)
 
       /* Compare each line in the buffer with its successor.  */
       while (linebase < --line)
-        if (nonunique <= compare (line, line - 1))
+        if (nonunique <= compare (line, line - 1, false))
           goto found_disorder;
 
       line_number += buf.nlines;
@@ -2470,7 +2632,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
   for (i = 0; i < nfiles; ++i)
     ord[i] = i;
   for (i = 1; i < nfiles; ++i)
-    if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
+    if (0 < compare (cur[ord[i - 1]], cur[ord[i]], false))
       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
 
   /* Repeatedly output the smallest line until no input remains. */
@@ -2482,10 +2644,10 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
          an identical series of lines. */
       if (unique)
         {
-          if (savedline && compare (savedline, smallest))
+          if (savedline && compare (savedline, smallest, false))
             {
               savedline = NULL;
-              write_bytes (saved.text, saved.length, ofp, output_file);
+              write_bytes (&saved, ofp, output_file);
             }
           if (!savedline)
             {
@@ -2514,7 +2676,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
             }
         }
       else
-        write_bytes (smallest->text, smallest->length, ofp, output_file);
+        write_bytes (smallest, ofp, output_file);
 
       /* Check if we need to read more lines into core. */
       if (base[ord[0]] < smallest)
@@ -2568,7 +2730,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
 
         while (lo < hi)
           {
-            int cmp = compare (cur[ord0], cur[ord[probe]]);
+            int cmp = compare (cur[ord0], cur[ord[probe]], false);
             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
               hi = probe;
             else
@@ -2589,7 +2751,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
 
   if (unique && savedline)
     {
-      write_bytes (saved.text, saved.length, ofp, output_file);
+      write_bytes (&saved, ofp, output_file);
       free (saved.text);
     }
 
@@ -2634,7 +2796,7 @@ mergelines (struct line *t,
             struct line const *hi, size_t nhi)
 {
   for (;;)
-    if (compare (lo - 1, hi - 1) <= 0)
+    if (compare (lo - 1, hi - 1, false) <= 0)
       {
         *--t = *--lo;
         if (! --nlo)
@@ -2676,7 +2838,7 @@ sortlines (struct line *lines, size_t nlines, struct line *temp)
 {
   if (nlines == 2)
     {
-      if (0 < compare (&lines[-1], &lines[-2]))
+      if (0 < compare (&lines[-1], &lines[-2], false))
         {
           struct line tmp = lines[-1];
           lines[-1] = lines[-2];
@@ -2712,7 +2874,7 @@ sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
       /* Declare `swap' as int, not bool, to work around a bug
          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
-      int swap = (0 < compare (&lines[-1], &lines[-2]));
+      int swap = (0 < compare (&lines[-1], &lines[-2], false));
       temp[-1] = lines[-1 - swap];
       temp[-2] = lines[-2 + swap];
     }
@@ -2994,9 +3156,9 @@ sort (char * const *files, size_t nfiles, char const *output_file)
           do
             {
               line--;
-              write_bytes (line->text, line->length, tfp, temp_output);
+              write_bytes (line, tfp, temp_output);
               if (unique)
-                while (linebase < line && compare (line, line - 1) == 0)
+                while (linebase < line && compare (line, line - 1, false) == 0)
                   line--;
             }
           while (linebase < line);
@@ -3459,6 +3621,10 @@ main (int argc, char **argv)
           compress_program = optarg;
           break;
 
+        case DEBUG_PROGRAM_OPTION:
+          debug = true;
+          break;
+
         case FILES0_FROM_OPTION:
           files_from = optarg;
           break;
@@ -3715,6 +3881,9 @@ main (int argc, char **argv)
 
   check_ordering_compatibility ();
 
+  if (debug && outfile)
+    error (SORT_FAILURE, 0, _("options -o and --debug are incompatible"));
+
   reverse = gkey.reverse;
 
   if (need_random)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1049b2b..46d388a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -224,6 +224,7 @@ TESTS =						\
   misc/sort					\
   misc/sort-compress				\
   misc/sort-continue				\
+  misc/sort-debug-keys				\
   misc/sort-files0-from				\
   misc/sort-float				\
   misc/sort-merge				\
diff --git a/tests/misc/sort-debug-keys b/tests/misc/sort-debug-keys
new file mode 100755
index 0000000..0437678
--- /dev/null
+++ b/tests/misc/sort-debug-keys
@@ -0,0 +1,317 @@
+#!/bin/sh
+# Test annotation of sort keys
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if test "$VERBOSE" = yes; then
+  set -x
+  sort --version
+fi
+
+. $srcdir/test-lib.sh
+
+number() { cat -n | sed 's/^ *//'; }
+
+cat <<\EOF > exp
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+
+^ no match for key
+1
+_
+2
+_
+33
+__
+44
+__
+2>
+  ^ no match for key
+3>1
+  _
+1>2
+  _
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+
+^ no match for key
+1
+_
+2
+_
+33
+__
+44
+__
+2>
+  ^ no match for key
+3>1
+  _
+1>2
+  _
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+1
+ ^ no match for key
+
+^ no match for key
+44
+  ^ no match for key
+33
+  ^ no match for key
+2
+ ^ no match for key
+
+^ no match for key
+1
+_
+2
+_
+33
+__
+44
+__
+2>
+  ^ no match for key
+3>1
+  _
+1>2
+  _
+
+^ no match for key
+JAN
+___
+FEB
+___
+FEB
+   ^ no match for key
+
+^ no match for key
+JAN
+   ^ no match for key
+JAZZ
+^ no match for key
+
+^ no match for key
+JAN
+___
+FEB
+___
+2>JAZZ
+  ^ no match for key
+3>
+  ^ no match for key
+4>JAN
+  ___
+1>FEB
+  ___
+
+^ no match for key
+JANZ
+___
+JAN
+___
+FEB
+___
+3>
+  ^ no match for key
+2>JANZ
+  ___
+4>JAN
+  ___
+1>FEB
+  ___
+ 1.2ignore
+ ___
+ 1.1e4ignore
+ _____
+>>a
+___
+>b
+__
+a
+ ^ no match for key
+
+^ no match for key
+a
+_
+b
+_
+-3
+__
+-2
+__
+-0
+__
+--Mi-1
+_
+-0
+__
+1
+_
+ 1
+ _
+__
+1
+_
+_
+ 1
+ _
+1
+_
+ 1
+__
+1
+_
+2,5
+_
+2.4
+___
+2.,,3
+_
+2.4
+___
+2,,3
+_
+2.4
+___
+1a
+_
+2b
+_
+EOF
+
+(
+for type in n h g; do
+  printf "1\n\n44\n33\n2\n" | sort -s -k2$type --debug
+  printf "1\n\n44\n33\n2\n" | sort -s -k1.3$type --debug
+  printf "1\n\n44\n33\n2\n" | sort -s -k1$type --debug
+  printf "2\n\n1\n" | number | sort -s -k2g --debug
+done
+
+printf "FEB\n\nJAN\n" | sort -s -k1M --debug
+printf "FEB\n\nJAN\n" | sort -s -k2,2M --debug
+printf "FEB\nJAZZ\n\nJAN\n" | sort -s -k1M --debug
+printf "FEB\nJAZZ\n\nJAN\n" | number | sort -s -k2,2M --debug
+printf "FEB\nJANZ\n\nJAN\n" | sort -s -k1M --debug
+printf "FEB\nJANZ\n\nJAN\n" | number | sort -s -k2,2M --debug
+
+printf " 1.2ignore\n 1.1e4ignore\n" | sort -s -g --debug
+
+printf "\tb\n\t\ta\n" | sort -s -d --debug # ignore = 1
+
+printf "a\n\n" | sort -s -k2,2 --debug #lena = 0
+
+printf "b\na\n" | sort -s -k1 --debug #otherwise key compare
+
+printf -- "-0\n1\n-2\n--Mi-1\n-3\n-0\n" | sort -s --debug -k1,1h
+
+printf " 1\n1\n" | sort -b --debug
+printf " 1\n1\n" | sort -sb --debug
+printf " 1\n1\n" | sort --debug
+
+# strnumcmp is a bit weird, so we don't match exactly
+printf "2,5\n2.4\n" | sort -s -k1n --debug
+printf "2.,,3\n2.4\n" | sort -s -k1n --debug
+printf "2,,3\n2.4\n" | sort -s -k1n --debug
+
+# -z means we convert \0 to \n
+printf "1a\x002b\x00" | sort -s -n -z --debug
+) > out
+
+compare out exp || fail=1
+
+cat <<\EOF > exp
+   1²---++3   1,234  Mi
+               _
+   _________
+________________________
+   1²---++3   1,234  Mi
+              _____
+   ________
+_______________________
++1234 1234Gi 1,234M
+^ no match for key
+_____
+^ no match for key
+      ____
+      ____
+      ______
+             _____
+             _____
+             ______
+___________________
+EOF
+
+(
+if test "$LOCALE_FR_UTF8"; then
+  echo "   1²---++3   1,234  Mi" |
+    LC_ALL=C sort --debug -k2g -k1b,1
+  echo "   1²---++3   1,234  Mi" |
+    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
+  echo "+1234 1234Gi 1,234M" |
+    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
+    -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
+fi
+) > out
+
+compare out exp || fail=1
+
+Exit $fail
-- 
1.6.2.5