coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: RFE: hash-type in sum utils


From: Jim Meyering
Subject: Re: RFE: hash-type in sum utils
Date: Wed, 01 Aug 2012 13:08:26 +0200

Ondrej Oprala wrote:
> Here's the patch.

Thanks.
I've adjusted the NEWS patch so that it only inserts the
new 3-line entry (rather than also removing many others),
and adjusted the wording slightly:

diff --git a/NEWS b/NEWS
index f1255ea..51ebc63 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,10 @@ GNU coreutils NEWS                                    -*- 
outline -*-

 ** New features

+  md5sum now accepts the --tag option to print BSD-style output with GNU
+  file name escaping.  This also affects sha1sum, sha224sum, sha256sum,
+  sha384sum and sha512sum.
+
   stat -f recognizes the new remote file system types: aufs, panfs.

 ** Changes in behavior
-----------------------------------------------

I noticed that "make syntax-check" failed with this:

                      fputs(" (", stdout);
    *** src/md5sum.c
    maint.mk: the above files lack a space-before-open-paren
    make: *** [sc_space_before_open_paren] Error 1
    make: *** Waiting for unfinished jobs....

That prompted me to look for other formatting problems.
There was an improperly indented line and a stray empty line.
Here are the fixes:
(but see below, "Finally...", for a complete change-set)

diff --git a/src/md5sum.c b/src/md5sum.c
index eecf2c7..735e8ed 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -356,7 +356,6 @@ split_3 (char *s, size_t s_len,
   else if (escaped_filename && (s[i] == ' ' || s[i + 1] == ' '))
     return false;

-
   /* Ignore this line if it is too short.
      Each line must have at least 'min_digest_line_length - 1' (or one more, if
      the first is a backslash) more characters to contain correct message 
digest
@@ -364,7 +363,7 @@ split_3 (char *s, size_t s_len,
   if (s_len - i < min_digest_line_length + (s[i] == '\\'))
     return false;

-    *hex_digest = (unsigned char *) &s[i];
+  *hex_digest = (unsigned char *) &s[i];

   /* The first field has to be the n-character hexadecimal
      representation of the message digest.  If it is not followed
@@ -815,7 +814,7 @@ main (int argc, char **argv)
                   if (!file_is_binary)
                     putchar (' ');
                   fputs (DIGEST_TYPE_STRING, stdout);
-                  fputs(" (", stdout);
+                  fputs (" (", stdout);
                   print_filename (file);
                   fputs (") = ", stdout);
                 }
-----------------------------------------------

Your patch lacks a commit log.
Also, any time we add a new option, we must document it
in two places: in --help output (i.e., the usage function)
and in doc/coreutils.texi.  The --help addition will be
minimal, and coreutils.texi will give more detail, e.g.,
explaining how md5sum --tag output differs from that produced
by the BSD md5 program.

Thanks for adding the tests.  Are you willing to add a few more
to get proper coverage of this new feature?  For example,
we'll need cases that show what happens with files whose
names contain backslash and newline, as well as what
md5sum --check does with inputs where the name is escaped
but contains say "\t" or ends in a single backslash.

In rewriting the description of your filename_unescape
function, I realized that it could be simpler, so eliminated its
third parameter and made it return S (or NULL) instead of a boolean.

diff --git a/src/md5sum.c b/src/md5sum.c
index 735e8ed..22efac0 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -217,16 +217,17 @@ space for text), and name for each FILE.\n"),

 #define ISWHITE(c) ((c) == ' ' || (c) == '\t')

-/* Translate each '\n' string in the file name beginning
-   at string S (of length S_LEN)  to a NEWLINE,
-   and each '\\' string to a backslash; FILE_NAME becoming
-   the pointer used to print the actual file name. Return
-   true unless file name is invalid.  */
-
-static bool
-filename_unescape (char *s, size_t s_len, char **file_name)
+/* Given a file name, S of length S_LEN, that is not NUL-terminated,
+   modify it in place, performing the equivalent of this sed substitution:
+   's/\\n/\n/g;s/\\\\/\\/g' i.e., replacing each "\\n" string with a newline
+   and each "\\\\" with a single backslash, NUL-terminate it and return S.
+   If S is not a valid escaped file name, i.e., if it ends with an odd number
+   of backslashes or if it contains a backslash followed by anything other
+   than "n" or another backslash, return NULL.  */
+
+static char *
+filename_unescape (char *s, size_t s_len)
 {
-
   char *dst = s;
   size_t i = 0;

@@ -238,7 +239,7 @@ filename_unescape (char *s, size_t s_len, char **file_name)
           if (i == s_len - 1)
             {
               /* A valid line does not end with a backslash.  */
-              return false;
+              return NULL;
             }
           ++i;
           switch (s[i++])
@@ -251,13 +252,13 @@ filename_unescape (char *s, size_t s_len, char 
**file_name)
               break;
             default:
               /* Only '\' or 'n' may follow a backslash.  */
-              return false;
+              return NULL;
             }
           break;

         case '\0':
           /* The file name may not contain a NUL.  */
-          return false;
+          return NULL;

         default:
           *dst++ = s[i++];
@@ -266,9 +267,7 @@ filename_unescape (char *s, size_t s_len, char **file_name)
     }
   *dst = '\0';

-  *file_name = s;
-
-  return true;
+  return s;
 }

 /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
@@ -284,7 +283,7 @@ bsd_split_3 (char *s, size_t s_len, unsigned char 
**hex_digest,
   if (s_len == 0)
     return false;
   /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
-     filenames, so search backwards for the last ')'. */
+     file names, so search backwards for the last ')'.  */
   i = s_len - 1;
   while (i && s[i] != ')')
     i--;
@@ -294,9 +293,8 @@ bsd_split_3 (char *s, size_t s_len, unsigned char 
**hex_digest,

   *file_name = s;

-  if (escaped_filename)
-    if (!filename_unescape (s, i, file_name))
-      return false;
+  if (escaped_filename && filename_unescape (s, i) == NULL)
+    return false;

   s[i++] = '\0';

@@ -398,7 +396,7 @@ split_3 (char *s, size_t s_len,
   *file_name = &s[i];

   if (escaped_filename)
-    return filename_unescape (&s[i], s_len - i, file_name);
+    return filename_unescape (&s[i], s_len - i) != NULL;

   return true;
 }

===============================================================
Also, here's a change to make filename_unescape use a for-loop,
since factoring out the two bottom-of-loop "i++" expressions is cleaner:

diff --git a/src/md5sum.c b/src/md5sum.c
index 22efac0..5fdc98a 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -229,20 +229,19 @@ static char *
 filename_unescape (char *s, size_t s_len)
 {
   char *dst = s;
-  size_t i = 0;

-  while (i < s_len)
+  for (size_t i = 0; i < s_len; i++)
     {
       switch (s[i])
         {
         case '\\':
           if (i == s_len - 1)
             {
-              /* A valid line does not end with a backslash.  */
+              /* File name ends with an unescaped backslash: invalid.  */
               return NULL;
             }
           ++i;
-          switch (s[i++])
+          switch (s[i])
             {
             case 'n':
               *dst++ = '\n';
@@ -261,7 +260,7 @@ filename_unescape (char *s, size_t s_len)
           return NULL;

         default:
-          *dst++ = s[i++];
+          *dst++ = s[i];
           break;
         }
     }


===============================================================
Finally, here's a change-set relative to the latest in git
that you can apply using "git am FILE".
Please use this as a starting point for any additional changes.

>From c199ef3890e1eb8679a2577cabec34801e276b09 Mon Sep 17 00:00:00 2001
From: Ondrej Oprala <address@hidden>
Date: Tue, 31 Jul 2012 13:57:48 +0200
Subject: [PATCH] md5sum: add the --tag option to *sum utils for BSD-like
 output

* src/md5sum.c (filename_unescape): New function.
FIXME: say more here
* NEWS (New features): Mention it.
* tests/misc/md5sum-bsd: Add tests.
---
 NEWS                  |   4 +
 src/md5sum.c          | 202 +++++++++++++++++++++++++++++++-------------------
 tests/misc/md5sum-bsd |  11 +++
 3 files changed, 139 insertions(+), 78 deletions(-)

diff --git a/NEWS b/NEWS
index f1255ea..51ebc63 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,10 @@ GNU coreutils NEWS                                    -*- 
outline -*-

 ** New features

+  md5sum now accepts the --tag option to print BSD-style output with GNU
+  file name escaping.  This also affects sha1sum, sha224sum, sha256sum,
+  sha384sum and sha512sum.
+
   stat -f recognizes the new remote file system types: aufs, panfs.

 ** Changes in behavior
diff --git a/src/md5sum.c b/src/md5sum.c
index f7e0849..5fdc98a 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -135,7 +135,8 @@ enum
 {
   STATUS_OPTION = CHAR_MAX + 1,
   QUIET_OPTION,
-  STRICT_OPTION
+  STRICT_OPTION,
+  TAG_OPTION
 };

 static struct option const long_options[] =
@@ -147,6 +148,7 @@ static struct option const long_options[] =
   { "text", no_argument, NULL, 't' },
   { "warn", no_argument, NULL, 'w' },
   { "strict", no_argument, NULL, STRICT_OPTION },
+  { "tag", no_argument, NULL, TAG_OPTION },
   { GETOPT_HELP_OPTION_DECL },
   { GETOPT_VERSION_OPTION_DECL },
   { NULL, 0, NULL, 0 }
@@ -215,23 +217,72 @@ space for text), and name for each FILE.\n"),

 #define ISWHITE(c) ((c) == ' ' || (c) == '\t')

+/* Given a file name, S of length S_LEN, that is not NUL-terminated,
+   modify it in place, performing the equivalent of this sed substitution:
+   's/\\n/\n/g;s/\\\\/\\/g' i.e., replacing each "\\n" string with a newline
+   and each "\\\\" with a single backslash, NUL-terminate it and return S.
+   If S is not a valid escaped file name, i.e., if it ends with an odd number
+   of backslashes or if it contains a backslash followed by anything other
+   than "n" or another backslash, return NULL.  */
+
+static char *
+filename_unescape (char *s, size_t s_len)
+{
+  char *dst = s;
+
+  for (size_t i = 0; i < s_len; i++)
+    {
+      switch (s[i])
+        {
+        case '\\':
+          if (i == s_len - 1)
+            {
+              /* File name ends with an unescaped backslash: invalid.  */
+              return NULL;
+            }
+          ++i;
+          switch (s[i])
+            {
+            case 'n':
+              *dst++ = '\n';
+              break;
+            case '\\':
+              *dst++ = '\\';
+              break;
+            default:
+              /* Only '\' or 'n' may follow a backslash.  */
+              return NULL;
+            }
+          break;
+
+        case '\0':
+          /* The file name may not contain a NUL.  */
+          return NULL;
+
+        default:
+          *dst++ = s[i];
+          break;
+        }
+    }
+  *dst = '\0';
+
+  return s;
+}
+
 /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
    'sha1' command into two parts: a hexadecimal digest, and the file
    name.  S is modified.  Return true if successful.  */

 static bool
 bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
-             char **file_name)
+             char **file_name, bool escaped_filename)
 {
   size_t i;

   if (s_len == 0)
     return false;
-
-  *file_name = s;
-
   /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
-     filenames, so search backwards for the last ')'. */
+     file names, so search backwards for the last ')'.  */
   i = s_len - 1;
   while (i && s[i] != ')')
     i--;
@@ -239,6 +290,11 @@ bsd_split_3 (char *s, size_t s_len, unsigned char 
**hex_digest,
   if (s[i] != ')')
     return false;

+  *file_name = s;
+
+  if (escaped_filename && filename_unescape (s, i) == NULL)
+    return false;
+
   s[i++] = '\0';

   while (ISWHITE (s[i]))
@@ -271,7 +327,16 @@ split_3 (char *s, size_t s_len,
   while (ISWHITE (s[i]))
     ++i;

+  if (s[i] == '\\')
+    {
+      ++i;
+      escaped_filename = true;
+    }
+
   /* Check for BSD-style checksum line. */
+  if (s[i] == ' ')
+    ++i;
+
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
     {
@@ -282,9 +347,11 @@ split_3 (char *s, size_t s_len,
           *binary = 0;
           return bsd_split_3 (s +      i + algo_name_len + 1,
                               s_len - (i + algo_name_len + 1),
-                              hex_digest, file_name);
+                              hex_digest, file_name, escaped_filename);
         }
     }
+  else if (escaped_filename && (s[i] == ' ' || s[i + 1] == ' '))
+    return false;

   /* Ignore this line if it is too short.
      Each line must have at least 'min_digest_line_length - 1' (or one more, if
@@ -293,11 +360,6 @@ split_3 (char *s, size_t s_len,
   if (s_len - i < min_digest_line_length + (s[i] == '\\'))
     return false;

-  if (s[i] == '\\')
-    {
-      ++i;
-      escaped_filename = true;
-    }
   *hex_digest = (unsigned char *) &s[i];

   /* The first field has to be the n-character hexadecimal
@@ -333,49 +395,8 @@ split_3 (char *s, size_t s_len,
   *file_name = &s[i];

   if (escaped_filename)
-    {
-      /* Translate each '\n' string in the file name to a NEWLINE,
-         and each '\\' string to a backslash.  */
-
-      char *dst = &s[i];
-
-      while (i < s_len)
-        {
-          switch (s[i])
-            {
-            case '\\':
-              if (i == s_len - 1)
-                {
-                  /* A valid line does not end with a backslash.  */
-                  return false;
-                }
-              ++i;
-              switch (s[i++])
-                {
-                case 'n':
-                  *dst++ = '\n';
-                  break;
-                case '\\':
-                  *dst++ = '\\';
-                  break;
-                default:
-                  /* Only '\' or 'n' may follow a backslash.  */
-                  return false;
-                }
-              break;
+    return filename_unescape (&s[i], s_len - i) != NULL;

-            case '\0':
-              /* The file name may not contain a NUL.  */
-              return false;
-              break;
-
-            default:
-              *dst++ = s[i++];
-              break;
-            }
-        }
-      *dst = '\0';
-    }
   return true;
 }

@@ -636,6 +657,31 @@ digest_check (const char *checkfile_name)
           && (!strict || n_improperly_formatted_lines == 0));
 }

+static void
+print_filename (char const *file)
+{
+  /* Translate each NEWLINE byte to the string, "\\n",
+     and each backslash to "\\\\".  */
+  while (*file)
+    {
+      switch (*file)
+        {
+        case '\n':
+          fputs ("\\n", stdout);
+          break;
+
+        case '\\':
+          fputs ("\\\\", stdout);
+          break;
+
+        default:
+          putchar (*file);
+          break;
+        }
+      file++;
+    }
+}
+
 int
 main (int argc, char **argv)
 {
@@ -646,6 +692,7 @@ main (int argc, char **argv)
   int opt;
   bool ok = true;
   int binary = -1;
+  bool prefix_tag = false;

   /* Setting values of global variables.  */
   initialize_main (&argc, &argv);
@@ -690,6 +737,9 @@ main (int argc, char **argv)
       case STRICT_OPTION:
         strict = true;
         break;
+      case TAG_OPTION:
+        prefix_tag = true;
+        break;
       case_GETOPT_HELP_CHAR;
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
       default:
@@ -754,41 +804,37 @@ main (int argc, char **argv)
             ok = false;
           else
             {
+              if (prefix_tag)
+                {
+                  if (strchr (file, '\n') || strchr (file, '\\'))
+                    putchar ('\\');
+                  if (!file_is_binary)
+                    putchar (' ');
+                  fputs (DIGEST_TYPE_STRING, stdout);
+                  fputs (" (", stdout);
+                  print_filename (file);
+                  fputs (") = ", stdout);
+                }
+
               size_t i;

               /* Output a leading backslash if the file name contains
                  a newline or backslash.  */
-              if (strchr (file, '\n') || strchr (file, '\\'))
+              if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\')))
                 putchar ('\\');

               for (i = 0; i < (digest_hex_bytes / 2); ++i)
                 printf ("%02x", bin_buffer[i]);

-              putchar (' ');
-              if (file_is_binary)
-                putchar ('*');
-              else
-                putchar (' ');
-
-              /* Translate each NEWLINE byte to the string, "\\n",
-                 and each backslash to "\\\\".  */
-              for (i = 0; i < strlen (file); ++i)
+              if (!prefix_tag)
                 {
-                  switch (file[i])
-                    {
-                    case '\n':
-                      fputs ("\\n", stdout);
-                      break;
-
-                    case '\\':
-                      fputs ("\\\\", stdout);
-                      break;
-
-                    default:
-                      putchar (file[i]);
-                      break;
-                    }
+                  putchar (' ');
+
+                  putchar (file_is_binary ? '*' : ' ');
+
+                  print_filename (file);
                 }
+
               putchar ('\n');
             }
         }
diff --git a/tests/misc/md5sum-bsd b/tests/misc/md5sum-bsd
index 8226d7a..6497933 100755
--- a/tests/misc/md5sum-bsd
+++ b/tests/misc/md5sum-bsd
@@ -38,4 +38,15 @@ md5sum --strict -c check.md5 || fail=1
 # an option to avoid the ambiguity.
 tail -n+2 check.md5 | md5sum --strict -c && fail=1

+#--tag option test
+
+for i in 'a' ' b' '*c' 'dd' ' '; do
+  echo "$i" > "$i"
+  md5sum --tag "$i" >> check.md5sum
+done
+sed 's/  / /' check.md5sum > check.md5
+
+md5sum --strict -c check.md5sum || fail=1
+md5sum --strict -c check.md5 || fail=1
+
 Exit $fail
--
1.7.12.rc1.10.g97c7934



reply via email to

[Prev in Thread] Current Thread [Next in Thread]