bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: canonicalize_file_name does not support MS-Windows style file names


From: Eli Zaretskii
Subject: Re: canonicalize_file_name does not support MS-Windows style file names
Date: Mon, 19 Nov 2012 20:02:52 +0200

> From: Mark H Weaver <address@hidden>
> Cc: Paul Eggert <address@hidden>, address@hidden,  address@hidden
> Date: Wed, 07 Nov 2012 22:49:36 -0500
> 
> Eli Zaretskii <address@hidden> writes:
> >> Eli, I see that Bruno asked for a set of Windows-syntax filenames to
> >> extend the unit test.  Would you be willing to work on that?
> >
> > I can come up with file names that could be used to test the code, if
> > this will help, but I won't have resources to make a complete patch
> > for the unit test program(s), sorry.
> 
> That would be very helpful!

Sorry for the delay.  I found that the sources changed in gnulib since
I patched them, so I went back and redid the changes relative to the
current version.  You will find below, in addition to the test cases
and the test program I used, also patches for both canonicalize-lgpl.c
and canonicalize.c.

> Hopefully the rest will be an easy job for someone familiar with the
> gnulib unit tests, but if no one steps forward, I'll do it.
> 
> > I just hope that whatever I submit in addition to what I already did
> > will not be left collecting dust for another year.
> 
> I'll do what I can to prevent that outcome.  Hopefully Paul can help us
> get your patch committed.

Thanks.  Looking forward to see this resolved.

Here are the test cases I tried and their results.  My current
directory when running these tests was d:\usr\eli\utils\CURDIR.

     'foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     '.\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     './foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     '..\foo.bar' => 'd:\usr\eli\utils\foo.bar'
     '../foo.bar' => 'd:\usr\eli\utils\foo.bar'
     '../CURDIR/foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     '..\CURDIR\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     '..\CURDIR/foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     '..\CURDIR\foo.bar\' => '(null)'
     '../CURDIR\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar'
     'C:WINDOWS' => '(null)'
     'C:system32' => '(null)'
     'C:/WINDOWS' => 'C:/WINDOWS'
     'C:\WINDOWS' => 'C:/WINDOWS'
     'C:\\WINDOWS' => 'C:/WINDOWS'
     'C:./WINDOWS' => '(null)'
     'C:./system32' => '(null)'
     'C:.\WINDOWS' => '(null)'
     'C:.\system32' => '(null)'
     'C:.' => 'd:\usr\eli\utils\lib'
     'C:/' => 'C:/'
     'C:\' => 'C:/'

The "(null)" results are failures, and they all mean one thing: the
patched functions still do not support drive-relative file names.  I
don't consider it a significant omission, since these names are very
rare, and OTOH supporting them would require much more invasive
changes in the original gnulib code.  So I decided to omit that.

Here's the test program I used:

  /* Before running, type: "cd C:\WINDOWS" at the shell prompt.  */

  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <string.h>

  extern char * canonicalize_file_name (const char *);

  int
  main (void)
  {
    const char *test_fname[] = {
      "foo.bar",
      ".\\foo.bar",
      "./foo.bar",
      "..\\foo.bar",
      "../foo.bar",
      "../%s/foo.bar",
      "..\\%s\\foo.bar",
      "..\\%s/foo.bar",
      "..\\%s\\foo.bar\\",
      "../%s\\foo.bar",
      "C:WINDOWS",
      "C:system32",
      "C:/WINDOWS",
      "C:\\WINDOWS",
      "C:\\\\WINDOWS",
      "C:./WINDOWS",
      "C:./system32",
      "C:.\\WINDOWS",
      "C:.\\system32",
      "C:.",
      "C:/",
      "C:\\"
    };
    int i;
    char curdir[MAX_PATH], *pc;
    getcwd (curdir, MAX_PATH);
    for (pc = curdir + strlen (curdir); !ISSLASH (pc[-1]); pc--)
      ;

    for (i = 0; i < sizeof (test_fname) / sizeof (test_fname[0]); i++)
      {
        char fname[MAX_PATH];

        sprintf (fname, test_fname[i], pc);
        printf ("'%s' => '%s'\n", fname, canonicalize_file_name (fname));
      }

    return 0;
  }

Here are the patched to the two gnulib source files:

--- canonicalize-lgpl.c~0       2012-11-19 07:28:15.584793000 +0200
+++ canonicalize-lgpl.c 2012-11-19 08:31:46.886703100 +0200
@@ -51,6 +51,7 @@
 # define __realpath realpath
 # include "pathmax.h"
 # include "malloca.h"
+# include "dosname.h"
 # if HAVE_GETCWD
 #  if IN_RELOCWRAPPER
     /* When building the relocatable program wrapper, use the system's getcwd
@@ -131,6 +132,7 @@
   const char *start, *end, *rpath_limit;
   long int path_max;
   int num_links = 0;
+  size_t prefix_len;
 
   if (name == NULL)
     {
@@ -173,7 +175,11 @@
     rpath = resolved;
   rpath_limit = rpath + path_max;
 
-  if (name[0] != '/')
+  /* This is always zero for Posix hosts, but can be 2 for MS-Windows
+     and MS-DOS X:/foo/bar file names.  */
+  prefix_len = FILE_SYSTEM_PREFIX_LEN (name);
+
+  if (!IS_ABSOLUTE_FILE_NAME (name))
     {
       if (!__getcwd (rpath, path_max))
         {
@@ -184,17 +190,22 @@
     }
   else
     {
-      rpath[0] = '/';
-      dest = rpath + 1;
+      dest = rpath;
+      if (prefix_len)
+       {
+         memcpy (rpath, name, prefix_len);
+         dest += prefix_len;
+       }
+      *dest++ = '/';
       if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
         {
-          if (name[1] == '/' && name[2] != '/')
+          if (ISSLASH (name[1]) && !ISSLASH (name[2]) && !prefix_len)
             *dest++ = '/';
           *dest = '\0';
         }
     }
 
-  for (start = end = name; *start; start = end)
+  for (start = end = name + prefix_len; *start; start = end)
     {
 #ifdef _LIBC
       struct stat64 st;
@@ -204,11 +215,11 @@
       int n;
 
       /* Skip sequence of multiple path-separators.  */
-      while (*start == '/')
+      while (ISSLASH (*start))
         ++start;
 
       /* Find end of path component.  */
-      for (end = start; *end && *end != '/'; ++end)
+      for (end = start; *end && !ISSLASH (*end); ++end)
         /* Nothing.  */;
 
       if (end - start == 0)
@@ -218,17 +229,18 @@
       else if (end - start == 2 && start[0] == '.' && start[1] == '.')
         {
           /* Back up to previous component, ignore if at root already.  */
-          if (dest > rpath + 1)
-            while ((--dest)[-1] != '/');
-          if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1
-              && *dest == '/' && dest[1] != '/')
+          if (dest > rpath + prefix_len + 1)
+           for ( --dest; !ISSLASH (dest[-1]); --dest);
+          if (DOUBLE_SLASH_IS_DISTINCT_ROOT
+             && dest == rpath + 1 && !prefix_len
+              && ISSLASH (*dest) && !ISSLASH (dest[1]))
             dest++;
         }
       else
         {
           size_t new_size;
 
-          if (dest[-1] != '/')
+          if (!ISSLASH (dest[-1]))
             *dest++ = '/';
 
           if (dest + (end - start) >= rpath_limit)
@@ -239,7 +251,7 @@
               if (resolved)
                 {
                   __set_errno (ENAMETOOLONG);
-                  if (dest > rpath + 1)
+                  if (dest > rpath + prefix_len + 1)
                     dest--;
                   *dest = '\0';
                   goto error;
@@ -329,24 +341,31 @@
               memmove (&extra_buf[n], end, len + 1);
               name = end = memcpy (extra_buf, buf, n);
 
-              if (buf[0] == '/')
+              if (IS_ABSOLUTE_FILE_NAME (buf))
                 {
-                  dest = rpath + 1;     /* It's an absolute symlink */
+                 size_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
+
+                 if (pfxlen)
+                   memcpy (rpath, buf, pfxlen);
+                  dest = rpath + pfxlen;
+                 *dest++ = '/'; /* It's an absolute symlink */
                   if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
                     {
-                      if (buf[1] == '/' && buf[2] != '/')
+                      if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen)
                         *dest++ = '/';
                       *dest = '\0';
                     }
+                 /* Install the new prefix to be in effect hereafter.  */
+                 prefix_len = pfxlen;
                 }
               else
                 {
                   /* Back up to previous component, ignore if at root
                      already: */
-                  if (dest > rpath + 1)
-                    while ((--dest)[-1] != '/');
+                  if (dest > rpath + prefix_len + 1)
+                   for (--dest; !ISSLASH (dest[-1]); --dest);
                   if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1
-                      && *dest == '/' && dest[1] != '/')
+                      && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len)
                     dest++;
                 }
             }
@@ -357,10 +376,10 @@
             }
         }
     }
-  if (dest > rpath + 1 && dest[-1] == '/')
+  if (dest > rpath + prefix_len + 1 && ISSLASH (dest[-1]))
     --dest;
-  if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1
-      && *dest == '/' && dest[1] != '/')
+  if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 && !prefix_len
+      && ISSLASH (*dest) && !ISSLASH (dest[1]))
     dest++;
   *dest = '\0';
 

--- canonicalize.c~0    2012-10-24 12:32:12.000000000 +0200
+++ canonicalize.c      2012-11-19 08:45:52.450238000 +0200
@@ -31,4 +31,5 @@
 #include "xalloc.h"
 #include "xgetcwd.h"
+#include "dosname.h"
 
 #define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0)
@@ -51,4 +73,10 @@
    The result is malloc'd.  */
 
+#if ISSLASH('\\')
+# define SLASHES "/\\"
+#else
+# define SLASHES "/"
+#endif
+
 char *
 canonicalize_file_name (const char *name)
@@ -101,4 +139,5 @@
   int can_flags = can_mode & ~CAN_MODE_MASK;
   bool logical = can_flags & CAN_NOLINKS;
+  size_t prefix_len;
 
   can_mode &= CAN_MODE_MASK;
@@ -122,5 +161,9 @@
     }
 
-  if (name[0] != '/')
+  /* This is always zero for Posix hosts, but can be 2 for MS-Windows
+     and MS-DOS X:/foo/bar file names.  */
+  prefix_len = FILE_SYSTEM_PREFIX_LEN (name);
+
+  if (!IS_ABSOLUTE_FILE_NAME (name))
     {
       rname = xgetcwd ();
@@ -144,9 +187,14 @@
       rname = xmalloc (PATH_MAX);
       rname_limit = rname + PATH_MAX;
-      rname[0] = '/';
-      dest = rname + 1;
+      dest = rname;
+      if (prefix_len)
+       {
+         memcpy (rname, name, prefix_len);
+         dest += prefix_len;
+       }
+      *dest++ = '/';
       if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
         {
-          if (name[1] == '/' && name[2] != '/')
+          if (ISSLASH (name[1]) && !ISSLASH (name[2]) && !prefix_len)
             *dest++ = '/';
           *dest = '\0';
@@ -154,12 +202,12 @@
     }
 
-  for (start = name; *start; start = end)
+  for (start = name + prefix_len; *start; start = end)
     {
       /* Skip sequence of multiple file name separators.  */
-      while (*start == '/')
+      while (ISSLASH (*start))
         ++start;
 
       /* Find end of component.  */
-      for (end = start; *end && *end != '/'; ++end)
+      for (end = start; *end && !ISSLASH (*end); ++end)
         /* Nothing.  */;
 
@@ -171,8 +219,8 @@
         {
           /* Back up to previous component, ignore if at root already.  */
-          if (dest > rname + 1)
-            while ((--dest)[-1] != '/');
+          if (dest > rname + prefix_len + 1)
+           for ( --dest; !ISSLASH (dest[-1]); --dest);
           if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1
-              && *dest == '/' && dest[1] != '/')
+              && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1]))
             dest++;
         }
@@ -181,5 +229,5 @@
           struct stat st;
 
-          if (dest[-1] != '/')
+          if (!ISSLASH (dest[-1]))
             *dest++ = '/';
 
@@ -217,5 +265,5 @@
               if (can_mode == CAN_ALL_BUT_LAST)
                 {
-                  if (end[strspn (end, "/")] || saved_errno != ENOENT)
+                  if (end[strspn (end, SLASHES)] || saved_errno != ENOENT)
                     goto error;
                   continue;
@@ -269,13 +317,20 @@
               name = end = memcpy (extra_buf, buf, n);
 
-              if (buf[0] == '/')
+              if (IS_ABSOLUTE_FILE_NAME (buf))
                 {
-                  dest = rname + 1;     /* It's an absolute symlink */
+                 size_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
+
+                 if (pfxlen)
+                   memcpy (rname, buf, pfxlen);
+                  dest = rname + pfxlen;
+                 *dest++ = '/'; /* It's an absolute symlink */
                   if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
                     {
-                      if (buf[1] == '/' && buf[2] != '/')
+                      if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen)
                         *dest++ = '/';
                       *dest = '\0';
                     }
+                 /* Install the new prefix to be in effect hereafter.  */
+                 prefix_len = pfxlen;
                 }
               else
@@ -283,8 +338,8 @@
                   /* Back up to previous component, ignore if at root
                      already: */
-                  if (dest > rname + 1)
-                    while ((--dest)[-1] != '/');
+                  if (dest > rname + prefix_len + 1)
+                   for (--dest; !ISSLASH (dest[-1]); --dest);
                   if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1
-                      && *dest == '/' && dest[1] != '/')
+                      && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len)
                     dest++;
                 }
@@ -302,8 +357,8 @@
         }
     }
-  if (dest > rname + 1 && dest[-1] == '/')
+  if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1]))
     --dest;
-  if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1
-      && *dest == '/' && dest[1] != '/')
+  if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len
+      && ISSLASH (*dest) && !ISSLASH (dest[1]))
     dest++;
   *dest = '\0';



reply via email to

[Prev in Thread] Current Thread [Next in Thread]