[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: fnmatch has exponential running time
From: |
Jakub Jelinek |
Subject: |
Re: fnmatch has exponential running time |
Date: |
Tue, 27 Mar 2007 12:06:28 +0200 |
User-agent: |
Mutt/1.4.2.2i |
On Thu, Mar 22, 2007 at 09:21:37PM +0100, Bruno Haible wrote:
> fnmatch() has a worst-case complexity O(m*n) where m is the size of the
> pattern and n is the size of the sample string. Unfortunately glibc has
> chosen an implementation with exponential running time.
>
> $ mkdir foo
> $ cd foo
> $ touch
> aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmmnnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy
> $ time find . -name 'a*b*d*e*f*g*h*i*j*k*l*m*n*o*p*q*z*'
> real 4m50.007s
> user 4m44.002s
> sys 0m0.056s
The following patch ought to fix it.
When recursing for * handling it will stop when it hits next normal *,
tell the caller what the current pattern and string pointers were
and return to the caller which will restart from that place.
I have also removed no_leading_period2 variables, since there is no need
to preserve no_leading_period variable. Either the code will always
return without ever looking at that var again (that was the only case
before this patch), or it will reinitialize it from what the recursive
call passed up.
2007-03-27 Jakub Jelinek <address@hidden>
* posix/fnmatch.c (STRUCT): Define.
(fnmatch): Pass NULL as last argument to internal_fn{,w}match.
* posix/fnmatch_loop.c (struct STRUCT): New type.
(FCT): Add ends argument. If ends != NULL and normal * is
seen in the pattern, store current pattern and string pointers
and return. Adjust recursive calls.
(EXT): Adjust FCT callers.
(STRUCT): Undef at the end of the file.
* posix/Makefile (tests): Add tst-fnmatch2.
* posix/tst-fnmatch2.c: New test.
--- libc/posix/fnmatch.c.jj 2005-03-30 06:17:47.000000000 +0200
+++ libc/posix/fnmatch.c 2007-03-27 10:31:25.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003
+/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -209,6 +209,7 @@ __wcschrnul (s, c)
# define FCT internal_fnmatch
# define EXT ext_match
# define END end_pattern
+# define STRUCT fnmatch_struct
# define L(CS) CS
# ifdef _LIBC
# define BTOWC(C) __btowc (C)
@@ -235,7 +236,8 @@ __wcschrnul (s, c)
# define INT wint_t
# define FCT internal_fnwmatch
# define EXT ext_wmatch
-# define END end_wpattern
+# define END end_wpattern
+# define STRUCT fnwmatch_struct
# define L(CS) L##CS
# define BTOWC(C) (C)
# define STRLEN(S) __wcslen (S)
@@ -397,12 +399,12 @@ fnmatch (pattern, string, flags)
}
return internal_fnwmatch (wpattern, wstring, wstring + n,
- flags & FNM_PERIOD, flags);
+ flags & FNM_PERIOD, flags, NULL);
}
# endif /* mbstate_t and mbsrtowcs or _LIBC. */
return internal_fnmatch (pattern, string, string + strlen (string),
- flags & FNM_PERIOD, flags);
+ flags & FNM_PERIOD, flags, NULL);
}
# ifdef _LIBC
--- libc/posix/fnmatch_loop.c.jj 2005-10-15 00:44:42.000000000 +0200
+++ libc/posix/fnmatch_loop.c 2007-03-27 11:47:12.000000000 +0200
@@ -1,5 +1,5 @@
-/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005
- Free Software Foundation, Inc.
+/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005,
+ 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,18 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+struct STRUCT
+{
+ const CHAR *pattern;
+ const CHAR *string;
+ int no_leading_period;
+};
+
/* Match STRING against the filename pattern PATTERN, returning zero if
it matches, nonzero if not. */
static int FCT (const CHAR *pattern, const CHAR *string,
- const CHAR *string_end, int no_leading_period, int flags)
+ const CHAR *string_end, int no_leading_period, int flags,
+ struct STRUCT *ends)
internal_function;
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
const CHAR *string_end, int no_leading_period, int flags)
@@ -29,12 +37,13 @@ static const CHAR *END (const CHAR *patt
static int
internal_function
-FCT (pattern, string, string_end, no_leading_period, flags)
+FCT (pattern, string, string_end, no_leading_period, flags, ends)
const CHAR *pattern;
const CHAR *string;
const CHAR *string_end;
int no_leading_period;
int flags;
+ struct STRUCT *ends;
{
register const CHAR *p = pattern, *n = string;
register UCHAR c;
@@ -97,6 +106,13 @@ FCT (pattern, string, string_end, no_lea
if (res != -1)
return res;
}
+ else if (ends != NULL)
+ {
+ ends->pattern = p - 1;
+ ends->string = n;
+ ends->no_leading_period = no_leading_period;
+ return 0;
+ }
if (n != string_end && *n == L('.') && no_leading_period)
return FNM_NOMATCH;
@@ -157,7 +173,9 @@ FCT (pattern, string, string_end, no_lea
else
{
const CHAR *endp;
+ struct STRUCT end;
+ end.pattern = NULL;
endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
string_end - n);
if (endp == NULL)
@@ -170,36 +188,46 @@ FCT (pattern, string, string_end, no_lea
{
int flags2 = ((flags & FNM_FILE_NAME)
? flags : (flags & ~FNM_PERIOD));
- int no_leading_period2 = no_leading_period;
- for (--p; n < endp; ++n, no_leading_period2 = 0)
- if (FCT (p, n, string_end, no_leading_period2, flags2)
- == 0)
- return 0;
+ for (--p; n < endp; ++n, no_leading_period = 0)
+ if (FCT (p, n, string_end, no_leading_period, flags2,
+ &end) == 0)
+ goto found;
}
else if (c == L('/') && (flags & FNM_FILE_NAME))
{
while (n < string_end && *n != L('/'))
++n;
if (n < string_end && *n == L('/')
- && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
- == 0))
+ && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
+ NULL) == 0))
return 0;
}
else
{
int flags2 = ((flags & FNM_FILE_NAME)
? flags : (flags & ~FNM_PERIOD));
- int no_leading_period2 = no_leading_period;
if (c == L('\\') && !(flags & FNM_NOESCAPE))
c = *p;
c = FOLD (c);
- for (--p; n < endp; ++n, no_leading_period2 = 0)
+ for (--p; n < endp; ++n, no_leading_period = 0)
if (FOLD ((UCHAR) *n) == c
- && (FCT (p, n, string_end, no_leading_period2, flags2)
- == 0))
- return 0;
+ && (FCT (p, n, string_end, no_leading_period, flags2,
+ &end) == 0))
+ {
+ found:
+ if (end.pattern == NULL)
+ return 0;
+ break;
+ }
+ if (end.pattern != NULL)
+ {
+ p = end.pattern;
+ n = end.string;
+ no_leading_period = end.no_leading_period;
+ continue;
+ }
}
}
@@ -1098,7 +1126,7 @@ EXT (INT opt, const CHAR *pattern, const
switch (opt)
{
case L('*'):
- if (FCT (p, string, string_end, no_leading_period, flags) == 0)
+ if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
return 0;
/* FALLTHROUGH */
@@ -1109,7 +1137,8 @@ EXT (INT opt, const CHAR *pattern, const
/* First match the prefix with the current pattern with the
current pattern. */
if (FCT (list->str, string, rs, no_leading_period,
- flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
+ flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+ NULL) == 0
/* This was successful. Now match the rest with the rest
of the pattern. */
&& (FCT (p, rs, string_end,
@@ -1117,7 +1146,7 @@ EXT (INT opt, const CHAR *pattern, const
? no_leading_period
: rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
flags & FNM_FILE_NAME
- ? flags : flags & ~FNM_PERIOD) == 0
+ ? flags : flags & ~FNM_PERIOD, NULL) == 0
/* This didn't work. Try the whole pattern. */
|| (rs != string
&& FCT (pattern - 1, rs, string_end,
@@ -1126,7 +1155,7 @@ EXT (INT opt, const CHAR *pattern, const
: (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
? 1 : 0),
flags & FNM_FILE_NAME
- ? flags : flags & ~FNM_PERIOD) == 0)))
+ ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
/* It worked. Signal success. */
return 0;
}
@@ -1136,7 +1165,7 @@ EXT (INT opt, const CHAR *pattern, const
return FNM_NOMATCH;
case L('?'):
- if (FCT (p, string, string_end, no_leading_period, flags) == 0)
+ if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
return 0;
/* FALLTHROUGH */
@@ -1148,7 +1177,8 @@ EXT (INT opt, const CHAR *pattern, const
pattern list. */
if (FCT (STRCAT (list->str, p), string, string_end,
no_leading_period,
- flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
+ flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+ NULL) == 0)
/* It worked. Signal success. */
return 0;
while ((list = list->next) != NULL);
@@ -1163,7 +1193,8 @@ EXT (INT opt, const CHAR *pattern, const
for (runp = list; runp != NULL; runp = runp->next)
if (FCT (runp->str, string, rs, no_leading_period,
- flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
+ flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+ NULL) == 0)
break;
/* If none of the patterns matched see whether the rest does. */
@@ -1172,8 +1203,8 @@ EXT (INT opt, const CHAR *pattern, const
rs == string
? no_leading_period
: rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
- flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
- == 0))
+ flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+ NULL) == 0))
/* This is successful. */
return 0;
}
@@ -1198,6 +1229,7 @@ EXT (INT opt, const CHAR *pattern, const
#undef FCT
#undef EXT
#undef END
+#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
--- libc/posix/Makefile.jj 2007-02-26 18:13:45.000000000 +0100
+++ libc/posix/Makefile 2007-03-27 11:07:05.000000000 +0200
@@ -90,7 +90,7 @@ tests := tstgetopt testfnm runtests run
tst-execv1 tst-execv2 tst-execl1 tst-execl2 \
tst-execve1 tst-execve2 tst-execle1 tst-execle2 \
tst-execvp3 tst-execvp4 tst-rfc3484 tst-rfc3484-2 \
- tst-getaddrinfo3
+ tst-getaddrinfo3 tst-fnmatch2
xtests := bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
--- libc/posix/tst-fnmatch2.c.jj 2007-03-27 11:06:37.000000000 +0200
+++ libc/posix/tst-fnmatch2.c 2007-03-27 11:55:15.000000000 +0200
@@ -0,0 +1,35 @@
+#include <fnmatch.h>
+#include <stdio.h>
+
+int
+do_test (void)
+{
+ char pattern[] = "a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p*q*r*s*t*u*v*w*x*y*z*";
+ const char *string = "aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmm"
+ "nnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy";
+ if (fnmatch (pattern, string, 0) != FNM_NOMATCH)
+ {
+ puts ("First fnmatch didn't return FNM_NOMATCH");
+ return 1;
+ }
+ pattern[(sizeof pattern) - 3] = '*';
+ if (fnmatch (pattern, string, 0) != 0)
+ {
+ puts ("Second fnmatch didn't return 0");
+ return 1;
+ }
+ if (fnmatch ("a*b/*", "abbb/.x", FNM_PATHNAME | FNM_PERIOD) != FNM_NOMATCH)
+ {
+ puts ("Third fnmatch didn't return FNM_NOMATCH");
+ return 1;
+ }
+ if (fnmatch ("a*b/*", "abbb/xy", FNM_PATHNAME | FNM_PERIOD) != 0)
+ {
+ puts ("Fourth fnmatch didn't return 0");
+ return 1;
+ }
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
Jakub