bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: memmem issues


From: Bruno Haible
Subject: Re: memmem issues
Date: Mon, 31 Dec 2007 11:59:31 +0100
User-agent: KMail/1.5.4

Paul Eggert wrote:
> Bruno Haible <address@hidden> writes:
> 
> > Most of your comments apply to all copies of the KMP code in gnulib.
> 
> Ouch!  Should these be coalesced?

Out of the 8 copies of the code currently in gnulib, I can easily coalesce
5 into 1; see attached patch. The remaining 4 copies, however, cannot be
merged without making them less efficient or without introducing ugly #ifs.

Bruno


2007-12-30  Bruno Haible  <address@hidden>

        Unify 5 copies of the KMP code.
        * lib/str-kmp.h: New file.
        * lib/c-strcasestr.c: Include str-kmp.h.
        (knuth_morris_pratt): Remove function.
        (c_strcasestr): Update.
        * lib/c-strstr.c: Include str-kmp.h.
        (knuth_morris_pratt): Remove function.
        (c_strcasestr): Update.
        * lib/mbscasestr.c: Include str-kmp.h.
        (knuth_morris_pratt_unibyte): Remove function.
        * lib/mbsstr.c: Include str-kmp.h.
        (knuth_morris_pratt_unibyte): Remove function.
        * lib/strcasestr.c: Include str-kmp.h.
        (knuth_morris_pratt): Remove function.
        (strcasestr): Update.
        * modules/c-strcasestr (Files): Add lib/str-kmp.h.
        * modules/c-strstr (Files): Likewise.
        * modules/mbscasestr (Files): Likewise.
        * modules/mbsstr (Files): Likewise.
        * modules/strcasestr (Files): Likewise.
        Suggested by Paul Eggert.

*** lib/c-strcasestr.c.orig     2007-12-30 17:20:46.000000000 +0100
--- lib/c-strcasestr.c  2007-12-30 17:13:41.000000000 +0100
***************
*** 27,153 ****
  #include "malloca.h"
  #include "c-ctype.h"
  
! /* Knuth-Morris-Pratt algorithm.
!    See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
!    Return a boolean indicating success.  */
! static bool
! knuth_morris_pratt (const char *haystack, const char *needle,
!                   const char **resultp)
! {
!   size_t m = strlen (needle);
! 
!   /* Allocate the table.  */
!   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
!   if (table == NULL)
!     return false;
!   /* Fill the table.
!      For 0 < i < m:
!        0 < table[i] <= i is defined such that
!        forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
!        and table[i] is as large as possible with this property.
!      This implies:
!      1) For 0 < i < m:
!           If table[i] < i,
!           needle[table[i]..i-1] = needle[0..i-1-table[i]].
!      2) For 0 < i < m:
!           rhaystack[0..i-1] == needle[0..i-1]
!           and exists h, i <= h < m: rhaystack[h] != needle[h]
!           implies
!           forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
!      table[0] remains uninitialized.  */
!   {
!     size_t i, j;
! 
!     /* i = 1: Nothing to verify for x = 0.  */
!     table[1] = 1;
!     j = 0;
! 
!     for (i = 2; i < m; i++)
!       {
!       /* Here: j = i-1 - table[i-1].
!          The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
!          for x < table[i-1], by induction.
!          Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!       unsigned char b = c_tolower ((unsigned char) needle[i - 1]);
! 
!       for (;;)
!         {
!           /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
!              is known to hold for x < i-1-j.
!              Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!           if (b == c_tolower ((unsigned char) needle[j]))
!             {
!               /* Set table[i] := i-1-j.  */
!               table[i] = i - ++j;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for x = i-1-j, because
!                needle[i-1] != needle[j] = needle[i-1-x].  */
!           if (j == 0)
!             {
!               /* The inequality holds for all possible x.  */
!               table[i] = i;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for i-1-j < x < i-1-j+table[j], because for these x:
!                needle[x..i-2]
!                = needle[x-(i-1-j)..j-1]
!                != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
!                   = needle[0..i-2-x],
!              hence needle[x..i-1] != needle[0..i-1-x].
!              Furthermore
!                needle[i-1-j+table[j]..i-2]
!                = needle[table[j]..j-1]
!                = needle[0..j-1-table[j]]  (by definition of table[j]).  */
!           j = j - table[j];
!         }
!       /* Here: j = i - table[i].  */
!       }
!   }
! 
!   /* Search, using the table to accelerate the processing.  */
!   {
!     size_t j;
!     const char *rhaystack;
!     const char *phaystack;
! 
!     *resultp = NULL;
!     j = 0;
!     rhaystack = haystack;
!     phaystack = haystack;
!     /* Invariant: phaystack = rhaystack + j.  */
!     while (*phaystack != '\0')
!       if (c_tolower ((unsigned char) needle[j])
!         == c_tolower ((unsigned char) *phaystack))
!       {
!         j++;
!         phaystack++;
!         if (j == m)
!           {
!             /* The entire needle has been found.  */
!             *resultp = rhaystack;
!             break;
!           }
!       }
!       else if (j > 0)
!       {
!         /* Found a match of needle[0..j-1], mismatch at needle[j].  */
!         rhaystack += table[j];
!         j -= table[j];
!       }
!       else
!       {
!         /* Found a mismatch at needle[0] already.  */
!         rhaystack++;
!         phaystack++;
!       }
!   }
! 
!   freea (table);
!   return true;
! }
  
  /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
     comparison.
--- 27,35 ----
  #include "malloca.h"
  #include "c-ctype.h"
  
! /* Knuth-Morris-Pratt algorithm.  */
! #define CANON_ELEMENT(c) c_tolower (c)
! #include "str-kmp.h"
  
  /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
     comparison.
***************
*** 215,221 ****
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
--- 97,103 ----
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt_unibyte (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
*** lib/c-strstr.c.orig 2007-12-30 17:20:47.000000000 +0100
--- lib/c-strstr.c      2007-12-30 17:14:31.000000000 +0100
***************
*** 26,151 ****
  
  #include "malloca.h"
  
! /* Knuth-Morris-Pratt algorithm.
!    See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
!    Return a boolean indicating success.  */
! static bool
! knuth_morris_pratt (const char *haystack, const char *needle,
!                   const char **resultp)
! {
!   size_t m = strlen (needle);
! 
!   /* Allocate the table.  */
!   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
!   if (table == NULL)
!     return false;
!   /* Fill the table.
!      For 0 < i < m:
!        0 < table[i] <= i is defined such that
!        forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
!        and table[i] is as large as possible with this property.
!      This implies:
!      1) For 0 < i < m:
!           If table[i] < i,
!           needle[table[i]..i-1] = needle[0..i-1-table[i]].
!      2) For 0 < i < m:
!           rhaystack[0..i-1] == needle[0..i-1]
!           and exists h, i <= h < m: rhaystack[h] != needle[h]
!           implies
!           forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
!      table[0] remains uninitialized.  */
!   {
!     size_t i, j;
! 
!     /* i = 1: Nothing to verify for x = 0.  */
!     table[1] = 1;
!     j = 0;
! 
!     for (i = 2; i < m; i++)
!       {
!       /* Here: j = i-1 - table[i-1].
!          The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
!          for x < table[i-1], by induction.
!          Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!       unsigned char b = (unsigned char) needle[i - 1];
! 
!       for (;;)
!         {
!           /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
!              is known to hold for x < i-1-j.
!              Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!           if (b == (unsigned char) needle[j])
!             {
!               /* Set table[i] := i-1-j.  */
!               table[i] = i - ++j;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for x = i-1-j, because
!                needle[i-1] != needle[j] = needle[i-1-x].  */
!           if (j == 0)
!             {
!               /* The inequality holds for all possible x.  */
!               table[i] = i;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for i-1-j < x < i-1-j+table[j], because for these x:
!                needle[x..i-2]
!                = needle[x-(i-1-j)..j-1]
!                != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
!                   = needle[0..i-2-x],
!              hence needle[x..i-1] != needle[0..i-1-x].
!              Furthermore
!                needle[i-1-j+table[j]..i-2]
!                = needle[table[j]..j-1]
!                = needle[0..j-1-table[j]]  (by definition of table[j]).  */
!           j = j - table[j];
!         }
!       /* Here: j = i - table[i].  */
!       }
!   }
! 
!   /* Search, using the table to accelerate the processing.  */
!   {
!     size_t j;
!     const char *rhaystack;
!     const char *phaystack;
! 
!     *resultp = NULL;
!     j = 0;
!     rhaystack = haystack;
!     phaystack = haystack;
!     /* Invariant: phaystack = rhaystack + j.  */
!     while (*phaystack != '\0')
!       if ((unsigned char) needle[j] == (unsigned char) *phaystack)
!       {
!         j++;
!         phaystack++;
!         if (j == m)
!           {
!             /* The entire needle has been found.  */
!             *resultp = rhaystack;
!             break;
!           }
!       }
!       else if (j > 0)
!       {
!         /* Found a match of needle[0..j-1], mismatch at needle[j].  */
!         rhaystack += table[j];
!         j -= table[j];
!       }
!       else
!       {
!         /* Found a mismatch at needle[0] already.  */
!         rhaystack++;
!         phaystack++;
!       }
!   }
! 
!   freea (table);
!   return true;
! }
  
  /* Find the first occurrence of NEEDLE in HAYSTACK.  */
  char *
--- 26,34 ----
  
  #include "malloca.h"
  
! /* Knuth-Morris-Pratt algorithm.  */
! #define CANON_ELEMENT(c) c
! #include "str-kmp.h"
  
  /* Find the first occurrence of NEEDLE in HAYSTACK.  */
  char *
***************
*** 210,216 ****
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
--- 93,99 ----
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt_unibyte (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
*** lib/mbscasestr.c.orig       2007-12-30 17:20:47.000000000 +0100
--- lib/mbscasestr.c    2007-12-30 17:15:39.000000000 +0100
***************
*** 31,160 ****
  
  #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
  /* Knuth-Morris-Pratt algorithm.
     See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
     Return a boolean indicating success.  */
- 
- static bool
- knuth_morris_pratt_unibyte (const char *haystack, const char *needle,
-                           const char **resultp)
- {
-   size_t m = strlen (needle);
- 
-   /* Allocate the table.  */
-   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
-   if (table == NULL)
-     return false;
-   /* Fill the table.
-      For 0 < i < m:
-        0 < table[i] <= i is defined such that
-        forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
-        and table[i] is as large as possible with this property.
-      This implies:
-      1) For 0 < i < m:
-           If table[i] < i,
-           needle[table[i]..i-1] = needle[0..i-1-table[i]].
-      2) For 0 < i < m:
-           rhaystack[0..i-1] == needle[0..i-1]
-           and exists h, i <= h < m: rhaystack[h] != needle[h]
-           implies
-           forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
-      table[0] remains uninitialized.  */
-   {
-     size_t i, j;
- 
-     /* i = 1: Nothing to verify for x = 0.  */
-     table[1] = 1;
-     j = 0;
- 
-     for (i = 2; i < m; i++)
-       {
-       /* Here: j = i-1 - table[i-1].
-          The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
-          for x < table[i-1], by induction.
-          Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-       unsigned char b = TOLOWER ((unsigned char) needle[i - 1]);
- 
-       for (;;)
-         {
-           /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
-              is known to hold for x < i-1-j.
-              Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-           if (b == TOLOWER ((unsigned char) needle[j]))
-             {
-               /* Set table[i] := i-1-j.  */
-               table[i] = i - ++j;
-               break;
-             }
-           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-              for x = i-1-j, because
-                needle[i-1] != needle[j] = needle[i-1-x].  */
-           if (j == 0)
-             {
-               /* The inequality holds for all possible x.  */
-               table[i] = i;
-               break;
-             }
-           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-              for i-1-j < x < i-1-j+table[j], because for these x:
-                needle[x..i-2]
-                = needle[x-(i-1-j)..j-1]
-                != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
-                   = needle[0..i-2-x],
-              hence needle[x..i-1] != needle[0..i-1-x].
-              Furthermore
-                needle[i-1-j+table[j]..i-2]
-                = needle[table[j]..j-1]
-                = needle[0..j-1-table[j]]  (by definition of table[j]).  */
-           j = j - table[j];
-         }
-       /* Here: j = i - table[i].  */
-       }
-   }
- 
-   /* Search, using the table to accelerate the processing.  */
-   {
-     size_t j;
-     const char *rhaystack;
-     const char *phaystack;
- 
-     *resultp = NULL;
-     j = 0;
-     rhaystack = haystack;
-     phaystack = haystack;
-     /* Invariant: phaystack = rhaystack + j.  */
-     while (*phaystack != '\0')
-       if (TOLOWER ((unsigned char) needle[j])
-         == TOLOWER ((unsigned char) *phaystack))
-       {
-         j++;
-         phaystack++;
-         if (j == m)
-           {
-             /* The entire needle has been found.  */
-             *resultp = rhaystack;
-             break;
-           }
-       }
-       else if (j > 0)
-       {
-         /* Found a match of needle[0..j-1], mismatch at needle[j].  */
-         rhaystack += table[j];
-         j -= table[j];
-       }
-       else
-       {
-         /* Found a mismatch at needle[0] already.  */
-         rhaystack++;
-         phaystack++;
-       }
-   }
- 
-   freea (table);
-   return true;
- }
- 
- #if HAVE_MBRTOWC
  static bool
  knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
--- 31,44 ----
  
  #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
+ /* Knuth-Morris-Pratt algorithm.  */
+ #define CANON_ELEMENT(c) TOLOWER (c)
+ #include "str-kmp.h"
+ 
+ #if HAVE_MBRTOWC
  /* Knuth-Morris-Pratt algorithm.
     See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
     Return a boolean indicating success.  */
  static bool
  knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
*** lib/mbsstr.c.orig   2007-12-30 17:20:47.000000000 +0100
--- lib/mbsstr.c        2007-12-30 17:16:23.000000000 +0100
***************
*** 28,156 ****
  # include "mbuiter.h"
  #endif
  
  /* Knuth-Morris-Pratt algorithm.
     See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
     Return a boolean indicating success.  */
- 
- static bool
- knuth_morris_pratt_unibyte (const char *haystack, const char *needle,
-                           const char **resultp)
- {
-   size_t m = strlen (needle);
- 
-   /* Allocate the table.  */
-   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
-   if (table == NULL)
-     return false;
-   /* Fill the table.
-      For 0 < i < m:
-        0 < table[i] <= i is defined such that
-        forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
-        and table[i] is as large as possible with this property.
-      This implies:
-      1) For 0 < i < m:
-           If table[i] < i,
-           needle[table[i]..i-1] = needle[0..i-1-table[i]].
-      2) For 0 < i < m:
-           rhaystack[0..i-1] == needle[0..i-1]
-           and exists h, i <= h < m: rhaystack[h] != needle[h]
-           implies
-           forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
-      table[0] remains uninitialized.  */
-   {
-     size_t i, j;
- 
-     /* i = 1: Nothing to verify for x = 0.  */
-     table[1] = 1;
-     j = 0;
- 
-     for (i = 2; i < m; i++)
-       {
-       /* Here: j = i-1 - table[i-1].
-          The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
-          for x < table[i-1], by induction.
-          Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-       unsigned char b = (unsigned char) needle[i - 1];
- 
-       for (;;)
-         {
-           /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
-              is known to hold for x < i-1-j.
-              Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-           if (b == (unsigned char) needle[j])
-             {
-               /* Set table[i] := i-1-j.  */
-               table[i] = i - ++j;
-               break;
-             }
-           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-              for x = i-1-j, because
-                needle[i-1] != needle[j] = needle[i-1-x].  */
-           if (j == 0)
-             {
-               /* The inequality holds for all possible x.  */
-               table[i] = i;
-               break;
-             }
-           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-              for i-1-j < x < i-1-j+table[j], because for these x:
-                needle[x..i-2]
-                = needle[x-(i-1-j)..j-1]
-                != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
-                   = needle[0..i-2-x],
-              hence needle[x..i-1] != needle[0..i-1-x].
-              Furthermore
-                needle[i-1-j+table[j]..i-2]
-                = needle[table[j]..j-1]
-                = needle[0..j-1-table[j]]  (by definition of table[j]).  */
-           j = j - table[j];
-         }
-       /* Here: j = i - table[i].  */
-       }
-   }
- 
-   /* Search, using the table to accelerate the processing.  */
-   {
-     size_t j;
-     const char *rhaystack;
-     const char *phaystack;
- 
-     *resultp = NULL;
-     j = 0;
-     rhaystack = haystack;
-     phaystack = haystack;
-     /* Invariant: phaystack = rhaystack + j.  */
-     while (*phaystack != '\0')
-       if ((unsigned char) needle[j] == (unsigned char) *phaystack)
-       {
-         j++;
-         phaystack++;
-         if (j == m)
-           {
-             /* The entire needle has been found.  */
-             *resultp = rhaystack;
-             break;
-           }
-       }
-       else if (j > 0)
-       {
-         /* Found a match of needle[0..j-1], mismatch at needle[j].  */
-         rhaystack += table[j];
-         j -= table[j];
-       }
-       else
-       {
-         /* Found a mismatch at needle[0] already.  */
-         rhaystack++;
-         phaystack++;
-       }
-   }
- 
-   freea (table);
-   return true;
- }
- 
- #if HAVE_MBRTOWC
  static bool
  knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
--- 28,41 ----
  # include "mbuiter.h"
  #endif
  
+ /* Knuth-Morris-Pratt algorithm.  */
+ #define CANON_ELEMENT(c) c
+ #include "str-kmp.h"
+ 
+ #if HAVE_MBRTOWC
  /* Knuth-Morris-Pratt algorithm.
     See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
     Return a boolean indicating success.  */
  static bool
  knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
*** lib/strcasestr.c.orig       2007-12-30 17:20:47.000000000 +0100
--- lib/strcasestr.c    2007-12-30 17:12:25.000000000 +0100
***************
*** 29,155 ****
  
  #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
! /* Knuth-Morris-Pratt algorithm.
!    See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
!    Return a boolean indicating success.  */
! static bool
! knuth_morris_pratt (const char *haystack, const char *needle,
!                   const char **resultp)
! {
!   size_t m = strlen (needle);
! 
!   /* Allocate the table.  */
!   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
!   if (table == NULL)
!     return false;
!   /* Fill the table.
!      For 0 < i < m:
!        0 < table[i] <= i is defined such that
!        forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
!        and table[i] is as large as possible with this property.
!      This implies:
!      1) For 0 < i < m:
!           If table[i] < i,
!           needle[table[i]..i-1] = needle[0..i-1-table[i]].
!      2) For 0 < i < m:
!           rhaystack[0..i-1] == needle[0..i-1]
!           and exists h, i <= h < m: rhaystack[h] != needle[h]
!           implies
!           forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
!      table[0] remains uninitialized.  */
!   {
!     size_t i, j;
! 
!     /* i = 1: Nothing to verify for x = 0.  */
!     table[1] = 1;
!     j = 0;
! 
!     for (i = 2; i < m; i++)
!       {
!       /* Here: j = i-1 - table[i-1].
!          The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
!          for x < table[i-1], by induction.
!          Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!       unsigned char b = TOLOWER ((unsigned char) needle[i - 1]);
! 
!       for (;;)
!         {
!           /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
!              is known to hold for x < i-1-j.
!              Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
!           if (b == TOLOWER ((unsigned char) needle[j]))
!             {
!               /* Set table[i] := i-1-j.  */
!               table[i] = i - ++j;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for x = i-1-j, because
!                needle[i-1] != needle[j] = needle[i-1-x].  */
!           if (j == 0)
!             {
!               /* The inequality holds for all possible x.  */
!               table[i] = i;
!               break;
!             }
!           /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
!              for i-1-j < x < i-1-j+table[j], because for these x:
!                needle[x..i-2]
!                = needle[x-(i-1-j)..j-1]
!                != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
!                   = needle[0..i-2-x],
!              hence needle[x..i-1] != needle[0..i-1-x].
!              Furthermore
!                needle[i-1-j+table[j]..i-2]
!                = needle[table[j]..j-1]
!                = needle[0..j-1-table[j]]  (by definition of table[j]).  */
!           j = j - table[j];
!         }
!       /* Here: j = i - table[i].  */
!       }
!   }
! 
!   /* Search, using the table to accelerate the processing.  */
!   {
!     size_t j;
!     const char *rhaystack;
!     const char *phaystack;
! 
!     *resultp = NULL;
!     j = 0;
!     rhaystack = haystack;
!     phaystack = haystack;
!     /* Invariant: phaystack = rhaystack + j.  */
!     while (*phaystack != '\0')
!       if (TOLOWER ((unsigned char) needle[j])
!         == TOLOWER ((unsigned char) *phaystack))
!       {
!         j++;
!         phaystack++;
!         if (j == m)
!           {
!             /* The entire needle has been found.  */
!             *resultp = rhaystack;
!             break;
!           }
!       }
!       else if (j > 0)
!       {
!         /* Found a match of needle[0..j-1], mismatch at needle[j].  */
!         rhaystack += table[j];
!         j -= table[j];
!       }
!       else
!       {
!         /* Found a mismatch at needle[0] already.  */
!         rhaystack++;
!         phaystack++;
!       }
!   }
! 
!   freea (table);
!   return true;
! }
  
  /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
     comparison.
--- 29,37 ----
  
  #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
! /* Knuth-Morris-Pratt algorithm.  */
! #define CANON_ELEMENT(c) TOLOWER (c)
! #include "str-kmp.h"
  
  /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
     comparison.
***************
*** 212,218 ****
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
--- 94,100 ----
                  /* Try the Knuth-Morris-Pratt algorithm.  */
                  const char *result;
                  bool success =
!                   knuth_morris_pratt_unibyte (haystack, needle - 1, &result);
                  if (success)
                    return (char *) result;
                  try_kmp = false;
*** modules/c-strcasestr.orig   2007-12-30 17:20:47.000000000 +0100
--- modules/c-strcasestr        2007-12-30 17:17:13.000000000 +0100
***************
*** 4,9 ****
--- 4,10 ----
  Files:
  lib/c-strcasestr.h
  lib/c-strcasestr.c
+ lib/str-kmp.h
  
  Depends-on:
  c-ctype
*** modules/c-strstr.orig       2007-12-30 17:20:47.000000000 +0100
--- modules/c-strstr    2007-12-30 17:17:29.000000000 +0100
***************
*** 4,9 ****
--- 4,10 ----
  Files:
  lib/c-strstr.h
  lib/c-strstr.c
+ lib/str-kmp.h
  
  Depends-on:
  stdbool
*** modules/mbscasestr.orig     2007-12-30 17:20:47.000000000 +0100
--- modules/mbscasestr  2007-12-30 17:18:02.000000000 +0100
***************
*** 3,8 ****
--- 3,9 ----
  
  Files:
  lib/mbscasestr.c
+ lib/str-kmp.h
  m4/mbscasestr.m4
  m4/mbrtowc.m4
  
*** modules/mbsstr.orig 2007-12-30 17:20:47.000000000 +0100
--- modules/mbsstr      2007-12-30 17:18:12.000000000 +0100
***************
*** 3,8 ****
--- 3,9 ----
  
  Files:
  lib/mbsstr.c
+ lib/str-kmp.h
  m4/mbsstr.m4
  m4/mbrtowc.m4
  
*** modules/strcasestr.orig     2007-12-30 17:20:47.000000000 +0100
--- modules/strcasestr  2007-12-30 17:17:49.000000000 +0100
***************
*** 3,8 ****
--- 3,9 ----
  
  Files:
  lib/strcasestr.c
+ lib/str-kmp.h
  m4/strcasestr.m4
  
  Depends-on:




reply via email to

[Prev in Thread] Current Thread [Next in Thread]