grep-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Grep-devel] [PATCH] dfa: prefer ptrdiff_t to size_t


From: Paul Eggert
Subject: [Grep-devel] [PATCH] dfa: prefer ptrdiff_t to size_t
Date: Sun, 15 Jan 2017 19:46:15 -0800

The code already cannot handle objects with size greater than
SIZE_MAX / 2, so be more honest about it and use ptrdiff_t instead
of size_t.  ptrdiff_t arithmetic is signed, which allows for more
checking via -fsanitize=undefined.  It also makes the code a tad
smaller on x86-64, since it can test for < 0 rather than for ==
SIZE_MAX.
* src/dfasearch.c (struct dfa_comp.kwset_exact_matches):
(kwsmusts, EGexecute):
* src/kwsearch.c (Fcompile, Fexecute):
* src/kwset.c (struct kwset.kwsexec, kwsincr, memchr_kwset)
(memoff2_kwset, bmexec_trans, bmexec, cwexec, acexec_trans)
(acexec, kwsexec):
* src/kwset.h (struct kwsmatch.index, .offset, .size):
Prefer ptrdiff_t to size_t where either will do.
---
 src/dfasearch.c | 13 +++++----
 src/kwsearch.c  | 14 ++++-----
 src/kwset.c     | 89 ++++++++++++++++++++++++++++++++-------------------------
 src/kwset.h     | 11 +++----
 4 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 44bfaa3..9b32d48 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -42,7 +42,7 @@ struct dfa_comp
   /* Number of compiled fixed strings known to exactly match the regexp.
      If kwsexec returns < kwset_exact_matches, then we don't need to
      call the regexp matcher at all. */
-  size_t kwset_exact_matches;
+  ptrdiff_t kwset_exact_matches;
 
   bool begline;
 };
@@ -80,8 +80,8 @@ kwsmusts (struct dfa_comp *dc)
          The kwset matcher will return the index of the matching
          string that it chooses. */
       ++dc->kwset_exact_matches;
-      size_t old_len = strlen (dm->must);
-      size_t new_len = old_len + dm->begline + dm->endline;
+      ptrdiff_t old_len = strlen (dm->must);
+      ptrdiff_t new_len = old_len + dm->begline + dm->endline;
       char *must = xmalloc (new_len);
       char *mp = must;
       *mp = eolbyte;
@@ -244,9 +244,10 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t 
*match_size,
               char const *prev_beg;
 
               /* Find a possible match using the KWset matcher.  */
-              size_t offset = kwsexec (dc->kwset, beg - dc->begline,
-                                       buflim - beg + dc->begline, &kwsm, 
true);
-              if (offset == (size_t) -1)
+              ptrdiff_t offset = kwsexec (dc->kwset, beg - dc->begline,
+                                          buflim - beg + dc->begline,
+                                          &kwsm, true);
+              if (offset < 0)
                 goto failure;
               match = beg + offset;
               prev_beg = beg;
diff --git a/src/kwsearch.c b/src/kwsearch.c
index e07eb63..5e9df16 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -25,7 +25,7 @@ void *
 Fcompile (char const *pattern, size_t size, reg_syntax_t ignored)
 {
   kwset_t kwset;
-  size_t total = size;
+  ptrdiff_t total = size;
   char *buf = NULL;
   size_t bufalloc = 0;
 
@@ -34,7 +34,7 @@ Fcompile (char const *pattern, size_t size, reg_syntax_t 
ignored)
   char const *p = pattern;
   do
     {
-      size_t len;
+      ptrdiff_t len;
       char const *sep = memchr (p, '\n', total);
       if (sep)
         {
@@ -84,7 +84,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t 
*match_size,
           char const *start_ptr)
 {
   char const *beg, *end, *mb_start;
-  size_t len;
+  ptrdiff_t len;
   char eol = eolbyte;
   struct kwsmatch kwsmatch;
   size_t ret_val;
@@ -102,10 +102,10 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t 
*match_size,
 
   for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
     {
-      size_t offset = kwsexec (kwset, beg - match_lines,
-                               buf + size - beg + match_lines, &kwsmatch,
-                               longest);
-      if (offset == (size_t) -1)
+      ptrdiff_t offset = kwsexec (kwset, beg - match_lines,
+                                  buf + size - beg + match_lines, &kwsmatch,
+                                  longest);
+      if (offset < 0)
         break;
       len = kwsmatch.size[0] - 2 * match_lines;
       if (mb_check && mb_goback (&mb_start, beg + offset, buf + size) != 0)
diff --git a/src/kwset.c b/src/kwset.c
index 0c22041..0b6fab4 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -127,7 +127,8 @@ struct kwset
   bool reverse;
 
   /* kwsexec implementation.  */
-  size_t (*kwsexec) (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+  ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t,
+                        struct kwsmatch *, bool);
 };
 
 /* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
@@ -137,9 +138,12 @@ tr (char const *trans, char c)
   return trans ? trans[U(c)] : c;
 }
 
-static size_t acexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
-static size_t cwexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
-static size_t bmexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t,
+                         struct kwsmatch *, bool);
+static ptrdiff_t cwexec (kwset_t, char const *, ptrdiff_t,
+                         struct kwsmatch *, bool);
+static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t,
+                         struct kwsmatch *, bool);
 
 /* Return a newly allocated keyword set.  A nonnull TRANS specifies a
    table of character translations to be applied to all pattern and
@@ -176,8 +180,9 @@ enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
 
 /* Add the given string to the contents of the keyword set.  */
 void
-kwsincr (kwset_t kwset, char const *text, size_t len)
+kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
 {
+  assume (0 <= len);
   struct trie *trie = kwset->trie;
   char const *trans = kwset->trans;
 
@@ -310,7 +315,10 @@ kwsincr (kwset_t kwset, char const *text, size_t len)
   /* Mark the node finally reached as accepting, encoding the
      index number of this word in the keyword set so far.  */
   if (!trie->accepting)
-    trie->accepting = 1 + 2 * kwset->words;
+    {
+      size_t words = kwset->words;
+      trie->accepting = 2 * words + 1;
+    }
   ++kwset->words;
 
   /* Keep track of the longest and shortest string of the keyword set.  */
@@ -629,14 +637,14 @@ bm_delta2_search (char const **tpp, char const *ep, char 
const *sp,
    that matches the last byte specified by KWSET, a singleton.
    Return NULL if there is no match.  */
 static char const *
-memchr_kwset (char const *s, size_t n, kwset_t kwset)
+memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
 {
   if (kwset->gc1help < 0)
     return memchr (s, kwset->gc1, n);
   int small_heuristic = 2;
   int small = (- (uintptr_t) s % sizeof (long)
                + small_heuristic * sizeof (long));
-  size_t ntrans = kwset->gc1help < NCHAR && small < n ? small : n;
+  ptrdiff_t ntrans = kwset->gc1help < NCHAR && small < n ? small : n;
   char const *slim = s + ntrans;
   for (; s < slim; s++)
     if (kwset->trans[U(*s)] == kwset->gc1)
@@ -647,9 +655,9 @@ memchr_kwset (char const *s, size_t n, kwset_t kwset)
 
 /* Return the offset of the first byte in the buffer S (of size N)
    that matches the last byte specified by KWSET, a pair.
-   Return SIZE_MAX if there is no match.  */
-static size_t
-memoff2_kwset (char const *s, size_t n, kwset_t kwset,
+   Return -1 if there is no match.  */
+static ptrdiff_t
+memoff2_kwset (char const *s, ptrdiff_t n, kwset_t kwset,
                struct kwsmatch *kwsmatch)
 {
   struct tree const *cur = kwset->trie->links;
@@ -658,10 +666,10 @@ memoff2_kwset (char const *s, size_t n, kwset_t kwset,
                      ? memchr2 (s, cur->label, clink->label, n)
                      : memchr (s, cur->label, n));
   if (! mch)
-    return SIZE_MAX;
+    return -1;
   else
     {
-      size_t off = mch - s;
+      ptrdiff_t off = mch - s;
       if (*mch == cur->label)
         kwsmatch->index = cur->trie->accepting / 2;
       else
@@ -673,9 +681,10 @@ memoff2_kwset (char const *s, size_t n, kwset_t kwset,
 }
 
 /* Fast Boyer-Moore search (inlinable version).  */
-static inline size_t
-bmexec_trans (kwset_t kwset, char const *text, size_t size)
+static inline ptrdiff_t
+bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
 {
+  assume (0 <= size);
   unsigned char const *d1;
   char const *ep, *sp, *tp;
   int d;
@@ -685,11 +694,11 @@ bmexec_trans (kwset_t kwset, char const *text, size_t 
size)
   if (len == 0)
     return 0;
   if (len > size)
-    return SIZE_MAX;
+    return -1;
   if (len == 1)
     {
       tp = memchr_kwset (text, size, kwset);
-      return tp ? tp - text : SIZE_MAX;
+      return tp ? tp - text : -1;
     }
 
   d1 = kwset->delta;
@@ -730,7 +739,7 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
                     tp--;
                     tp = memchr_kwset (tp, text + size - tp, kwset);
                     if (! tp)
-                      return SIZE_MAX;
+                      return -1;
                     tp++;
                     if (ep <= tp)
                       break;
@@ -754,21 +763,21 @@ bmexec_trans (kwset_t kwset, char const *text, size_t 
size)
         return tp - text;
     }
 
-  return SIZE_MAX;
+  return -1;
 }
 
 /* Fast Boyer-Moore search.  */
-static size_t
-bmexec (kwset_t kwset, char const *text, size_t size,
+static ptrdiff_t
+bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
   /* Help the compiler inline in two ways, depending on whether
      kwset->trans is null.  */
-  size_t ret = (kwset->trans
-                ? bmexec_trans (kwset, text, size)
-                : bmexec_trans (kwset, text, size));
+  ptrdiff_t ret = (kwset->trans
+                   ? bmexec_trans (kwset, text, size)
+                   : bmexec_trans (kwset, text, size));
 
-  if (ret != SIZE_MAX)
+  if (0 <= ret)
     {
        kwsmatch->index = 0;
        kwsmatch->offset[0] = ret;
@@ -779,10 +788,11 @@ bmexec (kwset_t kwset, char const *text, size_t size,
 }
 
 /* Hairy multiple string search with the Commentz-Walter algorithm.  */
-static size_t
-cwexec (kwset_t kwset, char const *text, size_t len,
+static ptrdiff_t
+cwexec (kwset_t kwset, char const *text, ptrdiff_t len,
         struct kwsmatch *kwsmatch, bool longest)
 {
+  assume (0 <= len);
   struct trie * const *next;
   struct trie const *trie;
   struct trie const *accept;
@@ -796,7 +806,7 @@ cwexec (kwset_t kwset, char const *text, size_t len,
 
   /* Initialize register copies and look for easy ways out.  */
   if (len < kwset->mind)
-    return SIZE_MAX;
+    return -1;
   if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
     return memoff2_kwset (text, len, kwset, kwsmatch);
   next = kwset->next;
@@ -868,7 +878,7 @@ cwexec (kwset_t kwset, char const *text, size_t len,
       if (mch)
         goto match;
     }
-  return SIZE_MAX;
+  return -1;
 
  match:
   /* Given a known match, find the longest possible match anchored
@@ -938,8 +948,8 @@ cwexec (kwset_t kwset, char const *text, size_t len,
 
 /* Hairy multiple string search with the Aho-Corasick algorithm.
    (inlinable version)  */
-static inline size_t
-acexec_trans (kwset_t kwset, char const *text, size_t len,
+static inline ptrdiff_t
+acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
               struct kwsmatch *kwsmatch, bool longest)
 {
   struct trie * const *next;
@@ -950,7 +960,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
 
   /* Initialize register copies and look for easy ways out.  */
   if (len < kwset->mind)
-    return SIZE_MAX;
+    return -1;
   trans = kwset->trans;
   if (!trans && kwset->maxd == 1 && kwset->words == 2)
     return memoff2_kwset (text, len, kwset, kwsmatch);
@@ -969,7 +979,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
           while (! (trie = next[c]))
             {
               if (tp >= lim)
-                return SIZE_MAX;
+                return -1;
               c = tr (trans, *tp++);
             }
 
@@ -978,7 +988,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
               if (trie->accepting)
                 goto match;
               if (tp >= lim)
-                return SIZE_MAX;
+                return -1;
               c = tr (trans, *tp++);
 
               for (tree = trie->links; c != tree->label; )
@@ -1046,10 +1056,11 @@ acexec_trans (kwset_t kwset, char const *text, size_t 
len,
 }
 
 /* Hairy multiple string search with Aho-Corasick algorithm.  */
-static size_t
-acexec (kwset_t kwset, char const *text, size_t size,
+static ptrdiff_t
+acexec (kwset_t kwset, char const *text, ptrdiff_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
+  assume (0 <= size);
   /* Help the compiler inline in two ways, depending on whether
      kwset->trans is null.  */
   return (kwset->trans
@@ -1059,12 +1070,12 @@ acexec (kwset_t kwset, char const *text, size_t size,
 
 /* Find the first instance of a KWSET member in TEXT, which has SIZE bytes.
    Return the offset (into TEXT) of the first byte of the matching substring,
-   or SIZE_MAX if no match is found.  Upon a match, store details in
+   or -1 if no match is found.  Upon a match, store details in
    *KWSMATCH: index of matched keyword, start offset (same as the return
    value), and length.  If LONGEST, find the longest match; otherwise
    any match will do.  */
-size_t
-kwsexec (kwset_t kwset, char const *text, size_t size,
+ptrdiff_t
+kwsexec (kwset_t kwset, char const *text, ptrdiff_t size,
          struct kwsmatch *kwsmatch, bool longest)
 {
   return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
diff --git a/src/kwset.h b/src/kwset.h
index e9fa3dc..ef78db3 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -24,9 +24,9 @@
 
 struct kwsmatch
 {
-  size_t index;                        /* Index number of matching keyword.  */
-  size_t offset[1];            /* Offset of match.  */
-  size_t size[1];              /* Length of match.  */
+  ptrdiff_t index;                     /* Index number of matching keyword.  */
+  ptrdiff_t offset[1];         /* Offset of match.  */
+  ptrdiff_t size[1];           /* Length of match.  */
 };
 
 #include "arg-nonnull.h"
@@ -35,8 +35,9 @@ struct kwset;
 typedef struct kwset *kwset_t;
 
 extern kwset_t kwsalloc (char const *, bool);
-extern void kwsincr (kwset_t, char const *, size_t);
+extern void kwsincr (kwset_t, char const *, ptrdiff_t);
 extern void kwsprep (kwset_t);
-extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *, bool)
+extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t,
+                          struct kwsmatch *, bool)
   _GL_ARG_NONNULL ((4));
 extern void kwsfree (kwset_t);
-- 
2.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]