grep-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] grep: prefer signed to unsigned integers


From: Paul Eggert
Subject: [PATCH] grep: prefer signed to unsigned integers
Date: Wed, 25 Aug 2021 12:12:12 -0700

This improves runtime checking for integer overflow when compiling
with gcc -fsanitize=undefined and the like.  It also avoids
the need for some integer casts, which can be error-prone.
* bootstrap.conf (gnulib_modules): Add idx.
* src/dfasearch.c (struct dfa_comp, kwsmusts):
(possible_backrefs_in_pattern, regex_compile, GEAcompile)
(EGexecute):
* src/grep.c (struct patloc, patlocs_allocated, patlocs_used)
(n_patterns, update_patterns, pattern_file_name, poison_len)
(asan_poison, fwrite_errno, compile_fp_t, execute_fp_t)
(buf_has_encoding_errors, buf_has_nulls, file_must_have_nulls)
(bufalloc, pagesize, all_zeros, fillbuf, nlscan)
(print_line_head, print_line_middle, print_line_tail, grepbuf)
(grep, contains_encoding_error, fgrep_icase_available)
(fgrep_icase_charlen, fgrep_to_grep_pattern, try_fgrep_pattern)
(main):
* src/kwsearch.c (struct kwsearch, Fcompile, Fexecute):
* src/kwset.c (struct trie, struct kwset, kwsalloc, kwsincr)
(kwswords, treefails, memchr_kwset, acexec_trans, kwsexec)
(treedelta, kwsprep, bm_delta2_search, bmexec_trans, bmexec)
(acexec):
* src/kwset.h (struct kwsmatch):
* src/pcresearch.c (Pcompile, Pexecute):
* src/search.h (mb_clen):
* src/searchutils.c (kwsinit, mb_goback, wordchars_count)
(wordchars_size, wordchar_next, wordchar_prev):
Prefer idx_t to size_t or ptrdiff_t for nonnegative sizes,
and prefer ptrdiff_t to size_t for sizes plus error values.
* src/grep.c (uword_size): New constant, used for signed
size calculations.
(totalnl, add_count, totalcc, print_offset, print_line_head, grep):
Prefer intmax_t to uintmax_t for wide integer calculations.
(fgrep_icase_charlen): Prefer ptrdiff_t to int for size offsets.
* src/grep.h: Include idx.h.
* src/search.h (imbrlen): New function, like mbrlen except
with idx_t and ptrdiff_t.
---
 bootstrap.conf    |   1 +
 src/dfasearch.c   |  75 ++++++------
 src/grep.c        | 282 ++++++++++++++++++++++------------------------
 src/grep.h        |   3 +-
 src/kwsearch.c    |  30 ++---
 src/kwset.c       |  88 +++++++--------
 src/kwset.h       |  17 +--
 src/pcresearch.c  |   6 +-
 src/search.h      |  47 +++++---
 src/searchutils.c |  22 ++--
 10 files changed, 294 insertions(+), 277 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 8e46000..7e4f24c 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -50,6 +50,7 @@ gitlog-to-changelog
 gnu-web-doc-update
 gnupload
 hash
+idx
 ignore-value
 intprops
 inttypes
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d6afa8d..1675865 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -36,13 +36,13 @@ struct dfa_comp
 
   /* Regex compiled regexps. */
   struct re_pattern_buffer *patterns;
-  size_t pcount;
+  idx_t pcount;
   struct re_registers regs;
 
   /* Number of compiled fixed strings known to exactly match the regexp.
      If kwsexec returns < kwset_exact_matches, then we don't need to
      call the regexp matcher at all. */
-  ptrdiff_t kwset_exact_matches;
+  idx_t kwset_exact_matches;
 
   bool begline;
 };
@@ -80,9 +80,9 @@ kwsmusts (struct dfa_comp *dc)
          The kwset matcher will return the index of the matching
          string that it chooses. */
       ++dc->kwset_exact_matches;
-      ptrdiff_t old_len = strlen (dm->must);
-      ptrdiff_t new_len = old_len + dm->begline + dm->endline;
-      char *must = xmalloc (new_len);
+      idx_t old_len = strlen (dm->must);
+      idx_t new_len = old_len + dm->begline + dm->endline;
+      char *must = ximalloc (new_len);
       char *mp = must;
       *mp = eolbyte;
       mp += dm->begline;
@@ -108,7 +108,7 @@ kwsmusts (struct dfa_comp *dc)
    BS_SAFE is true of encodings where a backslash cannot appear as the
    last byte of a multibyte character.  */
 static bool _GL_ATTRIBUTE_PURE
-possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
+possible_backrefs_in_pattern (char const *keys, idx_t len, bool bs_safe)
 {
   /* Normally a backslash, but in an unsafe encoding this is a non-char
      value so that the comparison below always fails, because if there
@@ -144,8 +144,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
 }
 
 static bool
-regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
-               ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
+regex_compile (struct dfa_comp *dc, char const *p, idx_t len,
+               idx_t pcount, idx_t lineno, reg_syntax_t syntax_bits,
                bool syntax_only)
 {
   struct re_pattern_buffer pat0;
@@ -154,7 +154,9 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   pat->allocated = 0;
 
   /* Do not use a fastmap with -i, to work around glibc Bug#20381.  */
-  pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
+  verify (UCHAR_MAX < IDX_MAX);
+  idx_t uchar_max = UCHAR_MAX;
+  pat->fastmap = (syntax_only | match_icase) ? NULL : ximalloc (uchar_max + 1);
 
   pat->translate = NULL;
 
@@ -168,14 +170,17 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
     return true;
 
   /* Emit a filename:lineno: prefix for patterns taken from files.  */
-  size_t pat_lineno;
+  idx_t pat_lineno;
   char const *pat_filename
     = lineno < 0 ? "" : pattern_file_name (lineno, &pat_lineno);
 
   if (*pat_filename == '\0')
     error (0, 0, "%s", err);
   else
-    error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err);
+    {
+      ptrdiff_t n = pat_lineno;
+      error (0, 0, "%s:%td: %s", pat_filename, n, err);
+    }
 
   return false;
 }
@@ -185,7 +190,7 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
    Return a description of the compiled pattern.  */
 
 void *
-GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
+GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
             bool exact)
 {
   char *motif;
@@ -210,29 +215,30 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
   dc->patterns = xmalloc (sizeof *dc->patterns);
   dc->patterns++;
   dc->pcount = 0;
-  size_t palloc = 1;
+  idx_t palloc = 1;
 
   char const *prev = pattern;
 
   /* Buffer containing back-reference-free patterns.  */
   char *buf = NULL;
-  ptrdiff_t buflen = 0;
-  size_t bufalloc = 0;
+  idx_t buflen = 0;
+  idx_t bufalloc = 0;
 
-  ptrdiff_t lineno = 0;
+  idx_t lineno = 0;
 
   do
     {
       char const *sep = rawmemchr (p, '\n');
-      ptrdiff_t len = sep - p;
+      idx_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
       if (backref && prev < p)
         {
-          ptrdiff_t prevlen = p - prev;
-          while (bufalloc < buflen + prevlen)
-            buf = x2realloc (buf, &bufalloc);
+          idx_t prevlen = p - prev;
+          ptrdiff_t bufshortage = buflen - bufalloc + prevlen;
+          if (0 < bufshortage)
+            buf = xpalloc (buf, &bufalloc, bufshortage, -1, 1);
           memcpy (buf + buflen, prev, prevlen);
           buflen += prevlen;
         }
@@ -240,10 +246,11 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
       /* Ensure room for at least two more patterns.  The extra one is
          for the regex_compile that may be executed after this loop
          exits, and its (unused) slot is patterns[-1] until then.  */
-      while (palloc <= dc->pcount + 1)
+      ptrdiff_t shortage = dc->pcount - palloc + 2;
+      if (0 < shortage)
         {
-          dc->patterns = x2nrealloc (dc->patterns - 1, &palloc,
-                                     sizeof *dc->patterns);
+          dc->patterns = xpalloc (dc->patterns - 1, &palloc, shortage, -1,
+                                  sizeof *dc->patterns);
           dc->patterns++;
         }
 
@@ -271,8 +278,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
     {
       if (pattern < prev)
         {
-          ptrdiff_t prevlen = patlim - prev;
-          buf = xrealloc (buf, buflen + prevlen);
+          idx_t prevlen = patlim - prev;
+          buf = xirealloc (buf, buflen + prevlen);
           memcpy (buf + buflen, prev, prevlen);
           buflen += prevlen;
         }
@@ -298,11 +305,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
       static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
       static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
       int bk = !(syntax_bits & RE_NO_BK_PARENS);
-      char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
+      idx_t bracket_bytes = sizeof word_beg_bk - 1 + sizeof word_end_bk;
+      char *n = ximalloc (size + bracket_bytes);
 
       strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
                              : (bk ? word_beg_bk : word_beg_no_bk));
-      size_t total = strlen (n);
+      idx_t total = strlen (n);
       memcpy (n + total, pattern, size);
       total += size;
       strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
@@ -338,16 +346,16 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
   return dc;
 }
 
-size_t
-EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
            char const *start_ptr)
 {
   char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
   char eol = eolbyte;
   regoff_t start;
-  size_t len, best_len;
+  idx_t len, best_len;
   struct kwsmatch kwsm;
-  size_t i;
+  idx_t i;
   struct dfa_comp *dc = vdc;
   struct dfa *superset = dfasuperset (dc->dfa);
   bool dfafast = dfaisfast (dc->dfa);
@@ -362,7 +370,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
       if (!start_ptr)
         {
           char const *next_beg, *dfa_beg = beg;
-          ptrdiff_t count = 0;
+          idx_t count = 0;
           bool exact_kwset_match = false;
           bool backref = false;
 
@@ -584,7 +592,6 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
  success:
   len = end - beg;
  success_in_len:;
-  size_t off = beg - buf;
   *match_size = len;
-  return off;
+  return beg - buf;
 }
diff --git a/src/grep.c b/src/grep.c
index 3569375..a55194c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -88,13 +88,13 @@ struct patloc
   {
     /* Line number of the pattern in PATTERN_ARRAY.  Line numbers
        start at 0, and each pattern is terminated by '\n'.  */
-    ptrdiff_t lineno;
+    idx_t lineno;
 
     /* Input location of the pattern.  The FILENAME "-" represents
        standard input, and "" represents the command line.  FILELINE is
        origin-1 for files and is irrelevant for the command line.  */
     char const *filename;
-    ptrdiff_t fileline;
+    idx_t fileline;
   };
 
 /* The array of pattern locations.  The concatenation of all patterns
@@ -108,13 +108,13 @@ struct patloc
    removed patterns not at a file start or end requires another
    PATLOC entry for the first non-removed pattern.  */
 static struct patloc *patloc;
-static size_t patlocs_allocated, patlocs_used;
+static idx_t patlocs_allocated, patlocs_used;
 
 /* Pointer to the array of patterns, each terminated by newline.  */
 static char *pattern_array;
 
 /* The number of unique patterns seen so far.  */
-static size_t n_patterns;
+static idx_t n_patterns;
 
 /* Hash table of patterns seen so far.  */
 static Hash_table *pattern_table;
@@ -160,16 +160,16 @@ compare_patterns (void const *a, void const *b)
    sequence of patterns with no duplicates; SIZE is the total number
    of bytes in KEYS.  If some patterns past the first DUPFREE_SIZE
    bytes are not duplicates, update PATLOCS accordingly.  */
-static ptrdiff_t
-update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
+static idx_t
+update_patterns (char *keys, idx_t dupfree_size, idx_t size,
                  char const *filename)
 {
   char *dst = keys + dupfree_size;
-  ptrdiff_t fileline = 1;
+  idx_t fileline = 1;
   int prev_inserted = 0;
 
   char const *srclim = keys + size;
-  ptrdiff_t patsize;
+  idx_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
       char const *patend = rawmemchr (src, '\n');
@@ -190,8 +190,8 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
           if (!prev_inserted)
             {
               if (patlocs_used == patlocs_allocated)
-                patloc = x2nrealloc (patloc, &patlocs_allocated,
-                                     sizeof *patloc);
+                patloc = xpalloc (patloc, &patlocs_allocated, 1, -1,
+                                  sizeof *patloc);
               patloc[patlocs_used++]
                 = (struct patloc) { .lineno = n_patterns,
                                     .filename = filename,
@@ -213,9 +213,9 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
    Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
    or to an unspecified value if PATTERN came from the command line.  */
 char const * _GL_ATTRIBUTE_PURE
-pattern_file_name (size_t lineno, size_t *new_lineno)
+pattern_file_name (idx_t lineno, idx_t *new_lineno)
 {
-  ptrdiff_t i;
+  idx_t i;
   for (i = 1; i < patlocs_used; i++)
     if (lineno < patloc[i].lineno)
       break;
@@ -227,7 +227,7 @@ pattern_file_name (size_t lineno, size_t *new_lineno)
 /* Record the starting address and length of the sole poisoned region,
    so that we can unpoison it later, just before each following read.  */
 static void const *poison_buf;
-static size_t poison_len;
+static idx_t poison_len;
 
 static void
 clear_asan_poison (void)
@@ -237,7 +237,7 @@ clear_asan_poison (void)
 }
 
 static void
-asan_poison (void const *addr, size_t size)
+asan_poison (void const *addr, idx_t size)
 {
   poison_buf = addr;
   poison_len = size;
@@ -246,7 +246,7 @@ asan_poison (void const *addr, size_t size)
 }
 #else
 static void clear_asan_poison (void) { }
-static void asan_poison (void const volatile *addr, size_t size) { }
+static void asan_poison (void const volatile *addr, idx_t size) { }
 #endif
 
 /* The group separator used when context is requested. */
@@ -467,7 +467,7 @@ printf_errno (char const *format, ...)
 }
 
 static void
-fwrite_errno (void const *ptr, size_t size, size_t nmemb)
+fwrite_errno (void const *ptr, idx_t size, idx_t nmemb)
 {
   if (fwrite (ptr, size, nmemb, stdout) != nmemb)
     stdout_errno = errno;
@@ -644,9 +644,9 @@ static bool seek_failed;
 static bool seek_data_failed;
 
 /* Functions we'll use to search. */
-typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
-typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
-                                char const *);
+typedef void *(*compile_fp_t) (char *, idx_t, reg_syntax_t, bool);
+typedef ptrdiff_t (*execute_fp_t) (void *, char const *, idx_t, idx_t *,
+                                   char const *);
 static execute_fp_t execute;
 static void *compiled_pattern;
 
@@ -694,6 +694,7 @@ clean_up_stdout (void)
 /* An unsigned type suitable for fast matching.  */
 typedef uintmax_t uword;
 static uword const uword_max = UINTMAX_MAX;
+enum { uword_size = sizeof (uword) }; /* For when a signed size is wanted.  */
 
 struct localeinfo localeinfo;
 
@@ -742,7 +743,7 @@ skip_easy_bytes (char const *buf)
      the buffer end, but that's benign.  */
   char const *p;
   uword const *s;
-  for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
+  for (p = buf; (uintptr_t) p % uword_size != 0; p++)
     if (to_uchar (*p) & unibyte_mask)
       return p;
   for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
@@ -753,22 +754,22 @@ skip_easy_bytes (char const *buf)
 }
 
 /* Return true if BUF, of size SIZE, has an encoding error.
-   BUF must be followed by at least sizeof (uword) bytes,
+   BUF must be followed by at least uword_size bytes,
    the first of which may be modified.  */
 static bool
-buf_has_encoding_errors (char *buf, size_t size)
+buf_has_encoding_errors (char *buf, idx_t size)
 {
   if (! unibyte_mask)
     return false;
 
   mbstate_t mbs = { 0 };
-  size_t clen;
+  ptrdiff_t clen;
 
   buf[size] = -1;
   for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
     {
-      clen = mbrlen (p, buf + size - p, &mbs);
-      if (MB_LEN_MAX < clen)
+      clen = imbrlen (p, buf + size - p, &mbs);
+      if (clen < 0)
         return true;
     }
 
@@ -780,7 +781,7 @@ buf_has_encoding_errors (char *buf, size_t size)
    BUF must be followed by at least one byte,
    which may be arbitrarily written to or read from.  */
 static bool
-buf_has_nulls (char *buf, size_t size)
+buf_has_nulls (char *buf, idx_t size)
 {
   buf[size] = 0;
   return strlen (buf) != size;
@@ -790,7 +791,7 @@ buf_has_nulls (char *buf, size_t size)
    SIZE bytes have already been read from the file
    with descriptor FD and status ST.  */
 static bool
-file_must_have_nulls (size_t size, int fd, struct stat const *st)
+file_must_have_nulls (idx_t size, int fd, struct stat const *st)
 {
   /* If the file has holes, it must contain a null byte somewhere.  */
   if (SEEK_HOLE != SEEK_SET && !seek_failed
@@ -869,18 +870,18 @@ skipped_file (char const *name, bool command_line, bool is_dir)
    page size, unless a read yields a partial page.  */
 
 static char *buffer;           /* Base of buffer. */
-static size_t bufalloc;                /* Allocated buffer size, counting slop. */
+static idx_t bufalloc;         /* Allocated buffer size, counting slop. */
 static int bufdesc;            /* File descriptor. */
 static char *bufbeg;           /* Beginning of user-visible stuff. */
 static char *buflim;           /* Limit of user-visible stuff. */
-static size_t pagesize;                /* alignment of memory pages */
+static idx_t pagesize;         /* alignment of memory pages */
 static off_t bufoffset;                /* Read offset.  */
 static off_t after_last_match; /* Pointer after last matching line that
                                    would have been output if we were
                                    outputting characters. */
 static bool skip_nuls;         /* Skip '\0' in data.  */
 static bool skip_empty_lines;  /* Skip empty lines in data.  */
-static uintmax_t totalnl;      /* Total newline count before lastnl. */
+static intmax_t totalnl;       /* Total newline count before lastnl. */
 
 /* Initial buffer size, not counting slop. */
 enum { INITIAL_BUFSIZE = 96 * 1024 };
@@ -894,18 +895,18 @@ enum { INITIAL_BUFSIZE = 96 * 1024 };
 
 /* Add two numbers that count input bytes or lines, and report an
    error if the addition overflows.  */
-static uintmax_t
-add_count (uintmax_t a, uintmax_t b)
+static intmax_t
+add_count (intmax_t a, idx_t b)
 {
-  uintmax_t sum = a + b;
-  if (sum < a)
+  intmax_t sum;
+  if (!INT_ADD_OK (a, b, &sum))
     die (EXIT_TROUBLE, 0, _("input is too large to count"));
   return sum;
 }
 
 /* Return true if BUF (of size SIZE) is all zeros.  */
 static bool
-all_zeros (char const *buf, size_t size)
+all_zeros (char const *buf, idx_t size)
 {
   for (char const *p = buf; p < buf + size; p++)
     if (*p)
@@ -944,55 +945,55 @@ reset (int fd, struct stat const *st)
    to the beginning of the buffer contents, and 'buflim'
    points just after the end.  Return false if there's an error.  */
 static bool
-fillbuf (size_t save, struct stat const *st)
+fillbuf (idx_t save, struct stat const *st)
 {
-  size_t fillsize;
-  bool cc = true;
   char *readbuf;
-  size_t readsize;
 
-  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
+  /* After BUFLIM, we need room for at least a page of data plus a
+     trailing uword.  */
+  idx_t min_after_buflim = pagesize + uword_size;
+
+  if (min_after_buflim <= buffer + bufalloc - buflim)
     readbuf = buflim;
   else
     {
-      size_t minsize = save + pagesize;
-      size_t newsize;
-      size_t newalloc;
       char *newbuf;
 
-      /* Grow newsize until it is at least as great as minsize.  */
-      for (newsize = bufalloc - pagesize - sizeof (uword);
-           newsize < minsize;
-           newsize *= 2)
-        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
-          xalloc_die ();
-
-      /* Try not to allocate more memory than the file size indicates,
-         as that might cause unnecessary memory exhaustion if the file
-         is large.  However, do not use the original file size as a
-         heuristic if we've already read past the file end, as most
-         likely the file is growing.  */
-      if (usable_st_size (st))
-        {
-          off_t to_be_read = st->st_size - bufoffset;
-          off_t maxsize_off = save + to_be_read;
-          if (0 <= to_be_read && to_be_read <= maxsize_off
-              && maxsize_off == (size_t) maxsize_off
-              && minsize <= (size_t) maxsize_off
-              && (size_t) maxsize_off < newsize)
-            newsize = maxsize_off;
-        }
+      /* For data to be searched we need room for the saved bytes,
+         plus at least a page of data to read.  */
+      idx_t minsize = save + pagesize;
 
       /* Add enough room so that the buffer is aligned and has room
          for byte sentinels fore and aft, and so that a uword can
          be read aft.  */
-      newalloc = newsize + pagesize + sizeof (uword);
+      ptrdiff_t incr_min = minsize - bufalloc + min_after_buflim;
+
+      if (incr_min <= 0)
+        newbuf = buffer;
+      else
+        {
+          /* Try not to allocate more memory than the file size indicates,
+             as that might cause unnecessary memory exhaustion if the file
+             is large.  However, do not use the original file size as a
+             heuristic if we've already read past the file end, as most
+             likely the file is growing.  */
+          ptrdiff_t alloc_max = -1;
+          if (usable_st_size (st))
+            {
+              off_t to_be_read = st->st_size - bufoffset;
+              ptrdiff_t a;
+              if (0 <= to_be_read
+                  && INT_ADD_OK (to_be_read, save + min_after_buflim, &a))
+                alloc_max = a;
+            }
+
+          newbuf = xpalloc (NULL, &bufalloc, incr_min, alloc_max, 1);
+        }
 
-      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
       readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
-      size_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
+      idx_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
       memmove (readbuf - moved, buflim - moved, moved);
-      if (newbuf != buffer)
+      if (0 < incr_min)
         {
           free (buffer);
           buffer = newbuf;
@@ -1003,9 +1004,12 @@ fillbuf (size_t save, struct stat const *st)
 
   clear_asan_poison ();
 
-  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
+  idx_t readsize = buffer + bufalloc - uword_size - readbuf;
   readsize -= readsize % pagesize;
 
+  idx_t fillsize;
+  bool cc = true;
+
   while (true)
     {
       fillsize = safe_read (bufdesc, readbuf, readsize);
@@ -1043,12 +1047,11 @@ fillbuf (size_t save, struct stat const *st)
   /* Initialize the following word, because skip_easy_bytes and some
      matchers read (but do not use) those bytes.  This avoids false
      positive reports of these bytes being used uninitialized.  */
-  memset (buflim, 0, sizeof (uword));
+  memset (buflim, 0, uword_size);
 
   /* Mark the part of the buffer not filled by the read or set by
      the above memset call as ASAN-poisoned.  */
-  asan_poison (buflim + sizeof (uword),
-               bufalloc - (buflim - buffer) - sizeof (uword));
+  asan_poison (buflim + uword_size, bufalloc - (buflim - buffer) - uword_size);
 
   return cc;
 }
@@ -1089,7 +1092,7 @@ static char *label = NULL;      /* Fake filename for stdin */
 
 
 /* Internal variables to keep track of byte count, context, etc. */
-static uintmax_t totalcc;      /* Total character count before bufbeg. */
+static intmax_t totalcc;       /* Total character count before bufbeg. */
 static char const *lastnl;     /* Pointer after last newline counted. */
 static char *lastout;          /* Pointer after last character output;
                                    NULL if no character has been output
@@ -1105,7 +1108,7 @@ static bool binary;               /* Use binary rather than text I/O.  */
 static void
 nlscan (char const *lim)
 {
-  size_t newlines = 0;
+  idx_t newlines = 0;
   for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
@@ -1137,16 +1140,16 @@ print_sep (char sep)
 
 /* Print a line number or a byte offset.  */
 static void
-print_offset (uintmax_t pos, const char *color)
+print_offset (intmax_t pos, const char *color)
 {
   pr_sgr_start_if (color);
-  printf_errno ("%*"PRIuMAX, offset_width, pos);
+  printf_errno ("%*"PRIdMAX, offset_width, pos);
   pr_sgr_end_if (color);
 }
 
 /* Print a whole line head (filename, line, byte).  The output data
    starts at BEG and contains LEN bytes; it is followed by at least
-   sizeof (uword) bytes, the first of which may be temporarily modified.
+   uword_size bytes, the first of which may be temporarily modified.
    The output data comes from what is perhaps a larger input line that
    goes until LIM, where LIM[-1] is an end-of-line byte.  Use SEP as
    the separator on output.
@@ -1154,7 +1157,7 @@ print_offset (uintmax_t pos, const char *color)
    Return true unless the line was suppressed due to an encoding error.  */
 
 static bool
-print_line_head (char *beg, size_t len, char const *lim, char sep)
+print_line_head (char *beg, idx_t len, char const *lim, char sep)
 {
   if (binary_files != TEXT_BINARY_FILES)
     {
@@ -1191,7 +1194,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
 
   if (out_byte)
     {
-      uintmax_t pos = add_count (totalcc, beg - bufbeg);
+      intmax_t pos = add_count (totalcc, beg - bufbeg);
       print_offset (pos, byte_num_color);
       print_sep (sep);
     }
@@ -1206,16 +1209,16 @@ static char *
 print_line_middle (char *beg, char *lim,
                    const char *line_color, const char *match_color)
 {
-  size_t match_size;
-  size_t match_offset;
+  idx_t match_size;
+  ptrdiff_t match_offset;
   char *cur;
   char *mid = NULL;
   char *b;
 
   for (cur = beg;
        (cur < lim
-        && ((match_offset = execute (compiled_pattern, beg, lim - beg,
-                                     &match_size, cur)) != (size_t) -1));
+        && 0 <= (match_offset = execute (compiled_pattern, beg, lim - beg,
+                                         &match_size, cur)));
        cur = b + match_size)
     {
       b = beg + match_offset;
@@ -1273,8 +1276,8 @@ print_line_middle (char *beg, char *lim,
 static char *
 print_line_tail (char *beg, const char *lim, const char *line_color)
 {
-  size_t eol_size;
-  size_t tail_size;
+  idx_t eol_size;
+  idx_t tail_size;
 
   eol_size   = (lim > beg && lim[-1] == eolbyte);
   eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
@@ -1462,10 +1465,10 @@ grepbuf (char *beg, char const *lim)
 
   for (char *p = beg; p < lim; p = endp)
     {
-      size_t match_size;
-      size_t match_offset = execute (compiled_pattern, p, lim - p,
-                                     &match_size, NULL);
-      if (match_offset == (size_t) -1)
+      idx_t match_size;
+      ptrdiff_t match_offset = execute (compiled_pattern, p, lim - p,
+                                        &match_size, NULL);
+      if (match_offset < 0)
         {
           if (!out_invert)
             break;
@@ -1500,7 +1503,7 @@ static intmax_t
 grep (int fd, struct stat const *st, bool *ineof)
 {
   intmax_t nlines, i;
-  size_t residue, save;
+  idx_t residue, save;
   char oldc;
   char *beg;
   char *lim;
@@ -1540,8 +1543,8 @@ grep (int fd, struct stat const *st, bool *ineof)
   if (align_tabs)
     {
       /* Width is log of maximum number.  Line numbers are origin-1.  */
-      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
-      num += out_line && num < UINTMAX_MAX;
+      intmax_t num = usable_st_size (st) ? st->st_size : INTMAX_MAX;
+      num += out_line && num < INTMAX_MAX;
       do
         offset_width++;
       while ((num /= 10) != 0);
@@ -2231,15 +2234,15 @@ parse_grep_colors (void)
 
 /* Return true if PAT (of length PATLEN) contains an encoding error.  */
 static bool
-contains_encoding_error (char const *pat, size_t patlen)
+contains_encoding_error (char const *pat, idx_t patlen)
 {
   mbstate_t mbs = { 0 };
-  size_t charlen;
+  ptrdiff_t charlen;
 
-  for (size_t i = 0; i < patlen; i += charlen)
+  for (idx_t i = 0; i < patlen; i += charlen)
     {
       charlen = mb_clen (pat + i, patlen - i, &mbs);
-      if (MB_LEN_MAX < charlen)
+      if (charlen < 0)
         return true;
     }
   return false;
@@ -2279,8 +2282,8 @@ setup_ok_fold (void)
    Fcompile cannot handle it.  MBS is the multibyte conversion state.
    PATLEN must be nonzero.  */
 
-static int
-fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
+static ptrdiff_t
+fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
 {
   unsigned char pat0 = pat[0];
 
@@ -2302,7 +2305,7 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
   wchar_t folded[CASE_FOLDED_BUFSIZE];
   if (case_folded_counterparts (wc, folded))
     return -1;
-  for (int i = wn; 0 < --i; )
+  for (idx_t i = wn; 0 < --i; )
     {
       unsigned char c = pat[i];
       if (toupper (c) != c)
@@ -2317,11 +2320,11 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
    and so can be processed by Fcompile.  */
 
 static bool
-fgrep_icase_available (char const *pat, size_t patlen)
+fgrep_icase_available (char const *pat, idx_t patlen)
 {
   mbstate_t mbs = {0,};
 
-  for (size_t i = 0; i < patlen; )
+  for (idx_t i = 0; i < patlen; )
     {
       int n = fgrep_icase_charlen (pat + i, patlen - i, &mbs);
       if (n < 0)
@@ -2335,28 +2338,27 @@ fgrep_icase_available (char const *pat, size_t patlen)
 /* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style.  */
 
 void
-fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
+fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
 {
-  size_t len = *len_p;
+  idx_t len = *len_p;
   char *keys = *keys_p;
   mbstate_t mb_state = { 0 };
   char *new_keys = xnmalloc (len + 1, 2);
   char *p = new_keys;
-  size_t n;
 
-  for (; len; keys += n, len -= n)
+  for (ptrdiff_t n; len; keys += n, len -= n)
     {
       n = mb_clen (keys, len, &mb_state);
       switch (n)
         {
-        case (size_t) -2:
+        case -2:
           n = len;
           FALLTHROUGH;
         default:
           p = mempcpy (p, keys, n);
           break;
 
-        case (size_t) -1:
+        case -1:
           memset (&mb_state, 0, sizeof mb_state);
           n = 1;
           FALLTHROUGH;
@@ -2385,11 +2387,11 @@ fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
    to the -F pattern "a".  */
 
 static int
-try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
+try_fgrep_pattern (int matcher, char *keys, idx_t *len_p)
 {
   int result = matcher;
-  size_t len = *len_p;
-  char *new_keys = xmalloc (len + 1);
+  idx_t len = *len_p;
+  char *new_keys = ximalloc (len + 1);
   char *p = new_keys;
   char const *q = keys;
   mbstate_t mb_state = { 0 };
@@ -2434,26 +2436,14 @@ try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
           break;
         }
 
-      {
-        size_t n;
-        if (match_icase)
-          {
-            int ni = fgrep_icase_charlen (q, len, &mb_state);
-            if (ni < 0)
-              goto fail;
-            n = ni;
-          }
-        else
-          {
-            n = mb_clen (q, len, &mb_state);
-            if (MB_LEN_MAX < n)
-              goto fail;
-          }
-
-        p = mempcpy (p, q, n);
-        q += n;
-        len -= n;
-      }
+      ptrdiff_t clen = (match_icase
+                        ? fgrep_icase_charlen (q, len, &mb_state)
+                        : mb_clen (q, len, &mb_state));
+      if (clen < 0)
+        goto fail;
+      p = mempcpy (p, q, clen);
+      q += clen;
+      len -= clen;
     }
 
   if (*len_p != p - new_keys)
@@ -2473,7 +2463,7 @@ int
 main (int argc, char **argv)
 {
   char *keys = NULL;
-  size_t keycc = 0, keyalloc = 0;
+  idx_t keycc = 0, keyalloc = 0;
   int matcher = -1;
   int opt;
   int prev_optind, last_recursive;
@@ -2612,12 +2602,10 @@ main (int argc, char **argv)
 
       case 'e':
         {
-          ptrdiff_t cc = strlen (optarg);
-          if (keyalloc < keycc + cc + 1)
-            {
-              keyalloc = keycc + cc + 1;
-              pattern_array = keys = x2realloc (keys, &keyalloc);
-            }
+          idx_t cc = strlen (optarg);
+          ptrdiff_t shortage = keycc - keyalloc + cc + 1;
+          if (0 < shortage)
+            pattern_array = keys = xpalloc (keys, &keyalloc, shortage, -1, 1);
           char *keyend = mempcpy (keys + keycc, optarg, cc);
           *keyend = '\n';
           keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
@@ -2638,11 +2626,13 @@ main (int argc, char **argv)
               if (!fp)
                 die (EXIT_TROUBLE, errno, "%s", optarg);
             }
-          ptrdiff_t newkeycc = keycc, cc;
+          idx_t newkeycc = keycc, cc;
           for (;; newkeycc += cc)
             {
-              if (keyalloc <= newkeycc + 1)
-                pattern_array = keys = x2realloc (keys, &keyalloc);
+              ptrdiff_t shortage = newkeycc - keyalloc + 2;
+              if (0 < shortage)
+                pattern_array = keys = xpalloc (keys, &keyalloc,
+                                                shortage, -1, 1);
               cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
               if (cc == 0)
                 break;
@@ -2861,7 +2851,7 @@ main (int argc, char **argv)
     {
       /* Make a copy so that it can be reallocated or freed later.  */
       pattern_array = keys = xstrdup (argv[optind++]);
-      ptrdiff_t patlen = strlen (keys);
+      idx_t patlen = strlen (keys);
       keys[patlen] = '\n';
       keycc = update_patterns (keys, 0, patlen + 1, "");
     }
@@ -2968,7 +2958,7 @@ main (int argc, char **argv)
                                only_matching | color_option);
   /* We need one byte prior and one after.  */
   char eolbytes[3] = { 0, eolbyte, 0 };
-  size_t match_size;
+  idx_t match_size;
   skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
                                 &match_size, NULL) == 0)
                       == out_invert);
@@ -2987,11 +2977,11 @@ main (int argc, char **argv)
 #else
   long psize = getpagesize ();
 #endif
-  if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
+  if (! (0 < psize && psize <= (IDX_MAX - uword_size) / 2))
     abort ();
   pagesize = psize;
-  bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
-  buffer = xmalloc (bufalloc);
+  bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + uword_size;
+  buffer = ximalloc (bufalloc);
 
   if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
     devices = READ_DEVICES;
diff --git a/src/grep.h b/src/grep.h
index a3cd73e..04c15dd 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -21,6 +21,7 @@
 #define GREP_GREP_H 1
 
 #include <stdbool.h>
+#include <idx.h>
 
 /* The following flags are exported from grep for the matchers
    to look at. */
@@ -29,6 +30,6 @@ extern bool match_words;      /* -w */
 extern bool match_lines;       /* -x */
 extern char eolbyte;           /* -z */
 
-extern char const *pattern_file_name (size_t, size_t *);
+extern char const *pattern_file_name (idx_t, idx_t *);
 
 #endif
diff --git a/src/kwsearch.c b/src/kwsearch.c
index ea18ce1..171db9a 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -32,11 +32,11 @@ struct kwsearch
      'kwswords (kwset)' when some extra one-character words have been
      appended, one for each troublesome character that will require a
      DFA search.  */
-  ptrdiff_t words;
+  idx_t words;
 
   /* The user's pattern and its size in bytes.  */
   char *pattern;
-  size_t size;
+  idx_t size;
 
   /* The user's pattern compiled as a regular expression,
      or null if it has not been compiled.  */
@@ -47,11 +47,11 @@ struct kwsearch
    followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
-Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
 {
   kwset_t kwset;
   char *buf = NULL;
-  size_t bufalloc = 0;
+  idx_t bufalloc = 0;
 
   kwset = kwsinit (true);
 
@@ -59,7 +59,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
   do
     {
       char const *sep = rawmemchr (p, '\n');
-      ptrdiff_t len = sep - p;
+      idx_t len = sep - p;
 
       if (match_lines)
         {
@@ -70,8 +70,8 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
               if (bufalloc < len + 2)
                 {
                   free (buf);
-                  bufalloc = len + 2;
-                  buf = x2realloc (NULL, &bufalloc);
+                  bufalloc = len;
+                  buf = xpalloc (NULL, &bufalloc, 2, -1, 1);
                   buf[0] = eolbyte;
                 }
               memcpy (buf + 1, p, len);
@@ -88,7 +88,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 
   free (buf);
 
-  ptrdiff_t words = kwswords (kwset);
+  idx_t words = kwswords (kwset);
   kwsprep (kwset);
 
   struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch);
@@ -102,14 +102,14 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 
 /* Use the compiled pattern VCP to search the buffer BUF of size SIZE.
    If found, return the offset of the first match and store its
-   size into *MATCH_SIZE.  If not found, return SIZE_MAX.
+   size into *MATCH_SIZE.  If not found, return -1.
    If START_PTR is nonnull, start searching there.  */
-size_t
-Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
           char const *start_ptr)
 {
   char const *beg, *end, *mb_start;
-  ptrdiff_t len;
+  idx_t len;
   char eol = eolbyte;
   struct kwsearch *kwsearch = vcp;
   kwset_t kwset = kwsearch->kwset;
@@ -126,7 +126,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
         break;
       len = kwsmatch.size - 2 * match_lines;
 
-      size_t mbclen = 0;
+      idx_t mbclen = 0;
       if (mb_check
           && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
         {
@@ -198,8 +198,8 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                 else
                   end = buf + size;
 
-                if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
-                    != (size_t) -1)
+                if (0 <= EGexecute (kwsearch->re, beg, end - beg,
+                                    match_size, NULL))
                   goto success_match_words;
                 beg = end - 1;
                 break;
diff --git a/src/kwset.c b/src/kwset.c
index e5ac1a9..329b802 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -59,31 +59,31 @@ struct tree
 struct trie
 {
   /* If an accepting node, this is either 2*W + 1 where W is the word
-     index, or is SIZE_MAX if Aho-Corasick is in use and FAIL
+     index, or is -1 if Aho-Corasick is in use and FAIL
      specifies where to look for more info.  If not an accepting node,
      this is zero.  */
-  size_t accepting;
+  ptrdiff_t accepting;
 
   struct tree *links;          /* Tree of edges leaving this node.  */
   struct trie *parent;         /* Parent of this node.  */
   struct trie *next;           /* List of all trie nodes in level order.  */
   struct trie *fail;           /* Aho-Corasick failure function.  */
-  ptrdiff_t depth;             /* Depth of this node from the root.  */
-  ptrdiff_t shift;             /* Shift function for search failures.  */
-  ptrdiff_t maxshift;          /* Max shift of self and descendants.  */
+  idx_t depth;                 /* Depth of this node from the root.  */
+  idx_t shift;                 /* Shift function for search failures.  */
+  idx_t maxshift;              /* Max shift of self and descendants.  */
 };
 
 /* Structure returned opaquely to the caller, containing everything.  */
 struct kwset
 {
   struct obstack obstack;      /* Obstack for node allocation.  */
-  ptrdiff_t words;             /* Number of words in the trie.  */
+  idx_t words;                 /* Number of words in the trie.  */
   struct trie *trie;           /* The trie itself.  */
-  ptrdiff_t mind;              /* Minimum depth of an accepting node.  */
+  idx_t mind;                  /* Minimum depth of an accepting node.  */
   unsigned char delta[NCHAR];  /* Delta table for rapid search.  */
   struct trie *next[NCHAR];    /* Table of children of the root.  */
   char *target;                        /* Target string if there's only one.  */
-  ptrdiff_t *shift;            /* Used in Boyer-Moore search for one
+  idx_t *shift;                        /* Used in Boyer-Moore search for one
                                    string.  */
   char const *trans;           /* Character translation table.  */
 
@@ -108,8 +108,7 @@ struct kwset
   char gc2;
 
   /* kwsexec implementation.  */
-  ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t,
-                        struct kwsmatch *, bool);
+  ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool);
 };
 
 /* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
@@ -119,9 +118,9 @@ tr (char const *trans, char c)
   return trans ? trans[U(c)] : c;
 }
 
-static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t acexec (kwset_t, char const *, idx_t,
                          struct kwsmatch *, bool);
-static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t bmexec (kwset_t, char const *, idx_t,
                          struct kwsmatch *, bool);
 
 /* Return a newly allocated keyword set.  A nonnull TRANS specifies a
@@ -142,7 +141,7 @@ kwsalloc (char const *trans)
   kwset->trie->fail = NULL;
   kwset->trie->depth = 0;
   kwset->trie->shift = 0;
-  kwset->mind = PTRDIFF_MAX;
+  kwset->mind = IDX_MAX;
   kwset->target = NULL;
   kwset->trans = trans;
   kwset->kwsexec = acexec;
@@ -156,7 +155,7 @@ enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
 
 /* Add the given string to the contents of the keyword set.  */
 void
-kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
+kwsincr (kwset_t kwset, char const *text, idx_t len)
 {
   assume (0 <= len);
   struct trie *trie = kwset->trie;
@@ -181,7 +180,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
       enum { L, R } dirs[DEPTH_SIZE];
       links[0] = (struct tree *) &trie->links;
       dirs[0] = L;
-      ptrdiff_t depth = 1;
+      idx_t depth = 1;
 
       while (cur && label != cur->label)
         {
@@ -292,10 +291,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
   /* Mark the node finally reached as accepting, encoding the
      index number of this word in the keyword set so far.  */
   if (!trie->accepting)
-    {
-      size_t words = kwset->words;
-      trie->accepting = 2 * words + 1;
-    }
+    trie->accepting = 2 * kwset->words + 1;
   ++kwset->words;
 
   /* Keep track of the longest and shortest string of the keyword set.  */
@@ -303,7 +299,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
     kwset->mind = trie->depth;
 }
 
-ptrdiff_t
+idx_t
 kwswords (kwset_t kwset)
 {
   return kwset->words;
@@ -350,7 +346,7 @@ treefails (struct tree const *tree, struct trie const *fail,
         {
           tree->trie->fail = cur->trie;
           if (!reverse && cur->trie->accepting && !tree->trie->accepting)
-            tree->trie->accepting = SIZE_MAX;
+            tree->trie->accepting = -1;
           return;
         }
       fail = fail->fail;
@@ -362,7 +358,7 @@ treefails (struct tree const *tree, struct trie const *fail,
 /* Set delta entries for the links of the given tree such that
    the preexisting delta value is larger than the current depth.  */
 static void
-treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[])
+treedelta (struct tree const *tree, idx_t depth, unsigned char delta[])
 {
   if (!tree)
     return;
@@ -407,7 +403,6 @@ void
 kwsprep (kwset_t kwset)
 {
   char const *trans = kwset->trans;
-  ptrdiff_t i;
   unsigned char deltabuf[NCHAR];
   unsigned char *delta = trans ? deltabuf : kwset->delta;
   struct trie *curr, *last;
@@ -425,7 +420,8 @@ kwsprep (kwset_t kwset)
 
       /* Looking for just one string.  Extract it from the trie.  */
       kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
-      for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
+      curr = kwset->trie;
+      for (idx_t i = 0; i < kwset->mind; i++)
         {
           kwset->target[i] = curr->links->label;
           curr = curr->next;
@@ -504,7 +500,7 @@ kwsprep (kwset_t kwset)
   treenext (kwset->trie->links, next);
   int gc1 = -2;
   int gc1help = -1;
-  for (i = 0; i < NCHAR; i++)
+  for (int i = 0; i < NCHAR; i++)
     {
       int ti = i;
       if (trans)
@@ -534,9 +530,10 @@ kwsprep (kwset_t kwset)
     {
       /* Looking for just one string.  Extract it from the trie.  */
       kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
-      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+      curr = kwset->trie;
+      for (idx_t i = kwset->mind; 0 < i; i--)
         {
-          kwset->target[i] = curr->links->label;
+          kwset->target[i - 1] = curr->links->label;
           curr = curr->next;
         }
 
@@ -547,7 +544,8 @@ kwsprep (kwset_t kwset)
           kwset->shift
             = obstack_alloc (&kwset->obstack,
                              sizeof *kwset->shift * (kwset->mind - 1));
-          for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i)
+          curr = kwset->trie->next;
+          for (idx_t i = 0; i < kwset->mind - 1; i++)
             {
               kwset->shift[i] = curr->shift;
               curr = curr->next;
@@ -560,7 +558,7 @@ kwsprep (kwset_t kwset)
 
   /* Fix things up for any translation table.  */
   if (trans)
-    for (i = 0; i < NCHAR; ++i)
+    for (int i = 0; i < NCHAR; ++i)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
@@ -574,16 +572,16 @@ kwsprep (kwset_t kwset)
    when failing.  KWSET->shift says how much to shift.  */
 static inline bool
 bm_delta2_search (char const **tpp, char const *ep, char const *sp,
-                  ptrdiff_t len,
+                  idx_t len,
                   char const *trans, char gc1, char gc2,
                   unsigned char const *d1, kwset_t kwset)
 {
   char const *tp = *tpp;
-  ptrdiff_t d = len, skip = 0;
+  idx_t d = len, skip = 0;
 
   while (true)
     {
-      ptrdiff_t i = 2;
+      idx_t i = 2;
       if (tr (trans, tp[-2]) == gc2)
         {
           while (++i <= d)
@@ -622,7 +620,7 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp,
    that matches the terminal byte specified by KWSET, or NULL if there
    is no match.  KWSET->gc1 should be nonnegative.  */
 static char const *
-memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
+memchr_kwset (char const *s, idx_t n, kwset_t kwset)
 {
   char const *slim = s + n;
   if (kwset->gc1help < 0)
@@ -634,7 +632,7 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
   else
     {
       int small_heuristic = 2;
-      size_t small_bytes = small_heuristic * sizeof (unsigned long int);
+      idx_t small_bytes = small_heuristic * sizeof (unsigned long int);
       while (s < slim)
         {
           if (kwset->next[U(*s)])
@@ -649,13 +647,13 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
 
 /* Fast Boyer-Moore search (inlinable version).  */
 static inline ptrdiff_t _GL_ATTRIBUTE_PURE
-bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
+bmexec_trans (kwset_t kwset, char const *text, idx_t size)
 {
   assume (0 <= size);
   unsigned char const *d1;
   char const *ep, *sp, *tp;
   int d;
-  ptrdiff_t len = kwset->mind;
+  idx_t len = kwset->mind;
   char const *trans = kwset->trans;
 
   if (len == 0)
@@ -675,8 +673,8 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
   char gc2 = kwset->gc2;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2).  */
-  ptrdiff_t len12;
-  if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size)
+  idx_t len12;
+  if (INT_MULTIPLY_OK (len, 12, &len12) && len12 < size)
     /* 11 is not a bug, the initial offset happens only once.  */
     for (ep = text + size - 11 * len; tp <= ep; )
       {
@@ -735,7 +733,7 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
 
 /* Fast Boyer-Moore search.  */
 static ptrdiff_t
-bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
+bmexec (kwset_t kwset, char const *text, idx_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
   /* Help the compiler inline in two ways, depending on whether
@@ -753,7 +751,7 @@ bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
 /* Hairy multiple string search with the Aho-Corasick algorithm.
    (inlinable version)  */
 static inline ptrdiff_t
-acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
+acexec_trans (kwset_t kwset, char const *text, idx_t len,
               struct kwsmatch *kwsmatch, bool longest)
 {
   struct trie const *trie, *accept;
@@ -831,7 +829,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
 
  match:
   accept = trie;
-  while (accept->accepting == SIZE_MAX)
+  while (accept->accepting < 0)
     accept = accept->fail;
   left = tp - accept->depth;
 
@@ -858,7 +856,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
           if (trie->accepting)
             {
               accept1 = trie;
-              while (accept1->accepting == SIZE_MAX)
+              while (accept1->accepting < 0)
                 accept1 = accept1->fail;
               left1 = tp - accept1->depth;
               if (left1 <= left)
@@ -870,7 +868,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
         }
     }
 
-  kwsmatch->index = accept->accepting / 2;
+  kwsmatch->index = accept->accepting >> 1;
   kwsmatch->offset = left - text;
   kwsmatch->size = accept->depth;
 
@@ -879,7 +877,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
 
 /* Hairy multiple string search with Aho-Corasick algorithm.  */
 static ptrdiff_t
-acexec (kwset_t kwset, char const *text, ptrdiff_t size,
+acexec (kwset_t kwset, char const *text, idx_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
   assume (0 <= size);
@@ -898,7 +896,7 @@ acexec (kwset_t kwset, char const *text, ptrdiff_t size,
    value), and length.  If LONGEST, find the longest match; otherwise
    any match will do.  */
 ptrdiff_t
-kwsexec (kwset_t kwset, char const *text, ptrdiff_t size,
+kwsexec (kwset_t kwset, char const *text, idx_t size,
          struct kwsmatch *kwsmatch, bool longest)
 {
   return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
diff --git a/src/kwset.h b/src/kwset.h
index 24e13e2..cb94cf4 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -22,23 +22,26 @@
 #include <stddef.h>
 #include <stdbool.h>
 
+#include <idx.h>
+
 struct kwsmatch
 {
-  ptrdiff_t index;     /* Index number of matching keyword.  */
-  ptrdiff_t offset;    /* Offset of match.  */
-  ptrdiff_t size;      /* Length of match.  */
+  idx_t index; /* Index number of matching keyword.  */
+  idx_t offset;        /* Offset of match.  */
+  idx_t size;  /* Length of match.  */
 };
 
-#include "arg-nonnull.h"
+#include <arg-nonnull.h>
+#include <idx.h>
 
 struct kwset;
 typedef struct kwset *kwset_t;
 
 extern kwset_t kwsalloc (char const *);
-extern void kwsincr (kwset_t, char const *, ptrdiff_t);
-extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
+extern void kwsincr (kwset_t, char const *, idx_t);
+extern idx_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
 extern void kwsprep (kwset_t);
-extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t,
+extern ptrdiff_t kwsexec (kwset_t, char const *, idx_t,
                           struct kwsmatch *, bool)
   _GL_ARG_NONNULL ((4));
 extern void kwsfree (kwset_t);
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 37f7e40..3bdaee9 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -113,7 +113,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
    followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
-Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
 {
   int e;
   char const *ep;
@@ -202,8 +202,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
   return pc;
 }
 
-size_t
-Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
           char const *start_ptr)
 {
   int sub[NSUB];
diff --git a/src/search.h b/src/search.h
index 6a5814a..acc282c 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,38 +48,55 @@ typedef signed char mb_len_map_t;
 /* searchutils.c */
 extern void wordinit (void);
 extern kwset_t kwsinit (bool);
-extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_prev (char const *, char const *, char const *)
+extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_prev (char const *, char const *, char const *)
   _GL_ATTRIBUTE_PURE;
-extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
-                            char const *);
+extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *);
 
 /* dfasearch.c */
-extern void *GEAcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* kwsearch.c */
-extern void *Fcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Fcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* pcresearch.c */
-extern void *Pcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* grep.c */
 extern struct localeinfo localeinfo;
-extern void fgrep_to_grep_pattern (char **, size_t *);
+extern void fgrep_to_grep_pattern (char **, idx_t *);
+
+/* Return the number of bytes in the character at the start of S, which
+   is of size N.  N must be positive.  MBS is the conversion state.
+   This acts like mbrlen, except it returns -1 and -2 instead of
+   (size_t) -1 and (size_t) -2.  */
+SEARCH_INLINE ptrdiff_t
+imbrlen (char const *s, idx_t n, mbstate_t *mbs)
+{
+  size_t len = mbrlen (s, n, mbs);
+
+  /* Convert result to ptrdiff_t portably, even on oddball platforms.
+     When optimizing, this typically uses no machine instructions.  */
+  if (len <= MB_LEN_MAX)
+    return len;
+  ptrdiff_t neglen = -len;
+  return -neglen;
+}
 
 /* Return the number of bytes in the character at the start of S, which
    is of size N.  N must be positive.  MBS is the conversion state.
    This acts like mbrlen, except it returns 1 when mbrlen would return 0,
+   it returns -1 and -2 instead of (size_t) -1 and (size_t) -2,
    and it is typically faster because of the cache.  */
-SEARCH_INLINE size_t
-mb_clen (char const *s, size_t n, mbstate_t *mbs)
+SEARCH_INLINE ptrdiff_t
+mb_clen (char const *s, idx_t n, mbstate_t *mbs)
 {
   signed char len = localeinfo.sbclen[to_uchar (*s)];
-  return len == -2 ? mbrlen (s, n, mbs) : len;
+  return len == -2 ? imbrlen (s, n, mbs) : len;
 }
 
 extern char const *input_filename (void);
diff --git a/src/searchutils.c b/src/searchutils.c
index 0080dd7..ebc4a11 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -47,7 +47,7 @@ kwsinit (bool mb_trans)
 
   if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
     {
-      trans = xmalloc (NCHAR);
+      trans = ximalloc (NCHAR);
       /* If I is a single-byte character that becomes a different
          single-byte character when uppercased, set trans[I]
          to that character.  Otherwise, set trans[I] to I.  */
@@ -88,7 +88,7 @@ kwsinit (bool mb_trans)
 
    Treat encoding errors as if they were single-byte characters.  */
 ptrdiff_t
-mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
            char const *end)
 {
   const char *p = *mb_start;
@@ -114,8 +114,8 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
               if (long_enough)
                 {
                   mbstate_t mbs = { 0 };
-                  size_t clen = mbrlen (cur - i, end - (cur - i), &mbs);
-                  if (clen <= MB_LEN_MAX)
+                  ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
+                  if (0 <= clen)
                     {
                       /* This multibyte character contains *CUR.  */
                       p0 = cur - i;
@@ -130,13 +130,13 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
       /* In non-UTF-8 encodings, to find character boundaries one must
          in general scan forward from the start of the buffer.  */
       mbstate_t mbs = { 0 };
-      size_t clen;
+      ptrdiff_t clen;
 
       do
         {
           clen = mb_clen (p, end - p, &mbs);
 
-          if (MB_LEN_MAX < clen)
+          if (clen < 0)
             {
               /* An invalid sequence, or a truncated multibyte character.
                  Treat it as a single byte character.  */
@@ -159,10 +159,10 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
 /* Examine the start of BUF (which goes to END) for word constituents.
    If COUNTALL, examine as many as possible; otherwise, examine at most one.
    Return the total number of bytes in the examined characters.  */
-static size_t
+static idx_t
 wordchars_count (char const *buf, char const *end, bool countall)
 {
-  size_t n = 0;
+  idx_t n = 0;
   mbstate_t mbs = { 0 };
   while (n < end - buf)
     {
@@ -188,7 +188,7 @@ wordchars_count (char const *buf, char const *end, bool countall)
 /* Examine the start of BUF for the longest prefix containing just
    word constituents.  Return the total number of bytes in the prefix.
    The buffer ends at END.  */
-size_t
+idx_t
 wordchars_size (char const *buf, char const *end)
 {
   return wordchars_count (buf, end, true);
@@ -196,7 +196,7 @@ wordchars_size (char const *buf, char const *end)
 
 /* If BUF starts with a word constituent, return the number of bytes
    used to represent it; otherwise, return zero.  The buffer ends at END.  */
-size_t
+idx_t
 wordchar_next (char const *buf, char const *end)
 {
   return wordchars_count (buf, end, false);
@@ -205,7 +205,7 @@ wordchar_next (char const *buf, char const *end)
 /* In the buffer BUF, return nonzero if the character whose encoding
    contains the byte before CUR is a word constituent.  The buffer
    ends at END.  */
-size_t
+idx_t
 wordchar_prev (char const *buf, char const *cur, char const *end)
 {
   if (buf == cur)
-- 
2.31.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]