m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

more buffer vs. byte speedups


From: Eric Blake
Subject: more buffer vs. byte speedups
Date: Wed, 18 Feb 2009 18:54:18 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

I'm applying these patches to all three active branches.  The first fixes a 
potential bug in undivert due to a stack allocation of a 16k buffer; on some 
systems, a stack overflow that overshoots a 4k guard page can lead to silent 
termination of the program without invoking the SIGSEGV fault handler, or other 
equally bad behavior.

Next, I made esyscmd dump directly into the obstack via fread, rather than 
repeatedly calling fgetc.  I also added detection for (unlikely) read errors 
during esyscmd.

Finally, I found more places that could use memchr instead of byte-wise 
searches, although the relative frequency of regexp (\), patsubst (\), and 
format (%) don't give as much noticeable speedup as today's earlier patch to 
macro expansion ($).

>From 01f216c3c98d41db7878b18c93d23e8dc38d8643 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:25:01 -0700
Subject: [PATCH 1/3] Avoid risk of stack overflow.

* src/output.c (insert_file): Avoid stack allocation of large
buffer.
(freeze_diversions): Avoid spurious semicolon.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog    |    7 +++++++
 src/output.c |   14 ++++++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0176fe4..cb35601 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2009-02-18  Eric Blake  <address@hidden>
+
+       Avoid risk of stack overflow.
+       * src/output.c (insert_file): Avoid stack allocation of large
+       buffer.
+       (freeze_diversions): Avoid spurious semicolon.
+
 2009-02-16  Eric Blake  <address@hidden>

        Avoid test failure due to different errno.
diff --git a/src/output.c b/src/output.c
index 0586b89..a39d1e5 100644
--- a/src/output.c
+++ b/src/output.c
@@ -1,7 +1,7 @@
 /* GNU m4 -- A simple macro processor

    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006,
-   2007, 2008 Free Software Foundation, Inc.
+   2007, 2008, 2009 Free Software Foundation, Inc.

    This file is part of GNU M4.

@@ -812,21 +812,19 @@ make_diversion (int divnum)
 void
 insert_file (FILE *file)
 {
-  char buffer[COPY_BUFFER_SIZE];
+  static char buffer[COPY_BUFFER_SIZE];
   size_t length;

   /* Optimize out inserting into a sink.  */
-
   if (!output_diversion)
     return;

   /* Insert output by big chunks.  */
-
-  for (;;)
+  while (1)
     {
-      length = fread (buffer, 1, COPY_BUFFER_SIZE, file);
+      length = fread (buffer, 1, sizeof buffer, file);
       if (ferror (file))
-       M4ERROR ((EXIT_FAILURE, errno, "ERROR: reading inserted file"));
+       M4ERROR ((EXIT_FAILURE, errno, "error reading inserted file"));
       if (length == 0)
        break;
       output_text (buffer, length);
@@ -982,7 +980,7 @@ freeze_diversions (FILE *file)
   iter = gl_oset_iterator (diversion_table);
   while (gl_oset_iterator_next (&iter, &elt))
     {
-      m4_diversion *diversion = (m4_diversion *) elt;;
+      m4_diversion *diversion = (m4_diversion *) elt;
       if (diversion->size || diversion->used)
        {
          if (diversion->size)
-- 
1.6.1.2


>From 1974da270a84d8c0cf2b3e3e08ce5292fdb211f3 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:38:54 -0700
Subject: [PATCH 2/3] Speed up esyscmd with buffer reads.

* src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
rather than repeatedly reading bytes.  Detect read errors.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog     |    4 ++++
 src/builtin.c |   24 ++++++++++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cb35601..0a6c47e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2009-02-18  Eric Blake  <address@hidden>

+       Speed up esyscmd with buffer reads.
+       * src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
+       rather than repeatedly reading bytes.  Detect read errors.
+
        Avoid risk of stack overflow.
        * src/output.c (insert_file): Avoid stack allocation of large
        buffer.
diff --git a/src/builtin.c b/src/builtin.c
index 50f10cd..4a89fcc 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -1,7 +1,7 @@
 /* GNU m4 -- A simple macro processor

    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006,
-   2007, 2008 Free Software Foundation, Inc.
+   2007, 2008, 2009 Free Software Foundation, Inc.

    This file is part of GNU M4.

@@ -999,7 +999,6 @@ static void
 m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
 {
   FILE *pin;
-  int ch;

   if (bad_argc (argv[0], argc, 2, 2))
     {
@@ -1019,8 +1018,25 @@ m4_esyscmd (struct obstack *obs, int argc, token_data 
**argv)
     }
   else
     {
-      while ((ch = getc (pin)) != EOF)
-       obstack_1grow (obs, (char) ch);
+      while (1)
+       {
+         size_t avail = obstack_room (obs);
+         size_t len;
+         if (!avail)
+           {
+             int ch = getc (pin);
+             if (ch == EOF)
+               break;
+             obstack_1grow (obs, ch);
+             continue;
+           }
+         len = fread (obstack_next_free (obs), 1, avail, pin);
+         if (len <= 0)
+           break;
+         obstack_blank_fast (obs, len);
+       }
+      if (ferror (pin))
+       M4ERROR ((EXIT_FAILURE, errno, "cannot read pipe"));
       sysval = pclose (pin);
     }
 }
-- 
1.6.1.2


>From 45a154e6d70517bf1b837715aae2fe366c9c1116 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:03:55 -0700
Subject: [PATCH 3/3] Prefer buffer over byte operations.

* src/format.c (expand_format): Use strchr for speed.
* src/builtin.c (substitute, expand_user_macro): Likewise.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog     |    4 ++++
 src/builtin.c |   38 +++++++++++++++++++++-----------------
 src/format.c  |   16 +++++++++-------
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0a6c47e..3a80a51 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2009-02-18  Eric Blake  <address@hidden>

+       Prefer buffer over byte operations.
+       * src/format.c (expand_format): Use strchr for speed.
+       * src/builtin.c (substitute, expand_user_macro): Likewise.
+
        Speed up esyscmd with buffer reads.
        * src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
        rather than repeatedly reading bytes.  Detect read errors.
diff --git a/src/builtin.c b/src/builtin.c
index 4a89fcc..a89b861 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -1902,17 +1902,18 @@ substitute (struct obstack *obs, const char *victim, 
const char *repl,
            struct re_registers *regs)
 {
   int ch;
-
-  for (;;)
+  while (1)
     {
-      while ((ch = *repl++) != '\\')
+      const char *backslash = strchr (repl, '\\');
+      if (!backslash)
        {
-         if (ch == '\0')
-           return;
-         obstack_1grow (obs, ch);
+         obstack_grow (obs, repl, strlen (repl));
+         return;
        }
-
-      switch ((ch = *repl++))
+      obstack_grow (obs, repl, backslash - repl);
+      repl = backslash;
+      ch = *++repl;
+      switch (ch)
        {
        case '0':
          if (!substitute_warned)
@@ -1926,6 +1927,7 @@ Warning: \\0 will disappear, use \\& instead in 
replacements"));
        case '&':
          obstack_grow (obs, victim + regs->start[0],
                        regs->end[0] - regs->start[0]);
+         repl++;
          break;

        case '1': case '2': case '3': case '4': case '5': case '6':
@@ -1937,6 +1939,7 @@ Warning: \\0 will disappear, use \\& instead in 
replacements"));
          else if (regs->end[ch] > 0)
            obstack_grow (obs, victim + regs->start[ch],
                          regs->end[ch] - regs->start[ch]);
+         repl++;
          break;

        case '\0':
@@ -1946,6 +1949,7 @@ Warning: \\0 will disappear, use \\& instead in 
replacements"));

        default:
          obstack_1grow (obs, ch);
+         repl++;
          break;
        }
     }
@@ -2151,19 +2155,19 @@ void
 expand_user_macro (struct obstack *obs, symbol *sym,
                   int argc, token_data **argv)
 {
-  const char *text;
+  const char *text = SYMBOL_TEXT (sym);
   int i;
-
-  for (text = SYMBOL_TEXT (sym); *text != '\0';)
+  while (1)
     {
-      if (*text != '$')
+      const char *dollar = strchr (text, '$');
+      if (!dollar)
        {
-         obstack_1grow (obs, *text);
-         text++;
-         continue;
+         obstack_grow (obs, text, strlen (text));
+         return;
        }
-      text++;
-      switch (*text)
+      obstack_grow (obs, text, dollar - text);
+      text = dollar;
+      switch (*++text)
        {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
diff --git a/src/format.c b/src/format.c
index 518394a..a0519a0 100644
--- a/src/format.c
+++ b/src/format.c
@@ -1,7 +1,7 @@
 /* GNU m4 -- A simple macro processor

-   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008
-   Free Software Foundation, Inc.
+   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008,
+   2009 Free Software Foundation, Inc.

    This file is part of GNU M4.

@@ -89,14 +89,16 @@ expand_format (struct obstack *obs, int argc, token_data 
**argv)

   f = fmt = ARG_STR (argc, argv);
   memset (ok, 0, sizeof ok);
-  for (;;)
+  while (1)
     {
-      while ((c = *fmt++) != '%')
+      const char *percent = strchr (fmt, '%');
+      if (!percent)
        {
-         if (c == '\0')
-           return;
-         obstack_1grow (obs, c);
+         obstack_grow (obs, fmt, strlen (fmt));
+         return;
        }
+      obstack_grow (obs, fmt, percent - fmt);
+      fmt = percent + 1;

       if (*fmt == '%')
        {
-- 
1.6.1.2








reply via email to

[Prev in Thread] Current Thread [Next in Thread]