m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

26-gary-changeresyntax.patch


From: Gary V. Vaughan
Subject: 26-gary-changeresyntax.patch
Date: Fri, 07 Jul 2006 12:49:52 +0100
User-agent: Thunderbird 1.5.0.4 (Macintosh/20060530)

Well, it was easier than I thought, so I wrote the patch last night, and
the ChangeLog over lunch today :-)

Okay to commit to HEAD?

Cheers,
        Gary.
-- 
Gary V. Vaughan      ())_.  address@hidden,gnu.org}
Research Scientist   ( '/   http://blog.azazil.net
GNU Hacker           / )=   http://trac.azazil.net/projects/libtool
Technical Author   `(_~)_   http://sources.redhat.com/autobook
Index: m4--devo--0/modules/gnu.c
===================================================================
--- m4--devo--0.orig/modules/gnu.c      2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/modules/gnu.c   2006-07-07 12:28:41.000000000 +0100
@@ -41,34 +41,6 @@
 #  include "m4private.h"
 #endif
 
-#define RE_SYNTAX_BRE RE_SYNTAX_EMACS
-
-#define RE_SYNTAX_ERE \
-  (/* Allow char classes. */                                   \
-    RE_CHAR_CLASSES                                            \
-  /* Anchors are OK in groups. */                              \
-  | RE_CONTEXT_INDEP_ANCHORS                                   \
-  /* Be picky, `/^?/', for instance, makes no sense. */                \
-  | RE_CONTEXT_INVALID_OPS                                     \
-  /* Allow intervals with `{' and `}', forbid invalid ranges. */\
-  | RE_INTERVALS | RE_NO_BK_BRACES | RE_NO_EMPTY_RANGES                \
-  /* `(' and `)' are the grouping operators. */                        \
-  | RE_NO_BK_PARENS                                            \
-  /* `|' is the alternation. */                                        \
-  | RE_NO_BK_VBAR)
-
-#include "format.c"
-
-
-/* The regs_allocated field in an re_pattern_buffer refers to the
-   state of the re_registers struct used in successive matches with
-   the same compiled pattern:  */
-typedef struct {
-  struct re_pattern_buffer pat;        /* compiled regular expression */
-  struct re_registers regs;    /* match registers */
-} m4_pattern_buffer;
-
-
 /* Rename exported symbols for dlpreload()ing.  */
 #define m4_builtin_table       gnu_LTX_m4_builtin_table
 #define m4_macro_table         gnu_LTX_m4_macro_table
@@ -78,22 +50,20 @@
    with their details in a single table for easy maintenance.
 
                function        macros  blind argmin  argmax */
-#define builtin_functions                      \
+#define builtin_functions                                      \
        BUILTIN(__file__,       false,  false,  1,      1  )    \
        BUILTIN(__line__,       false,  false,  1,      1  )    \
        BUILTIN(builtin,        false,  true,   2,      -1 )    \
+       BUILTIN(changeresyntax, false,  true,   1,      2  )    \
        BUILTIN(changesyntax,   false,  true,   1,      -1 )    \
        BUILTIN(debugmode,      false,  false,  1,      2  )    \
        BUILTIN(debugfile,      false,  false,  1,      2  )    \
-       BUILTIN(eregexp,        false,  true,   3,      4  )    \
-       BUILTIN(epatsubst,      false,  true,   3,      4  )    \
-       BUILTIN(erenamesyms,    false,  true,   3,      3  )    \
        BUILTIN(esyscmd,        false,  true,   2,      2  )    \
        BUILTIN(format,         false,  true,   2,      -1 )    \
        BUILTIN(indir,          false,  true,   2,      -1 )    \
-       BUILTIN(patsubst,       false,  true,   3,      4  )    \
-       BUILTIN(regexp,         false,  true,   3,      4  )    \
-       BUILTIN(renamesyms,     false,  true,   3,      3  )    \
+       BUILTIN(patsubst,       false,  true,   3,      5  )    \
+       BUILTIN(regexp,         false,  true,   3,      5  )    \
+       BUILTIN(renamesyms,     false,  true,   3,      4  )    \
        BUILTIN(symbols,        false,  false,  0,      -1 )    \
        BUILTIN(syncoutput,     false,  true,   2,      2  )    \
 
@@ -131,21 +101,190 @@
   { 0, 0 },
 };
 
-static bool regsub     (m4 *context, m4_obstack *obs, const char *caller,
-                        const char *victim, const char *regexp,
-                        m4_pattern_buffer *buf, const char *replace,
-                        bool ignore_duplicates);
-static void substitute (m4 *context, m4_obstack *obs, const char *victim,
-                        const char *repl, m4_pattern_buffer *buf);
-
-static void m4_regexp_do       (m4 *context, m4_obstack *obs, int argc,
-                                m4_symbol_value **argv, int syntax);
-static void m4_patsubst_do     (m4 *context, m4_obstack *obs, int argc,
-                                m4_symbol_value **argv, int syntax);
-static void m4_renamesyms_do   (m4 *context, m4_obstack *obs, int argc,
-                                m4_symbol_value **argv, int syntax);
 
 
+/* The regs_allocated field in an re_pattern_buffer refers to the
+   state of the re_registers struct used in successive matches with
+   the same compiled pattern:  */
+typedef struct {
+  struct re_pattern_buffer pat;        /* compiled regular expression */
+  struct re_registers regs;    /* match registers */
+} m4_pattern_buffer;
+
+
+/* Compile REGEXP using the Regex RESYNTAX bits, and return the buffer.
+   Report errors on behalf of CALLER.  */
+
+static m4_pattern_buffer *
+m4_regexp_compile (m4 *context, const char *caller,
+                  const char *regexp, int resyntax)
+{
+  static m4_pattern_buffer buf;        /* compiled regular expression */
+  static bool buf_initialized = false;
+  const char *msg;             /* error message from re_compile_pattern */
+
+  if (!buf_initialized)
+    {
+      buf_initialized  = true;
+      buf.pat.buffer   = NULL;
+      buf.pat.allocated        = 0;
+      buf.pat.fastmap  = NULL;
+      buf.pat.translate        = NULL;
+    }
+
+  re_set_syntax (resyntax);
+  msg = re_compile_pattern (regexp, strlen (regexp), &buf.pat);
+
+  if (msg != NULL)
+    {
+      M4ERROR ((m4_get_warning_status_opt (context), 0,
+               _("%s: bad regular expression `%s': %s"),
+               caller, regexp, msg));
+      return NULL;
+    }
+
+  return &buf;
+}
+
+static int
+m4_regexp_search (m4_pattern_buffer *buf, const char *string,
+                 const int size, const int start, const int range)
+{
+  return re_search (&(buf->pat), string, size, start, range, &(buf->regs));
+}
+
+
+/* Function to perform substitution by regular expressions.  Used by the
+   builtins regexp, patsubst and renamesyms.  The changed text is placed on
+   the obstack.  The substitution is REPL, with \& substituted by the part
+   of VICTIM matched by the last whole regular expression, taken from
+   REGS[0], and \N substituted by the text matched by the Nth parenthesized
+   sub-expression, taken from REGS[N].  */
+static int substitute_warned = 0;
+
+static void
+substitute (m4 *context, m4_obstack *obs, const char *victim,
+           const char *repl, m4_pattern_buffer *buf)
+{
+  register unsigned int ch;
+
+  for (;;)
+    {
+      while ((ch = *repl++) != '\\')
+       {
+         if (ch == '\0')
+           return;
+         obstack_1grow (obs, ch);
+       }
+
+      switch ((ch = *repl++))
+       {
+       case '0':
+         if (!substitute_warned)
+           {
+             M4ERROR ((m4_get_warning_status_opt (context), 0, _("\
+WARNING: \\0 will disappear, use \\& instead in replacements")));
+             substitute_warned = 1;
+           }
+         /* Fall through.  */
+
+       case '&':
+         obstack_grow (obs, victim + buf->regs.start[0],
+                       buf->regs.end[0] - buf->regs.start[0]);
+         break;
+
+       case '1': case '2': case '3': case '4': case '5': case '6':
+       case '7': case '8': case '9':
+         ch -= '0';
+         if (buf->regs.end[ch] > 0)
+           obstack_grow (obs, victim + buf->regs.start[ch],
+                         buf->regs.end[ch] - buf->regs.start[ch]);
+         break;
+
+       default:
+         obstack_1grow (obs, ch);
+         break;
+       }
+    }
+}
+
+
+static bool
+m4_regexp_substitute (m4 *context, m4_obstack *obs, const char *caller,
+                     const char *victim, const char *regexp,
+                     m4_pattern_buffer *buf, const char *replace,
+                     bool ignore_duplicates)
+{
+  int matchpos = 0;            /* start position of match */
+  int offset   = 0;            /* current match offset */
+  int length   = strlen (victim);
+
+  while (offset < length)
+    {
+      matchpos = m4_regexp_search (buf, victim, length,
+                                  offset, length - offset);
+
+      if (matchpos < 0)
+       {
+
+         /* Match failed -- either error or there is no match in the
+            rest of the string, in which case the rest of the string is
+            copied verbatim.  */
+
+         if (matchpos == -2)
+           M4ERROR ((m4_get_warning_status_opt (context), 0,
+                     _("%s: error matching regular expression `%s'"),
+                     caller, regexp));
+         else if (!ignore_duplicates && (offset < length))
+           obstack_grow (obs, victim + offset, length - offset);
+         break;
+       }
+
+      /* Copy the part of the string that was skipped by re_search ().  */
+
+      if (matchpos > offset)
+       obstack_grow (obs, victim + offset, matchpos - offset);
+
+      /* Handle the part of the string that was covered by the match.  */
+
+      substitute (context, obs, victim, replace, buf);
+
+      /* Update the offset to the end of the match.  If the regexp
+        matched a null string, advance offset one more, to avoid
+        infinite loops.  */
+
+      offset = buf->regs.end[0];
+      if (buf->regs.start[0] == buf->regs.end[0])
+       obstack_1grow (obs, victim[offset++]);
+    }
+
+  if (!ignore_duplicates || (matchpos >= 0))
+    obstack_1grow (obs, '\0');
+
+  return (matchpos >= 0);
+}
+
+
+
+
+/**
+ * __file__
+ **/
+M4BUILTIN_HANDLER (__file__)
+{
+  m4_shipout_string (context, obs, m4_current_file, 0, true);
+}
+
+
+/**
+ * __line__
+ **/
+M4BUILTIN_HANDLER (__line__)
+{
+  m4_shipout_int (obs, m4_current_line);
+}
+
+
 /* The builtin "builtin" allows calls to builtin macros, even if their
    definition has been overridden or shadowed.  It is thus possible to
    redefine builtins, and still access their original definition.  */
@@ -170,8 +309,7 @@
 
 /* The builtin "indir" allows indirect calls to macros, even if their name
    is not a proper macro name.  It is thus possible to define macros with
-   ill-formed names for internal use in larger macro packages.  This macro
-   is not available in compatibility mode.  */
+   ill-formed names for internal use in larger macro packages.  */
 
 /**
  * indir(MACRO, [...])
@@ -188,9 +326,45 @@
     m4_macro_call (context, symbol, obs, argc - 1, argv + 1);
 }
 
-/* Change the current input syntax.  The function set_syntax () lives
-   in input.c.  For compability reasons, this function is not called,
-   if not followed by a` SYNTAX_OPEN.  Also, any changes to comment
+
+/* Change the current regexp syntax.  Currently this affects the
+   builtins: `patsubst', `regexp' and `renamesyms'.  */
+
+static int
+m4_resyntax_encode_safe (m4 *context, const char *caller, const char *spec)
+{
+  int resyntax = -1;
+
+  if (spec)
+    {
+      resyntax = m4_regexp_syntax_encode (spec);
+
+      if (resyntax < 0)
+       {
+         M4ERROR ((m4_get_warning_status_opt (context), 0,
+                   _("%s: bad syntax-spec: `%s'"),
+                   caller, spec));
+       }
+    }
+
+  return resyntax;
+}
+
+/**
+ * changeresyntax([RESYNTAX-SPEC])
+ **/
+M4BUILTIN_HANDLER (changeresyntax)
+{
+  int resyntax = m4_resyntax_encode_safe (context, M4ARG (0), M4ARG (1));
+
+  if (resyntax >= 0)
+    m4_set_regexp_syntax_opt (context, resyntax);
+}
+
+
+/* Change the current input syntax.  The function m4_set_syntax () lives
+   in syntax.c.  For compatibility reasons, this function is not called,
+   if not followed by a SYNTAX_OPEN.  Also, any changes to comment
    delimiters and quotes made here will be overridden by a call to
    `changecom' or `changequote'.  */
 
@@ -212,12 +386,16 @@
              && (key != '\0'))
            {
              M4ERROR ((m4_get_warning_status_opt (context), 0,
-                       _("Undefined syntax code %c"), key));
+                       _("%s: undefined syntax code: `%c'"),
+                       M4ARG (0), key));
            }
        }
     }
+  else
+    assert (!"Unable to import from m4 module");
 }
 
+
 /* On-the-fly control of the format of the tracing output.  It takes one
    argument, which is a character string like given to the -d option, or
    none in which case the debug_level is zeroed.  */
@@ -248,7 +426,7 @@
 
       if (new_debug_level < 0)
        M4ERROR ((m4_get_warning_status_opt (context), 0,
-                 _("Debugmode: bad debug flags: `%s'"), M4ARG (1)));
+                 _("%s: bad debug flags: `%s'"), M4ARG(0), M4ARG (1)));
       else
        {
          switch (change_flag)
@@ -269,6 +447,7 @@
     }
 }
 
+
 /* Specify the destination of the debugging output.  With one argument, the
    argument is taken as a file name, with no arguments, revert to stderr.  */
 
@@ -281,49 +460,90 @@
     m4_debug_set_output (context, NULL);
   else if (!m4_debug_set_output (context, M4ARG (1)))
     M4ERROR ((m4_get_warning_status_opt (context), errno,
-             _("Cannot set error file: %s"), M4ARG (1)));
+             _("%s: cannot set error file `%s'"), M4ARG (0), M4ARG (1)));
 }
 
 
-/* Compile a REGEXP using the Regex SYNTAX bits return the buffer.
-   Report errors on behalf of CALLER.  */
+/**
+ * esyscmd(SHELL-COMMAND)
+ **/
 
-static m4_pattern_buffer *
-m4_regexp_compile (m4 *context, const char *caller,
-                  const char *regexp, int syntax)
+M4BUILTIN_HANDLER (esyscmd)
 {
-  static m4_pattern_buffer buf;        /* compiled regular expression */
-  static bool buf_initialized = false;
-  const char *msg;             /* error message from re_compile_pattern */
+  M4_MODULE_IMPORT (m4, m4_set_sysval);
+  M4_MODULE_IMPORT (m4, m4_sysval_flush);
 
-  if (!buf_initialized)
+  if (m4_set_sysval && m4_sysval_flush)
     {
-      buf_initialized  = true;
-      buf.pat.buffer   = NULL;
-      buf.pat.allocated        = 0;
-      buf.pat.fastmap  = NULL;
-      buf.pat.translate        = NULL;
+      FILE *pin;
+      int ch;
+
+      m4_sysval_flush (context);
+      errno = 0;
+      pin = popen (M4ARG (1), "r");
+      if (pin == NULL)
+       {
+         M4ERROR ((m4_get_warning_status_opt (context), errno,
+                   _("%s: cannot open pipe to command `%s'"),
+                   M4ARG (0), M4ARG (1)));
+         m4_set_sysval (0xffff);
+       }
+      else
+       {
+         while ((ch = getc (pin)) != EOF)
+           obstack_1grow (obs, (char) ch);
+         m4_set_sysval (pclose (pin));
+       }
     }
+  else
+    assert (!"Unable to import from m4 module");
+}
 
-  re_set_syntax (syntax);
-  msg = re_compile_pattern (regexp, strlen (regexp), &buf.pat);
 
-  if (msg != NULL)
-    {
-      M4ERROR ((m4_get_warning_status_opt (context), 0,
-               _("%s: bad regular expression `%s': %s"),
-               caller, regexp, msg));
-      return NULL;
-    }
+/* Frontend for printf like formatting.  The function format () lives in
+   the file format.c.  */
 
-  return &buf;
+#include "format.c"
+
+/**
+ * format(FORMAT-STRING, [...])
+ **/
+M4BUILTIN_HANDLER (format)
+{
+  format (obs, argc - 1, argv + 1);
 }
 
-static int
-m4_regexp_search (m4_pattern_buffer *buf, const char *string,
-                 const int size, const int start, const int range)
+
+/* Substitute all matches of a regexp occurring in a string.  Each match of
+   the second argument (a regexp) in the first argument is changed to the
+   third argument, with \& substituted by the matched text, and \N
+   substituted by the text matched by the Nth parenthesized sub-expression.  */
+
+/**
+ * patsubst(VICTIM, REGEXP, [REPLACEMENT], [RESYNTAX])
+ **/
+M4BUILTIN_HANDLER (patsubst)
 {
-  return re_search (&(buf->pat), string, size, start, range, &(buf->regs));
+  const char *me;              /* name of this macro */
+  m4_pattern_buffer *buf;      /* compiled regular expression */
+  int resyntax;
+
+  me = M4ARG (0);
+
+  resyntax = m4_get_regexp_syntax_opt (context);
+  if (argc == 5)
+    {
+      resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+      if (resyntax < 0)
+       return;
+    }
+
+  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
+  if (!buf)
+    return;
+
+  m4_regexp_substitute (context, obs, me, M4ARG (1), M4ARG (2), buf,
+                       M4ARG (3), false);
 }
 
 
@@ -333,167 +553,117 @@
    the expansion to this argument.  */
 
 /**
- * regexp(VICTIM, REGEXP, [REPLACEMENT])
- * eregexp(VICTIM, REGEXP, [REPLACEMENT])
+ * regexp(VICTIM, REGEXP, [REPLACEMENT], [RESYNTAX])
  **/
-
-static void
-m4_regexp_do (m4 *context, m4_obstack *obs, int argc,
-             m4_symbol_value **argv, int syntax)
+M4BUILTIN_HANDLER (regexp)
 {
-  const char *caller;          /* calling macro name */
-  const char *victim;          /* first argument */
-  const char *regexp;          /* regular expression */
-
+  const char *me;              /* name of this macro */
   m4_pattern_buffer *buf;      /* compiled regular expression */
   int startpos;                        /* start position of match */
   int length;                  /* length of first argument */
+  int resyntax;
 
-  caller = M4ARG (0);
-  victim = M4ARG (1);
-  regexp = M4ARG (2);
+  me = M4ARG (0);
 
-  buf = m4_regexp_compile (context, caller, regexp, syntax);
+  resyntax = m4_get_regexp_syntax_opt (context);
+  if (argc == 5)
+    {
+      resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+      if (resyntax < 0)
+       return;
+    }
+
+  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
   if (!buf)
     return;
 
-  length = strlen (victim);
-  startpos = m4_regexp_search (buf, victim, length, 0, length);
+  length = strlen (M4ARG (1));
+  startpos = m4_regexp_search (buf, M4ARG (1), length, 0, length);
 
   if (startpos  == -2)
     {
       M4ERROR ((m4_get_warning_status_opt (context), 0,
                _("%s: error matching regular expression `%s'"),
-               caller, regexp));
+               me, M4ARG (2)));
       return;
     }
 
   if (argc == 3)
     m4_shipout_int (obs, startpos);
   else if (startpos >= 0)
-    substitute (context, obs, victim, M4ARG (3), buf);
+    substitute (context, obs, M4ARG (1), M4ARG (3), buf);
 
   return;
 }
 
 
-/**
- * regexp(VICTIM, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (regexp)
-{
-  m4_regexp_do (context, obs, argc, argv, RE_SYNTAX_BRE);
-}
+/* Rename all current symbols that match REGEXP according to the
+   REPLACEMENT specification.  */
 
 /**
- * eregexp(VICTIM, REGEXP, [REPLACEMENT])
+ * renamesyms(REGEXP, REPLACEMENT, [RESYNTAX])
  **/
-M4BUILTIN_HANDLER (eregexp)
+M4BUILTIN_HANDLER (renamesyms)
 {
-  m4_regexp_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
+  M4_MODULE_IMPORT (m4, m4_dump_symbols);
 
+  if (m4_dump_symbols)
+    {
+      const char *me;          /* name of this macro */
+      const char *regexp;      /* regular expression string */
+      const char *replace;     /* replacement expression string */
 
+      m4_pattern_buffer *buf;  /* compiled regular expression */
 
-/* Substitute all matches of a regexp occuring in a string.  Each match of
-   the second argument (a regexp) in the first argument is changed to the
-   third argument, with \& substituted by the matched text, and \N
-   substituted by the text matched by the Nth parenthesized sub-expression.  */
+      m4_dump_symbol_data      data;
+      m4_obstack               data_obs;
+      m4_obstack               rename_obs;
 
-/**
- * patsubst(VICTIM, REGEXP, [REPLACEMENT])
- * epatsubst(VICTIM, REGEXP, [REPLACEMENT])
- **/
-static void
-m4_patsubst_do (m4 *context, m4_obstack *obs, int argc,
-               m4_symbol_value **argv, int syntax)
-{
-  const char *caller;          /* calling macro name */
-  const char *victim;          /* first argument */
-  const char *regexp;          /* regular expression */
-  m4_pattern_buffer *buf;      /* compiled regular expression */
+      int resyntax;
 
-  caller = M4ARG (0);
-  victim = M4ARG (1);
-  regexp = M4ARG (2);
+      me      = M4ARG (0);
+      regexp  = M4ARG (1);
+      replace = M4ARG (2);
 
-  buf = m4_regexp_compile (context, caller, regexp, syntax);
-  if (!buf)
-    return;
+      resyntax = m4_get_regexp_syntax_opt (context);
+      if (argc == 4)
+       {
+         resyntax = m4_resyntax_encode_safe (context, me, M4ARG (3));
+         if (resyntax < 0)
+           return;
+       }
 
-  regsub (context, obs, caller, victim, regexp, buf, M4ARG (3), false);
-}
+      buf = m4_regexp_compile (context, me, regexp, resyntax);
+      if (!buf)
+       return;
 
-static bool
-regsub (m4 *context, m4_obstack *obs, const char *caller,
-       const char *victim, const char *regexp, m4_pattern_buffer *buf,
-       const char *replace, bool ignore_duplicates)
-{
-  int matchpos = 0;            /* start position of match */
-  int offset   = 0;            /* current match offset */
-  int length   = strlen (victim);
+      obstack_init (&rename_obs);
+      obstack_init (&data_obs);
+      data.obs = &data_obs;
 
-  while (offset < length)
-    {
-      matchpos = m4_regexp_search (buf, victim, length,
-                                  offset, length - offset);
+      m4_dump_symbols (context, &data, 1, argv, false);
 
-      if (matchpos < 0)
+      for (; data.size > 0; --data.size, data.base++)
        {
+         const char *  name    = data.base[0];
+         int           length  = strlen (name);
 
-         /* Match failed -- either error or there is no match in the
-            rest of the string, in which case the rest of the string is
-            copied verbatim.  */
+         if (m4_regexp_substitute (context, &rename_obs, me, name, regexp,
+                                   buf, replace, true))
+           {
+             const char *renamed = obstack_finish (&rename_obs);
 
-         if (matchpos == -2)
-           M4ERROR ((m4_get_warning_status_opt (context), 0,
-                     _("%s: error matching regular expression `%s'"),
-                     caller, regexp));
-         else if (!ignore_duplicates && (offset < length))
-           obstack_grow (obs, victim + offset, length - offset);
-         break;
+             m4_symbol_rename (M4SYMTAB, name, renamed);
+           }
        }
 
-      /* Copy the part of the string that was skipped by re_search ().  */
-
-      if (matchpos > offset)
-       obstack_grow (obs, victim + offset, matchpos - offset);
-
-      /* Handle the part of the string that was covered by the match.  */
-
-      substitute (context, obs, victim, replace, buf);
-
-      /* Update the offset to the end of the match.  If the regexp
-        matched a null string, advance offset one more, to avoid
-        infinite loops.  */
-
-      offset = buf->regs.end[0];
-      if (buf->regs.start[0] == buf->regs.end[0])
-       obstack_1grow (obs, victim[offset++]);
+      obstack_free (&data_obs, NULL);
+      obstack_free (&rename_obs, NULL);
     }
-
-  if (!ignore_duplicates || (matchpos >= 0))
-    obstack_1grow (obs, '\0');
-
-  return (matchpos >= 0);
-}
-
-
-/**
- * patsubst(STRING, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (patsubst)
-{
-  m4_patsubst_do (context, obs, argc, argv, RE_SYNTAX_BRE);
+  else
+    assert (!"Unable to import from m4 module");
 }
 
-/**
- * epatsubst(STRING, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (epatsubst)
-{
-  m4_patsubst_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
 
 /* Implementation of "symbols".  It builds up a table of pointers to
    symbols, sorts it and ships out the symbol names.  */
@@ -526,7 +696,7 @@
     assert (!"Unable to import from m4 module");
 }
 
-
+
 /* This contains macro which implements syncoutput() which takes one arg
      1, on, yes - turn on sync lines
      0, off, no - turn off sync lines
@@ -549,196 +719,3 @@
        m4_set_sync_output_opt (context, true);
     }
 }
-
-
-/**
- * esyscmd(SHELL-COMMAND)
- **/
-
-M4BUILTIN_HANDLER (esyscmd)
-{
-  M4_MODULE_IMPORT (m4, m4_set_sysval);
-  M4_MODULE_IMPORT (m4, m4_sysval_flush);
-
-  if (m4_set_sysval && m4_sysval_flush)
-    {
-      FILE *pin;
-      int ch;
-
-      m4_sysval_flush (context);
-      errno = 0;
-      pin = popen (M4ARG (1), "r");
-      if (pin == NULL)
-       {
-         M4ERROR ((m4_get_warning_status_opt (context), errno,
-                   _("Cannot open pipe to command `%s'"), M4ARG (1)));
-         m4_set_sysval (0xffff);
-       }
-      else
-       {
-         while ((ch = getc (pin)) != EOF)
-           obstack_1grow (obs, (char) ch);
-         m4_set_sysval (pclose (pin));
-       }
-    }
-}
-
-
-
-/* Rename all current symbols that match REGEXP according to the
-   REPLACEMENT specification.  */
-
-/**
- * renamesyms(REGEXP, REPLACEMENT)
- * erenamesyms(REGEXP, REPLACEMENT)
- **/
-static void
-m4_renamesyms_do (m4 *context, m4_obstack *obs, int argc,
-                 m4_symbol_value **argv, int syntax)
-{
-  const char *caller;          /* calling macro name */
-  const char *regexp;          /* regular expression string */
-  const char *replace;         /* replacement expression string */
-
-  m4_pattern_buffer *buf;      /* compiled regular expression */
-
-  m4_dump_symbol_data  data;
-  m4_obstack           data_obs;
-  m4_obstack           rename_obs;
-
-  M4_MODULE_IMPORT (m4, m4_dump_symbols);
-
-  assert (m4_dump_symbols);
-
-  caller  = M4ARG (0);
-  regexp  = M4ARG (1);
-  replace = M4ARG (2);
-
-  buf = m4_regexp_compile (context, caller, regexp, syntax);
-  if (!buf)
-    return;
-
-  obstack_init (&rename_obs);
-  obstack_init (&data_obs);
-  data.obs = &data_obs;
-
-  m4_dump_symbols (context, &data, 1, argv, false);
-
-  for (; data.size > 0; --data.size, data.base++)
-    {
-      const char *     name    = data.base[0];
-      int              length  = strlen (name);
-
-      if (regsub (context, &rename_obs, caller, name, regexp, buf,
-                 replace, true))
-       {
-         const char *renamed = obstack_finish (&rename_obs);
-
-         m4_symbol_rename (M4SYMTAB, name, renamed);
-       }
-    }
-
-  obstack_free (&data_obs, NULL);
-  obstack_free (&rename_obs, NULL);
-}
-
-/**
- * renamesyms(REGEXP, REPLACEMENT)
- **/
-M4BUILTIN_HANDLER (renamesyms)
-{
-  m4_renamesyms_do (context, obs, argc, argv, RE_SYNTAX_BRE);
-}
-
-/**
- * erenamesyms(REGEXP, REPLACEMENT)
- **/
-M4BUILTIN_HANDLER (erenamesyms)
-{
-  m4_renamesyms_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
-
-
-
-/* Frontend for printf like formatting.  The function format () lives in
-   the file format.c.  */
-
-/**
- * format(FORMAT-STRING, [...])
- **/
-M4BUILTIN_HANDLER (format)
-{
-  format (obs, argc - 1, argv + 1);
-}
-
-
-/**
- * __file__
- **/
-M4BUILTIN_HANDLER (__file__)
-{
-  m4_shipout_string (context, obs, m4_current_file, 0, true);
-}
-
-
-/**
- * __line__
- **/
-M4BUILTIN_HANDLER (__line__)
-{
-  m4_shipout_int (obs, m4_current_line);
-}
-
-/* Function to perform substitution by regular expressions.  Used by the
-   builtins regexp, patsubst and renamesyms.  The changed text is placed on
-   the obstack.  The substitution is REPL, with \& substituted by this part
-   of VICTIM matched by the last whole regular expression, taken from
-   REGS[0], and \N substituted by the text matched by the Nth parenthesized
-   sub-expression, taken from REGS[N].  */
-static int substitute_warned = 0;
-
-static void
-substitute (m4 *context, m4_obstack *obs, const char *victim,
-           const char *repl, m4_pattern_buffer *buf)
-{
-  register unsigned int ch;
-
-  for (;;)
-    {
-      while ((ch = *repl++) != '\\')
-       {
-         if (ch == '\0')
-           return;
-         obstack_1grow (obs, ch);
-       }
-
-      switch ((ch = *repl++))
-       {
-       case '0':
-         if (!substitute_warned)
-           {
-             M4ERROR ((m4_get_warning_status_opt (context), 0, _("\
-WARNING: \\0 will disappear, use \\& instead in replacements")));
-             substitute_warned = 1;
-           }
-         /* Fall through.  */
-
-       case '&':
-         obstack_grow (obs, victim + buf->regs.start[0],
-                       buf->regs.end[0] - buf->regs.start[0]);
-         break;
-
-       case '1': case '2': case '3': case '4': case '5': case '6':
-       case '7': case '8': case '9':
-         ch -= '0';
-         if (buf->regs.end[ch] > 0)
-           obstack_grow (obs, victim + buf->regs.start[ch],
-                         buf->regs.end[ch] - buf->regs.start[ch]);
-         break;
-
-       default:
-         obstack_1grow (obs, ch);
-         break;
-       }
-    }
-}
Index: m4--devo--0/src/main.c
===================================================================
--- m4--devo--0.orig/src/main.c 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/src/main.c      2006-07-07 12:14:52.000000000 +0100
@@ -103,7 +103,14 @@
   -e, --interactive            unbuffer output, ignore interrupts\n\
   -E, --fatal-warnings         stop execution after first warning\n\
   -Q, --quiet, --silent        suppress some warnings for builtins\n\
-  -P, --prefix-builtins        force a `m4_' prefix to all builtins\n"),
+  -P, --prefix-builtins        force a `m4_' prefix to all builtins\n\
+  -r, --regexp-syntax=[SPEC]   change the default regexp syntax\n"),
+            stdout);
+      fputs (_("\
+\n\
+SPEC is any one of:\n\
+  AWK, BASIC, BSD_M4, ED, EMACS, EXTENDED, GNU_AWK, GNU_EGREP, GNU_M4,\n\
+  GREP, POSIX_AWK, POSIX_EGREP, MINIMAL, MINIMAL_BASIC, SED.\n"),
             stdout);
       printf (_("\
 \n\
@@ -183,6 +190,7 @@
   {"nesting-limit", required_argument, NULL, 'L'},
   {"prefix-builtins", no_argument, NULL, 'P'},
   {"quiet", no_argument, NULL, 'Q'},
+  {"regexp-syntax", required_argument, NULL, 'r'},
   {"reload-state", required_argument, NULL, 'R'},
   {"silent", no_argument, NULL, 'Q'},
   {"synclines", no_argument, NULL, 's'},
@@ -202,7 +210,7 @@
   { 0, 0, 0, 0 },
 };
 
-#define OPTSTRING "B:D:EF:GH:I:L:M:N:PQR:S:T:U:bcd::el:m:o:st:"
+#define OPTSTRING "B:D:EF:GH:I:L:M:N:PQR:S:T:U:bcd::el:m:o:r:st:"
 
 int
 main (int argc, char *const *argv, char *const *envp)
@@ -271,6 +279,7 @@
       case 'U':
       case 't':
       case 'm':
+      case 'r':
        /* Arguments that cannot be handled until later are accumulated.  */
 
        new = xmalloc (sizeof *new);
@@ -432,6 +441,7 @@
       {
        macro_definition *next;
        char *macro_value;
+       char *optarg = defines->macro;
 
        switch (defines->code)
          {
@@ -439,27 +449,38 @@
            {
              m4_symbol_value *value = m4_symbol_value_create ();
 
-             macro_value = strchr (defines->macro, '=');
+             macro_value = strchr (optarg, '=');
              if (macro_value == NULL)
                macro_value = "";
              else
                *macro_value++ = '\0';
              m4_set_symbol_value_text (value, xstrdup (macro_value));
 
-             m4_symbol_pushdef (M4SYMTAB, defines->macro, value);
+             m4_symbol_pushdef (M4SYMTAB, optarg, value);
            }
            break;
 
          case 'U':
-           m4_symbol_delete (M4SYMTAB, defines->macro);
+           m4_symbol_delete (M4SYMTAB, optarg);
            break;
 
          case 't':
-           m4_set_symbol_name_traced (M4SYMTAB, defines->macro);
+           m4_set_symbol_name_traced (M4SYMTAB, optarg);
            break;
 
          case 'm':
-           m4_module_load (context, defines->macro, 0);
+           m4_module_load (context, optarg, 0);
+           break;
+
+         case 'r':
+           m4_set_regexp_syntax_opt (context,
+                                     m4_regexp_syntax_encode (optarg));
+           if (m4_get_regexp_syntax_opt (context) < 0)
+             {
+               M4ERROR ((m4_get_warning_status_opt (context), 0,
+                         _("Bad regexp syntax option: `%s'"), optarg));
+               abort ();
+             }
            break;
 
          default:
Index: m4--devo--0/m4/m4module.h
===================================================================
--- m4--devo--0.orig/m4/m4module.h      2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/m4/m4module.h   2006-07-07 12:14:52.000000000 +0100
@@ -1,7 +1,7 @@
 /* GNU m4 -- A simple macro processor
 
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1999, 2000, 2003,
-   2004, 2005 Free Software Foundation, Inc.
+   2004, 2005, 2006 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -113,11 +113,12 @@
        M4FIELD(m4_syntax_table *, syntax_table,   syntax)              \
        M4FIELD(FILE *,            debug_file,     debug_file)          \
        M4FIELD(m4_obstack,        trace_messages, trace_messages)      \
-       M4FIELD(int,     warning_status_opt,       warning_status)      \
-       M4FIELD(bool, no_gnu_extensions_opt,    no_gnu_extensions)      \
-       M4FIELD(int,     nesting_limit_opt,        nesting_limit)       \
-       M4FIELD(int,     debug_level_opt,          debug_level)         \
-       M4FIELD(int,     max_debug_arg_length_opt, max_debug_arg_length)\
+       M4FIELD(int,    warning_status_opt,        warning_status)      \
+       M4FIELD(bool,   no_gnu_extensions_opt,     no_gnu_extensions)   \
+       M4FIELD(int,    nesting_limit_opt,         nesting_limit)       \
+       M4FIELD(int,    debug_level_opt,           debug_level)         \
+       M4FIELD(int,    max_debug_arg_length_opt,  max_debug_arg_length)\
+       M4FIELD(int,    regexp_syntax_opt,         regexp_syntax)       \
 
 
 #define m4_context_opt_bit_table                                       \
@@ -274,6 +275,13 @@
 
 
 
+/* --- REGEXP SYNTAX --- */
+
+extern const char *    m4_regexp_syntax_decode (int);
+extern int             m4_regexp_syntax_encode (const char *);
+
+
+
 /* --- SYNTAX TABLE DEFINITIONS --- */
 
 extern m4_syntax_table *m4_syntax_create       (void);
Index: m4--devo--0/m4/m4private.h
===================================================================
--- m4--devo--0.orig/m4/m4private.h     2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/m4/m4private.h  2006-07-07 12:14:52.000000000 +0100
@@ -60,10 +60,11 @@
 
   /* Option flags  (set in src/main.c).  */
   int          warning_status;                 /* -E */
-  bool no_gnu_extensions;              /* -G */
+  bool         no_gnu_extensions;              /* -G */
   int          nesting_limit;                  /* -L */
   int          debug_level;                    /* -d */
   int          max_debug_arg_length;           /* -l */
+  int          regexp_syntax;                  /* -r */
   int          opt_flags;
 
   /* __PRIVATE__: */
@@ -98,6 +99,8 @@
 #  define m4_set_debug_level_opt(C, V)         ((C)->debug_level = (V))
 #  define m4_get_max_debug_arg_length_opt(C)   ((C)->max_debug_arg_length)
 #  define m4_set_max_debug_arg_length_opt(C, V)        
((C)->max_debug_arg_length=(V))
+#  define m4_get_regexp_syntax_opt(C)          ((C)->regexp_syntax)
+#  define m4_set_regexp_syntax_opt(C, V)       ((C)->regexp_syntax = (V))
 
 #  define m4_get_prefix_builtins_opt(C)                                        
\
                (BIT_TEST((C)->opt_flags, M4_OPT_PREFIX_BUILTINS_BIT))
Index: m4--devo--0/m4/resyntax.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ m4--devo--0/m4/resyntax.c   2006-07-07 12:14:52.000000000 +0100
@@ -0,0 +1,117 @@
+/* GNU m4 -- A simple macro processor
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301  USA
+*/
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <regex.h>
+#include <string.h>
+
+#include "m4private.h"
+#include "strcase.h"
+
+typedef struct {               /* one row: syntax name -> regex syntax code */
+  const char   *spec;          /* syntax name; NULL in the end marker */
+  const int    code;           /* RE_SYNTAX_* value; -1 in the end marker */
+} m4_resyntax;
+
+/* Names listed here are written to frozen files, so changing a mapping
+   breaks frozen files made earlier.  Several names share one code and
+   m4_regexp_syntax_decode reports the first, so row order matters.  */
+
+static const m4_resyntax m4_resyntax_map[] =
+{
+  { "AWK",                     RE_SYNTAX_AWK },
+  { "BASIC",                   RE_SYNTAX_POSIX_BASIC },
+  { "BSD_M4",                  RE_SYNTAX_POSIX_EXTENDED },
+  { "ED",                      RE_SYNTAX_ED },
+  { "EGREP",                   RE_SYNTAX_EGREP },
+  { "EMACS",                   RE_SYNTAX_EMACS },
+  { "EXTENDED",                RE_SYNTAX_POSIX_EXTENDED },
+  { "GAWK",                    RE_SYNTAX_GNU_AWK },
+  { "GNU_AWK",                 RE_SYNTAX_GNU_AWK },
+  { "GNU_EGREP",               RE_SYNTAX_EGREP },
+  { "GNU_EMACS",               RE_SYNTAX_EMACS },
+  { "GNU_M4",                  RE_SYNTAX_EMACS },
+  { "GREP",                    RE_SYNTAX_GREP },
+  { "MINIMAL",                 RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "MINIMAL_BASIC",           RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "POSIX_AWK",               RE_SYNTAX_POSIX_AWK },
+  { "POSIX_BASIC",             RE_SYNTAX_POSIX_BASIC },
+  { "POSIX_EGREP",             RE_SYNTAX_POSIX_EGREP },
+  { "POSIX_EXTENDED",          RE_SYNTAX_POSIX_EXTENDED },
+  { "POSIX_MINIMAL",           RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "POSIX_MINIMAL_BASIC",     RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "SED",                     RE_SYNTAX_SED },
+
+  { NULL,                      -1 }
+};
+
+
+/* Return the internal code representing the syntax SPEC, or -1 if
+   SPEC is invalid.  The `m4_resyntax_map' table is searched case
+   insensitively, after replacing any spaces or dashes in SPEC with
+   underscore characters.  Possible matches for the "GNU_M4" element
+   then, are "gnu m4", "GNU-m4" or "Gnu_M4".  */
+int
+m4_regexp_syntax_encode (const char *spec)
+{
+  const m4_resyntax *resyntax;
+  char *canonical;
+  char *p;
+
+  assert (spec);
+  canonical = strdup (spec);   /* private copy; SPEC is read-only */
+  if (canonical == NULL)
+    return -1;                 /* out of memory: treat as no match */
+
+  /* Canonicalise SPEC.  */
+  for (p = canonical; *p != '\0'; ++p)
+    {
+      if ((*p == ' ') || (*p == '-'))
+       *p = '_';
+    }
+
+  /* Search with the canonical copy; SPEC may still hold ' ' or '-'.  */
+  for (resyntax = m4_resyntax_map; resyntax->spec != NULL; ++resyntax)
+    {
+      if (!strcasecmp (resyntax->spec, canonical))
+       break;
+    }
+  free (canonical);
+  return resyntax->code;       /* the NULL-spec end marker yields -1 */
+}
+
+
+/* Map CODE back to a syntax name.  The first table entry whose code
+   equals CODE supplies the name; NULL is returned when none does.  */
+const char *
+m4_regexp_syntax_decode (int code)
+{
+  const m4_resyntax *entry = m4_resyntax_map;
+
+  while (entry->spec != NULL)
+    {
+      if (entry->code == code)
+       return entry->spec;
+      ++entry;
+    }
+  return NULL;
+}
Index: m4--devo--0/Makefile.am
===================================================================
--- m4--devo--0.orig/Makefile.am        2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/Makefile.am     2006-07-07 12:14:52.000000000 +0100
@@ -228,6 +228,7 @@
                  m4/output.c \
                  m4/path.c \
                  m4/pathconf.h \
+                 m4/resyntax.c \
                  m4/symtab.c \
                  m4/syntax.c \
                  m4/utility.c
Index: m4--devo--0/doc/m4.texinfo
===================================================================
--- m4--devo--0.orig/doc/m4.texinfo     2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/doc/m4.texinfo  2006-07-07 12:22:41.000000000 +0100
@@ -140,6 +140,7 @@
 * Other tokens::                Other kinds of input tokens
 * Comments::                    Comments in m4 input
 * Input processing::            How m4 copies input to output
+* Regular expression syntax::  How m4 interprets regular expressions
 
 How to invoke macros
 
@@ -157,7 +158,7 @@
 * Undefine::                    Deleting a macro
 * Defn::                        Renaming macros
 * Pushdef::                     Temporarily redefining macros
-* Erenamesyms and Renamesyms::  Renaming macros with regular expressions
+* Renamesyms::                  Renaming macros with regular expressions
 
 * Indir::                       Indirect call of macros
 * Builtin::                     Indirect call of builtins
@@ -182,6 +183,7 @@
 * Dnl::                         Deleting whitespace in input
 * Changequote::                 Changing the quote characters
 * Changecom::                   Changing the comment delimiters
+* Changeresyntax::              Changing the regular expression syntax
 * Changesyntax::                Changing the lexical structure of the input
 * M4wrap::                      Saving input until end of input
 
@@ -208,10 +210,10 @@
 
 * Len::                         Calculating length of strings
 * Index::                       Searching for substrings
-* Eregexp and Regexp::          Searching for regular expressions
+* Regexp::                      Searching for regular expressions
 * Substr::                      Extracting substrings
 * Translit::                    Translating characters
-* Epatsubst and Patsubst::      Substituting text by regular expression
+* Patsubst::                    Substituting text by regular expression
 * Format::                      Formatting strings (printf-like)
 
 Macros for doing arithmetic
@@ -392,6 +394,13 @@
 @samp{m4_define} instead of @samp{define}, and @samp{m4___file__}
 instead of @samp{__file__}.
 
+@item -r @var{RESYNTAX-SPEC}
address@hidden address@hidden
+Set the regular expression syntax according to @var{RESYNTAX-SPEC}.
+When this option is not given, @sc{gnu} M4 uses emacs compatible
+regular expressions.  @xref{Changeresyntax}, for more details on the
+format and meaning of @var{RESYNTAX-SPEC}.
+
 @item -M @var{DIRECTORY}
 @itemx address@hidden
 Specify an alternate @var{DIRECTORY} to search for modules.  This option
@@ -603,7 +612,7 @@
 call of the macro will be shown, giving descriptive names to the
 arguments, e.g.,
 
-@deffn {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt @var{replacement})}
+@deffn {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt @var{replacement},} @w{opt @var{resyntax})}
 Description of @samp{regexp}.
 @end deffn
 
@@ -649,6 +658,7 @@
 * Other tokens::                Other kinds of input tokens
 * Comments::                    Comments in m4 input
 * Input processing::            How m4 copies input to output
+* Regular expression syntax::  How m4 interprets regular expressions
 @end menu
 
 @node Names
@@ -771,6 +781,11 @@
 This process continues until there are no more macro calls to expand and
 all the input has been consumed.
 
+@node Regular expression syntax
+@section Regular Expression Syntax
+
+@include regexprops-generic.texi
+
 @node Macros
 @chapter How to invoke macros
 
@@ -1024,7 +1039,7 @@
 * Undefine::                    Deleting a macro
 * Defn::                        Renaming macros
 * Pushdef::                     Temporarily redefining macros
-* Erenamesyms and Renamesyms::  Renaming macros with regular expressions
+* Renamesyms::                  Renaming macros with regular expressions
 
 * Indir::                       Indirect call of macros
 * Builtin::                     Indirect call of builtins
@@ -1408,18 +1423,23 @@
 It is possible to temporarily redefine a builtin with @code{pushdef}
 and @code{defn}.
 
-@node Erenamesyms and Renamesyms
+@node Renamesyms
 @section Renaming macros with regular expressions
 
 @cindex regular expressions
 @cindex macros, how to rename
 @cindex renaming macros
 @cindex GNU extensions
-@deffn {Builtin (gnu)} erenamesyms (@var{regexp}, @var{replacement})
-Global renaming of macros is done by @code{erenamesyms}, which selects
+@deffn {Builtin (gnu)} renamesyms (@var{regexp}, @var{replacement}, @w{opt @var{resyntax})}
+Global renaming of macros is done by @code{renamesyms}, which selects
 all macros with names that match @var{regexp}, and renames each match
 according to @var{replacement}.
 
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default.  @xref{Changeresyntax}, for details of the values
+that can be given for this argument.
+
 A macro that does not have a name that matches @var{regexp} is left
 with its original name.  If only part of the name matches, any part of
 the name that is not covered by @var{regexp} is copied to the
@@ -1435,37 +1455,35 @@
 @var{regexp}, and @samp{\&} being the text matched by the entire
 regular expression.
 
-The builtin macro @code{erenamesyms} is recognized only when given
+The builtin macro @code{renamesyms} is recognized only when given
 arguments.
 @end deffn
 
 Here is an example that performs the same renaming as the
 @option{--prefix-builtins} option.  Where @option{--prefix-builtins}
-only renames M4 builtin macros, @code{erenamesyms} will rename any
+only renames M4 builtin macros, @code{renamesyms} will rename any
 macros that match when it runs, including text macros.
 
 @example
-erenamesyms(`^.*$', `m4_\&')
+renamesyms(`^.*$', `m4_\&')
 @result{}
 @end example
 
-Here is a more realistic example that performs a similar renaming on
-macros with lowercase names, except that it ignores macros with names
-that begin with @samp{_}, and avoids creating macros with names that
-begin with @samp{m4_m4}.
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations.  Here is a more realistic
+example that performs a similar renaming on macros with lowercase
+names, except that it ignores macros with names that begin with
+@samp{_}, and avoids creating macros with names that begin with
+@samp{m4_m4}.
 
 @example
-erenamesyms(`^[^_]\w*$', `m4_\&')
+renamesyms(`^[^_]\w*$', `m4_\&')
 @result{}
-m4_erenamesyms(`^m4_m4(\w*)$', `m4_\1')
+m4_renamesyms(`^m4_m4(\w*)$', `m4_\1', `POSIX_EXTENDED')
 @result{}
 @end example
 
-@deffn {Builtin (gnu)} renamesyms (@var{regexp}, @var{replacement})
-Same as @code{erenamesyms}, but using Basic Regular Expression syntax,
-see @xref{Eregexp and Regexp}, for more details.
-@end deffn
-
 
 @node Indir
 @section Indirect call of macros
@@ -1945,6 +1963,7 @@
 * Dnl::                         Deleting whitespace in input
 * Changequote::                 Changing the quote characters
 * Changecom::                   Changing the comment delimiters
+* Changeresyntax::              Changing the regular expression syntax
 * Changesyntax::                Changing the lexical structure of the input
 * M4wrap::                      Saving input until end of input
 @end menu
@@ -2088,6 +2107,79 @@
 @end example
 
 
+@node Changeresyntax
+@section Changing the regular expression syntax
+
+@cindex regular expression syntax, changing
+@cindex GNU extensions
+@deffn {Builtin (gnu)} changeresyntax (@w{opt @var{resyntax}})
+By default, the @sc{gnu} extensions @code{patsubst}, @code{regexp} and
+more recently @code{renamesyms} continue to use emacs style regular
+expression syntax (@pxref{Regular expression syntax}).
+
+The @code{changeresyntax} macro expands to nothing, but changes the
+default regular expression syntax used by M4 according to the value of
+@var{resyntax}, equivalent to passing @var{resyntax} as the argument to
address@hidden when invoking @code{m4}.  @xref{Invoking m4},
+for more details.
+@end deffn
+
+Any one of the values below, case is not important, and optionally
+with @kbd{-} or @kbd{ } substituted for @kbd{_} in the given names,
+will set the default regular expression syntax as described:
+
address@hidden @dfn
address@hidden AWK
address@hidden regular expression syntax}, for details.
+
address@hidden BASIC
address@hidden POSIX_BASIC
address@hidden regular expression syntax}, for details.
+
address@hidden BSD_M4
address@hidden regular expression syntax}, for details.
+
address@hidden ED
address@hidden regular expression syntax}, for details.
+
address@hidden EMACS
address@hidden GNU_EMACS
address@hidden regular expression syntax}, for details.
+
address@hidden EXTENDED
address@hidden POSIX_EXTENDED
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_AWK
address@hidden GAWK
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_EGREP
address@hidden EGREP
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_M4
address@hidden regular expression syntax}, for details.
+
address@hidden GREP
address@hidden regular expression syntax}, for details.
+
address@hidden MINIMAL
address@hidden POSIX_MINIMAL
address@hidden POSIX_MINIMAL_BASIC
address@hidden regular expression syntax}, for details.
+
address@hidden POSIX_AWK
address@hidden regular expression syntax}, for details.
+
address@hidden POSIX_EGREP
address@hidden regular expression syntax}, for details.
+
address@hidden SED
address@hidden regular expression syntax}, for details.
address@hidden table
+
+
 @node Changesyntax
 @section Changing the lexical structure of the input
 
@@ -2952,10 +3044,10 @@
 @menu
 * Len::                         Calculating length of strings
 * Index::                       Searching for substrings
-* Eregexp and Regexp::          Searching for regular expressions
+* Regexp::                      Searching for regular expressions
 * Substr::                      Extracting substrings
 * Translit::                    Translating characters
-* Epatsubst and Patsubst::      Substituting text by regular expression
+* Patsubst::                    Substituting text by regular expression
 * Format::                      Formatting strings (printf-like)
 @end menu
 
@@ -2998,12 +3090,12 @@
 @result{}-1
 @end example
 
-@node Eregexp and Regexp
+@node Regexp
 @section Searching for regular expressions
 
 @cindex regular expressions
 @cindex GNU extensions
-@deffn {Builtin (gnu)} eregexp (@var{string}, @var{regexp}, @w{opt @var{replacement})}
+@deffn {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt @var{replacement},} @w{opt @var{resyntax})}
 Searching for regular expressions is done with the builtin
 @code{regexp}, which searches for @var{regexp} in @var{string}.  The
 syntax of regular expressions is similar to that of Perl, @sc{gnu} Awk
@@ -3014,13 +3106,18 @@
 is specified and matches, then it expands into @var{replacement}. If
 @var{regexp} does not match anywhere in @var{string}, it expands to -1.
 
-The builtin macro @code{eregexp} is recognized only when given arguments.
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default.  @xref{Changeresyntax}, for details of the values
+that can be given for this argument.
+
+The builtin macro @code{regexp} is recognized only when given arguments.
 @end deffn
 
 @example
-eregexp(`GNUs not Unix', `\<[a-z]\w+')
+regexp(`GNUs not Unix', `\<[a-z]\w+')
 @result{}5
-eregexp(`GNUs not Unix', `\<Q\w*')
+regexp(`GNUs not Unix', `\<Q\w*')
 @result{}-1
 @end example
 
@@ -3030,27 +3127,21 @@
 @samp{\&} being the text the entire regular expression matched.
 
 @example
-eregexp(`GNUs not Unix', `\w(\w+)$', `*** \& *** \1 ***')
+regexp(`GNUs not Unix', `\w\(\w+\)$', `*** \& *** \1 ***')
 @result{}*** Unix *** nix ***
 @end example
 
-Originally, regular expressions were much less powerful (basically only
address@hidden was available), but to keep backward compatibility, new
-operators were implemented with previously invalid sequences, such as
address@hidden(}.  The following macro is exactly equivalent to @code{eregexp},
-but using the old, clumsy syntax.
-
address@hidden {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt 
@var{replacement})}
-Same as @code{eregexp}, but using the old and clumsy ``Basic Regular
-Expression'' syntax, the same as in @sc{gnu} Emacs.  @xref{Regexps, ,
-Syntax of Regular Expressions, emacs, The @sc{gnu} Emacs Manual}.
address@hidden deffn
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations:
 
 @example
-regexp(`GNUs not Unix', `\w\(\w+\)$', `*** \& *** \1 ***')
+regexp(`GNUs not Unix', `\w(\w+)$', `*** \& *** \1 ***', `POSIX_EXTENDED')
 @result{}*** Unix *** nix ***
 @end example
 
+
+
 @node Substr
 @section Extracting substrings
 
@@ -3114,18 +3205,22 @@
 while converting them to lowercase.  The two first cases are by far the
 most common.
 
-@node Epatsubst and Patsubst
+@node Patsubst
 @section Substituting text by regular expression
 
 @cindex regular expressions
 @cindex pattern substitution
 @cindex substitution by regular expression
 @cindex GNU extensions
-@deffn {Builtin (gnu)} epatsubst (@var{string}, @var{regexp}, @w{opt @var{replacement})}
-Global substitution in a string is done by @code{epatsubst}, which
+@deffn {Builtin (gnu)} patsubst (@var{string}, @var{regexp}, @w{opt @var{replacement},} @w{opt @var{resyntax})}
+Global substitution in a string is done by @code{patsubst}, which
 searches @var{string} for matches of @var{regexp}, and substitutes
-@var{replacement} for each match.  It uses Extended Regular Expressions
-syntax.
+@var{replacement} for each match.
+
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default.  @xref{Changeresyntax}, for details of the values
+that can be given for this parameter.
 
 The parts of @var{string} that are not covered by any match of
 @var{regexp} are copied to the expansion.  Whenever a match is found, the
@@ -3142,23 +3237,23 @@
 The @var{replacement} argument can be omitted, in which case the text
 matched by @var{regexp} is deleted.
 
-The builtin macro @code{epatsubst} is recognized only when given
+The builtin macro @code{patsubst} is recognized only when given
 arguments.
 @end deffn
 
-When used with two arguments, while @code{eregexp} returns the position
-of the match, @code{epatsusbt} deletes it:
+When used with two arguments, while @code{regexp} returns the position
+of the match, @code{patsubst} deletes it:
 
 @example
-epatsubst(`GNUs not Unix', `^', `OBS: ')
+patsubst(`GNUs not Unix', `^', `OBS: ')
 @result{}OBS: GNUs not Unix
-epatsubst(`GNUs not Unix', `\<', `OBS: ')
+patsubst(`GNUs not Unix', `\<', `OBS: ')
 @result{}OBS: GNUs OBS: not OBS: Unix
-epatsubst(`GNUs not Unix', `\w*', `(\&)')
+patsubst(`GNUs not Unix', `\w*', `(\&)')
 @result{}(GNUs)() (not)() (Unix)
-epatsubst(`GNUs not Unix', `\w+', `(\&)')
+patsubst(`GNUs not Unix', `\w+', `(\&)')
 @result{}(GNUs) (not) (Unix)
-epatsubst(`GNUs not Unix', `[A-Z][a-z]+')
+patsubst(`GNUs not Unix', `[A-Z][a-z]+')
 @result{}GN not @comment
 @end example
 
@@ -3170,63 +3265,43 @@
 define(`upcase',   `translit(`$*', `a-z', `A-Z')')dnl
 define(`downcase', `translit(`$*', `A-Z', `a-z')')dnl
 define(`capitalize1',
-       `eregexp(`$1', `^(\w)(\w*)', `upcase(`\1')`'downcase(`\2')')')dnl
+       `regexp(`$1', `^\(\w\)\(\w*\)', `upcase(`\1')`'downcase(`\2')')')dnl
 define(`capitalize',
-       `epatsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
+       `patsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
 capitalize(`GNUs not Unix')
 @result{}Gnus Not Unix
 @end example
 
-While @code{eregexp} replaces the whole input with the replacement as
-soon as there is a match, @code{epatsubst} replaces each
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations:
+
+@example
+define(`epatsubst',
+       `builtin(`patsubst', `$1', `$2', `$3', `POSIX_EXTENDED')')dnl
+epatsubst(`bar foo baz Foo', `(\w*) (foo|Foo)', `_\1_')
+@result{}_bar_ _baz_
+patsubst(`bar foo baz Foo', `\(\w*\) \(foo\|Foo\)', `_\1_')
+@result{}_bar_ _baz_
+@end example
+
+While @code{regexp} replaces the whole input with the replacement as
+soon as there is a match, @code{patsubst} replaces each
 @emph{occurrence} of a match and preserves non matching pieces:
 
 @example
 define(`patreg',
-`epatsubst($@@)
-eregexp($@@)')dnl
-patreg(`bar foo baz Foo', `foo|Foo', `FOO')
+`patsubst($@@)
+regexp($@@)')dnl
+patreg(`bar foo baz Foo', `foo\|Foo', `FOO')
 @result{}bar FOO baz FOO
 @result{}FOO
-patreg(`aba abb 121', `(.)(.)\1', `\2\1\2')
+patreg(`aba abb 121', `\(.\)\(.\)\1', `\2\1\2')
 @result{}bab abb 212
 @result{}bab
 @end example
 
 
address@hidden {Builtin (gnu)} patsubst (@var{string}, @var{regexp}, @w{opt 
@var{replacement})}
-Same as @code{epatsubst}, but using Basic Regular Expression syntax, see
address@hidden and Regexp}, for more details.
address@hidden deffn
-
address@hidden No longer interesting for the documentation per se, but good
address@hidden for testing.
address@hidden
address@hidden
-patsubst(`GNUs not Unix', `^', `OBS: ')
address@hidden: GNUs not Unix
-patsubst(`GNUs not Unix', `\<', `OBS: ')
address@hidden: GNUs OBS: not OBS: Unix
-patsubst(`GNUs not Unix', `\w*', `(\&)')
address@hidden(GNUs)() (not)() (Unix)
-patsubst(`GNUs not Unix', `\w+', `(\&)')
address@hidden(GNUs) (not) (Unix)
-patsubst(`GNUs not Unix', `[A-Z][a-z]+')
address@hidden not @comment
address@hidden example
-
address@hidden
-define(`upcase',   `translit(`$*', `a-z', `A-Z')')dnl
-define(`downcase', `translit(`$*', `A-Z', `a-z')')dnl
-define(`capitalize1',
-       `regexp(`$1', `^\(\w\)\(\w*\)', `upcase(`\1')`'downcase(`\2')')')dnl
-define(`capitalize',
-       `patsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
-capitalize(`GNUs not Unix')
address@hidden Not Unix
address@hidden example
address@hidden ignore
-
 @node Format
 @section Formatted output
 
@@ -3814,6 +3889,11 @@
 Uses @var{string1} and @var{string2} as the beginning quote and end quote
 strings.
 
+@item R @var{length} @key{NL} @var{string} @key{NL}
+Sets the default regexp syntax, where @var{string} encodes one of the
+regular expression syntaxes supported by @sc{gnu} M4.
+@xref{Changeresyntax}, for more details.
+
 @item M @var{length} @key{NL} @var{string} @key{NL}
 Names a module which will be searched for according to the module search path
 and loaded.  Modules loaded from a frozen file don't add their builtin entries
@@ -3913,16 +3993,15 @@
 
 @item
 Searches and text substitution through regular expressions are supported
-by the @code{eregexp}, @code{regexp} (@pxref{Eregexp and Regexp}) and
-@code{epatsubst}, @code{patsubst} (@pxref{Epatsubst and Patsubst})
-builtins.
+by the @code{regexp} (@pxref{Regexp}) and @code{patsubst}
+(@pxref{Patsubst}) builtins.
 
 @item
 The syntax of regular expressions in M4 has never clearly formalized.
 While Open BSD M4 uses extended regular expressions for @code{regexp}
 and @code{patsubst}, @sc{gnu} M4 uses basic regular expression.  Use
-@code{eregexp} (@pxref{Eregexp and Regexp}) and @code{epatsubst}
-(@pxref{Epatsubst and Patsubst}) for extended regular expressions.
+@code{changeresyntax} (@pxref{Changeresyntax}) to change the regular
+expression syntax used by @sc{gnu} M4.
 
 @item
 The output of shell commands can be read into @code{m4} with
@@ -3956,8 +4035,8 @@
 In addition to the above extensions, GNU @code{m4} implements the
 following command line options: @samp{-F}, @samp{-G}, @samp{-I},
 @samp{-L}, @samp{-R}, @samp{-V}, @samp{-W}, @samp{-d},
address@hidden, @samp{-o} and @samp{-t}.  @xref{Invoking m4}, for a
-description of these options.
address@hidden, @samp{-o}, @samp{-r} and @samp{-t}.  @xref{Invoking m4},
+for a description of these options.
 
 Also, the debugging and tracing facilities in GNU @code{m4} are much
 more extensive than in most other versions of @code{m4}.
Index: m4--devo--0/tests/generate.awk
===================================================================
--- m4--devo--0.orig/tests/generate.awk 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/tests/generate.awk      2006-07-07 12:14:52.000000000 +0100
@@ -31,8 +31,8 @@
   print ;
   print "AT_BANNER([Documentation examples.])";
   print ;
-  # stop spurious warnings in the erenamesyms checks
-  print "m4_pattern_allow([^m4_(m4|erenamesyms|)$])"
+  # stop spurious warnings in the renamesyms checks
+  print "m4_pattern_allow([^m4_(m4|renamesyms|)$])"
   print ;
 }
 
Index: m4--devo--0/src/freeze.c
===================================================================
--- m4--devo--0.orig/src/freeze.c       2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/src/freeze.c    2006-07-07 12:14:52.000000000 +0100
@@ -24,20 +24,19 @@
 #include "m4.h"
 #include "m4private.h"
 
-static int   decode_char          (FILE *in);
-static void  issue_expect_message (int expected);
-static int   produce_char_dump    (char *buf, int ch);
-static void  produce_syntax_dump  (FILE *file, m4_syntax_table *syntax,
-                                   char ch);
-static void  produce_module_dump  (FILE *file, lt_dlhandle handle);
-static void  produce_symbol_dump  (m4 *context, FILE *file,
-                                   m4_symbol_table *symtab);
-static void *dump_symbol_CB       (m4_symbol_table *symtab,
-                                   const char *symbol_name, m4_symbol *symbol,
-                                   void *userdata);
+static int   decode_char           (FILE *);
+static void  issue_expect_message  (int);
+static int   produce_char_dump     (char *, int);
+static void  produce_resyntax_dump (m4 *, FILE *);
+static void  produce_syntax_dump   (FILE *, m4_syntax_table *, char);
+static void  produce_module_dump   (FILE *, lt_dlhandle);
+static void  produce_symbol_dump   (m4 *, FILE *, m4_symbol_table *);
+static void *dump_symbol_CB        (m4_symbol_table *, const char *,
+                                    m4_symbol *, void *);
 
 
 /* Produce a frozen state to the given file NAME. */
+
 static int
 produce_char_dump (char *buf, int ch)
 {
@@ -77,6 +76,32 @@
   return strlen (buf);
 }
 
+
+/* Write the frozen file record `R<length>\n<name>\n' (for example
+   'R14\nPOSIX_EXTENDED\n') naming the current default regular
+   expression syntax.  Dumping the numeric code used by re_compile()
+   would be a little faster, but the value of RE_SYNTAX_POSIX_EXTENDED
+   may change in future (or alternative) implementations, so the
+   symbolic name is recorded instead.  Nothing is written for code 0.  */
+
+static void
+produce_resyntax_dump (m4 *context, FILE *file)
+{
+  int code  = m4_get_regexp_syntax_opt (context);
+
+  /* Don't dump default syntax code (`0' for GNU_EMACS).  */
+  if (code)
+    {
+      const char *resyntax = m4_regexp_syntax_decode (code);
+
+      if (!resyntax)
+       M4ERROR ((EXIT_FAILURE, 0,
+                 _("Invalid regexp syntax code `%d'"), code));
+      /* strlen returns size_t; cast it to match the `%d' conversion.  */
+      fprintf (file, "R%d\n%s\n", (int) strlen (resyntax), resyntax);
+    }
+}
+
 #define MAX_CHAR_LENGTH 4      /* '\377' -> 4 characters */
 
 static void
@@ -238,6 +263,10 @@
       fputc ('\n', file);
     }
 
+  /* Dump regular expression syntax.  */
+
+  produce_resyntax_dump (context, file);
+
   /* Dump syntax table. */
 
   produce_syntax_dump (file, M4SYNTAX, 'I');
@@ -515,6 +544,30 @@
 
        break;
 
+      case 'R':
+
+       if (version < 2)
+         {
+           /* 'R' operator is not supported in format version 1. */
+           M4ERROR ((EXIT_FAILURE, 0, _("Ill-formed frozen file")));
+         }
+
+       GET_CHARACTER;
+       GET_NUMBER (number[0]);
+       VALIDATE ('\n');
+       GET_STRING (file, string[0], allocated[0], number[0]);
+       VALIDATE ('\n');
+
+       m4_set_regexp_syntax_opt (context,
+                                 m4_regexp_syntax_encode (string[0]));
+       if (m4_get_regexp_syntax_opt (context) < 0)
+         {
+           M4ERROR ((EXIT_FAILURE, 0,
+                     _("Unknown regexp syntax code %s"), string[0]));
+         }
+
+       break;
+
       case 'S':
 
        if (version < 2)
Index: m4--devo--0/NEWS
===================================================================
--- m4--devo--0.orig/NEWS       2006-07-07 12:14:48.000000000 +0100
+++ m4--devo--0/NEWS    2006-07-07 11:55:54.000000000 +0100
@@ -14,6 +14,20 @@
 
 * The '$' syntax class is now enabled.  See the info docs for examples.
 
+* New builtin `renamesyms' allows programmatic renaming of all symbols
+  according to a regular expression.
+
+* New `-r' command-line option changes the default regular expression
+  syntax used by M4.  Without this option, M4 continues to use
+  RE_SYNTAX_EMACS style expressions.  A new section in the info docs
+  explains the differences between them, and what builtins are affected.
+
+* The experimental `epatsubst' and `eregexp' have been removed in favor
+  of a new `changeresyntax' builtin.
+
+* `patsubst' and `regexp' have a new optional 4th argument to use a
+  different regular expression syntax for the duration of that invocation.
+
 Version beta 1.4q - August 2001, by Gary V. Vaughan
 
 * Support for the experimental `changeword' has been dropped.
Index: m4--devo--0/ChangeLog
===================================================================
--- m4--devo--0.orig/ChangeLog  2006-07-06 00:30:27.000000000 +0100
+++ m4--devo--0/ChangeLog       2006-07-07 12:45:02.000000000 +0100
@@ -1,3 +1,36 @@
+2006-07-07  Gary V. Vaughan  <address@hidden>
+
+       * m4/m4module.h (m4_regexp_syntax_decode, m4_regexp_syntax_encode)
+       (m4_get_regexp_syntax_opt, m4_set_regexp_syntax_opt): Declare
+       new functions for managing regexp syntax options.
+       * m4/m4private.h (m4): Add regexp_syntax field.
+       * m4/resyntax.c: New file implements the above.
+       * Makefile.am (m4_libm4_la_SOURCES): Add m4/resyntax.c.
+       * modules/gnu.c:  Put builtin definitions in alphabetical order.
+       (RE_SYNTAX_BRE, RE_SYNTAX_ERE, builtin_eregexp, builtin_epatsubst)
+       (builtin_erenamesyms, m4_regexp_do, m4_patsubst_do)
+       (m4_renamesyms_do): Removed.
+       (builtin_changeresyntax): New builtin to change regular expression
+       syntax.
+       (m4_resyntax_encode_safe): Factor out diagnostics code.
+       * src/freeze.c (produce_resyntax_dump): New function to dump
+       default regexp syntax specifier to frozen file.
+       (reload_frozen_state): Updated to action 'R' directive.
+       * src/main.c (usage): Describe new -r option.
+       (long_options, OPTSTRING): Declare it.
+       (main): Encode and store cli regexp syntax option argument.
+       * tests/generate.awk (m4_pattern_allow): Updated for renamesyms.
+       * doc/m4.texinfo (Erenamesyms and Renamesyms, Eregexp and Regexp)
+       (Epatsubst and Patsubst): Renamed to...
+       (Renamesyms, Regexp, Patsubst): ...these respectively. Updated
+       documentation and added new examples.
+       (Changeresyntax): New section describing changeresyntax builtin,
+       and regexp syntax names.
+       (Regular expression syntax): New section describing differences
+       between various regular expression syntaxes.
+       (Frozen files): Document 'R' directive.
+       * NEWS: Updated.
+
 2006-07-05  Eric Blake  <address@hidden>
 
        Fix all testsuite failures on cygwin.

Attachment: signature.asc
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]