bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

m4 ebcdic patch


From: Michael Elizabeth Chastain
Subject: m4 ebcdic patch
Date: Fri, 12 Dec 2003 12:54:22 -0500 (EST)

Here's a patch to make m4 work better on ebcdic platforms.

I changed the character range code so that range expressions such as
'[A-Z]' copy characters out of a fixed string
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" instead of doing character arithmetic
from 'A' to 'Z'.  This handles the noncontiguous ranges of EBCDIC.

I grabbed a bug fix from Tom Erdevig:

  http://mail.gnu.org/archive/html/bug-gnu-utils/2002-10/msg00308.html

I added new tests for regexp, translit, and patsubst.  All the new
tests pass on an ascii platform.  I need some feedback from ebcdic
people.

I haven't read regex.c line by line -- there might be more gotchas
lurking in it.

This patch is copyrighted by me, but licensed on exactly the same terms
as m4.  It's my intention to contribute it to the FSF.

How about it, ebcdic people, does this work for you?
Please try this patch, run "make check", and send me "make check" results.

Michael C

===

diff -u -r -N ORIGINAL-m4-1.4/checks/43.regexp m4-1.4/checks/43.regexp
--- ORIGINAL-m4-1.4/checks/43.regexp    1994-11-01 13:15:11.000000000 -0500
+++ m4-1.4/checks/43.regexp     2003-12-12 12:29:40.000000000 -0500
@@ -3,3 +3,8 @@
 dnl @result{}5
 regexp(`GNUs not Unix', `\<Q\w*')
 dnl @result{}-1
+dnl
+regexp(`~{}\mec.was.here', `[A-Za-z]')
+dnl @result{}4
+regexp(`~{}\mec.was.here', `\w')
+dnl @result{}4
diff -u -r -N ORIGINAL-m4-1.4/checks/46.translit m4-1.4/checks/46.translit
--- ORIGINAL-m4-1.4/checks/46.translit  1994-11-01 13:15:12.000000000 -0500
+++ m4-1.4/checks/46.translit   2003-12-12 12:11:38.000000000 -0500
@@ -5,3 +5,6 @@
 dnl @result{}GNUS NOT UNIX
 translit(`GNUs not Unix', `A-Z', `z-a')
 dnl @result{}tmfs not fnix
+dnl
+translit(`ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'A-Z', 'z-a')
+dnl @result{}zyxwvutsrqponmlkjihgfedcba
diff -u -r -N ORIGINAL-m4-1.4/checks/47.patsubst m4-1.4/checks/47.patsubst
--- ORIGINAL-m4-1.4/checks/47.patsubst  1994-11-01 13:15:13.000000000 -0500
+++ m4-1.4/checks/47.patsubst   2003-12-12 12:39:07.000000000 -0500
@@ -9,3 +9,8 @@
 dnl @result{}(GNUs) (not) (Unix)
 patsubst(`GNUs not Unix', `[A-Z][a-z]+')
 dnl @result{}GN not 
+dnl
+patsubst(`Alpha_{}\~-bet.', `\w', `\& ')
+dnl @result{}A l p h a _ {}\~-b e t .
+patsubst(`Mec{}\~was{}\~here', `\W+', ` ')
+dnl @result{}Mec was here
diff -u -r -N ORIGINAL-m4-1.4/lib/Makefile.in m4-1.4/lib/Makefile.in
--- ORIGINAL-m4-1.4/lib/Makefile.in     1994-11-01 20:40:16.000000000 -0500
+++ m4-1.4/lib/Makefile.in      2003-12-12 02:22:56.000000000 -0500
@@ -38,11 +38,11 @@
 
 INCLUDES = -I.. -I$(srcdir)
 
-HEADERS = getopt.h obstack.h regex.h
+HEADERS = getopt.h obstack.h regex.h range.h
 SOURCES = regex.c getopt.c getopt1.c error.c obstack.c xmalloc.c \
-xstrdup.c alloca.c strtol.c
+xstrdup.c alloca.c strtol.c range.c
 OBJECTS = regex.o getopt.o getopt1.o error.o obstack.o xmalloc.o \
-xstrdup.o @ALLOCA@ @LIBOBJS@
+xstrdup.o range.o @ALLOCA@ @LIBOBJS@
 
 DISTFILES = COPYING.LIB Makefile.in $(HEADERS) $(SOURCES) \
 TAGS
diff -u -r -N ORIGINAL-m4-1.4/lib/range.c m4-1.4/lib/range.c
--- ORIGINAL-m4-1.4/lib/range.c 1969-12-31 19:00:00.000000000 -0500
+++ m4-1.4/lib/range.c  2003-12-12 03:33:13.000000000 -0500
@@ -0,0 +1,73 @@
+/* Copyright 2003, Michael Chastain, <address@hidden>
+   Licensed under the GNU GPL, version 2.
+
+   Character range functions.
+   There is just enough stuff here to make m4 work on ebcdic.  */
+
+#include <string.h>
+#include "range.h"
+
+/* MAKE_RANGE expands a range such as '0-9' to "0123456789".
+   FROM and TO are the end characters; results come back via PPFROM/PPTO.
+
+   If FROM and TO both lie in one of the portable ranges below,
+   *PPFROM and *PPTO point at them inside a permanent const string
+   that the caller must not free.  For a range such as '[f-a]', it
+   is legal for *PPFROM to be greater than *PPTO.
+
+   Otherwise the range is strange (not just a simple numeric or alpha
+   range): *PPTO is NULL and *PPFROM is an xmalloc'ed, NUL-terminated
+   string of every code from FROM to TO, which the caller must free.
+   A strange range holding '\0' reads as truncated when used as a string.  */
+
+/* Declare xmalloc: an implicit int return would truncate the pointer.  */
+extern void *xmalloc (size_t n);
+
+void make_range (char from, char to, const char ** ppfrom, const char ** ppto)
+{
+  /* These ranges are well-formed in both ascii and ebcdic.  */
+  static const char * const range_list [] =
+  {
+    "0123456789",
+    "abcdefghijklmnopqrstuvwxyz",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+    0
+  };
+
+  /* Look in each line in the nice range list.  If both characters are
+     on one line, return pointers into that line.  strchr also matches
+     the terminating '\0', so a NUL end point is never looked up here.  */
+  if (from != '\0' && to != '\0')
+  {
+    const char * const * ppconv;
+    for (ppconv = range_list; *ppconv; ppconv++)
+    {
+      const char * pfrom = strchr (*ppconv, from);
+      const char * pto   = pfrom ? strchr (*ppconv, to) : 0;
+      if (pto)
+      {
+       *ppfrom = pfrom;
+       *ppto   = pto;
+       return;
+      }
+    }
+  }
+
+  /* Too bad.  Now I have to synthesize a strange range.  Count in
+     unsigned codes so the walk never relies on plain char wrapping.  */
+  {
+    unsigned char ufrom = (unsigned char) from;
+    unsigned char uto   = (unsigned char) to;
+    int step = (ufrom < uto) ? 1 : -1;
+    size_t length = (size_t) (ufrom < uto ? uto - ufrom : ufrom - uto) + 1;
+    unsigned char * string = (unsigned char *) xmalloc (length + 1);
+    size_t i;
+    int code = ufrom;
+
+    for (i = 0; i < length; i++, code += step)
+      string[i] = (unsigned char) code;
+    string[length] = '\0';
+    *ppfrom = (const char *) string;
+    *ppto   = 0;
+  }
+}
diff -u -r -N ORIGINAL-m4-1.4/lib/range.h m4-1.4/lib/range.h
--- ORIGINAL-m4-1.4/lib/range.h 1969-12-31 19:00:00.000000000 -0500
+++ m4-1.4/lib/range.h  2003-12-12 02:16:51.000000000 -0500
@@ -0,0 +1,12 @@
+/* Copyright 2003, Michael Chastain, <address@hidden>
+   Licensed under the GNU GPL, version 2.
+
+   Character range functions.
+   There is just enough stuff here to make m4 work on ebcdic.  */
+
+#ifndef RANGE_H
+#define RANGE_H
+
+extern void make_range (char from, char to, const char ** ppfrom, const char ** ppto);
+
+#endif
diff -u -r -N ORIGINAL-m4-1.4/lib/regex.c m4-1.4/lib/regex.c
--- ORIGINAL-m4-1.4/lib/regex.c 1994-10-26 18:43:45.000000000 -0400
+++ m4-1.4/lib/regex.c  2003-12-12 12:41:58.000000000 -0500
@@ -4,6 +4,7 @@
    internationalization features.)
 
    Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+   Copyright 2003, Michael Chastain, <address@hidden>
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -95,24 +96,17 @@
 static void
 init_syntax_once ()
 {
-   register int c;
+   static const char word_chars [] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
    static int done = 0;
+   const char * pc;
 
    if (done)
      return;
 
    bzero (re_syntax_table, sizeof re_syntax_table);
 
-   for (c = 'a'; c <= 'z'; c++)
-     re_syntax_table[c] = Sword;
-
-   for (c = 'A'; c <= 'Z'; c++)
-     re_syntax_table[c] = Sword;
-
-   for (c = '0'; c <= '9'; c++)
-     re_syntax_table[c] = Sword;
-
-   re_syntax_table['_'] = Sword;
+   for (pc = word_chars; *pc; pc++)
+     re_syntax_table[(unsigned char) *pc] = Sword;
 
    done = 1;
 }
@@ -2711,15 +2705,27 @@
   if (range_start > range_end)
     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
 
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
+  /* Make the range. */
+  {
+    const char * p_start;
+    const char * p_end;
+    const char * p;
+    make_range (range_start, range_end, &p_start, &p_end);
+    if (p_end == NULL)
     {
-      SET_LIST_BIT (TRANSLATE (this_char));
+      /* TODO: I would like to issue a warning here! */
+      for (p = p_start; *p; p++)
+       SET_LIST_BIT (TRANSLATE (*p));
+      free (p_start);
     }
-  
+    else
+    {
+      /* I already disallowed backwards ranges. */
+      for (p = p_start; p <= p_end; p++)
+       SET_LIST_BIT (TRANSLATE (*p));
+    }
+  }
+
   return REG_NOERROR;
 }
 
@@ -4272,7 +4278,7 @@
                  {
                    int not = (re_opcode_t) p1[3] == charset_not;
                     
-                   if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+                   if (c < (unsigned int) (p1[4] * BYTEWIDTH)
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                      not = !not;
 
diff -u -r -N ORIGINAL-m4-1.4/src/builtin.c m4-1.4/src/builtin.c
--- ORIGINAL-m4-1.4/src/builtin.c       1994-08-31 12:45:12.000000000 -0400
+++ m4-1.4/src/builtin.c        2003-12-12 11:53:01.000000000 -0500
@@ -1,5 +1,6 @@
 /* GNU m4 -- A simple macro processor
    Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+   Copyright 2003, Michael Chastain, <address@hidden>
   
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -23,6 +24,7 @@
 
 extern FILE *popen ();
 
+#include "range.h"
 #include "regex.h"
 
 #define ARG(i) (argc > (i) ? TOKEN_DATA_TEXT (argv[i]) : "")
@@ -1381,15 +1383,34 @@
          to = *++s;
          if (to == '\0')
            obstack_1grow (obs, '-'); /* trailing dash */
-         else if (from <= to)
-           {
-             while (from++ < to)
-               obstack_1grow (obs, from);
-           }
          else
            {
-             while (--from >= to)
-               obstack_1grow (obs, from);
+             const char * pfrom;
+             const char * pto;
+             int step;
+
+             make_range (from, to, &pfrom, &pto);
+
+             /* remember, from was already pushed when it was seen */
+             if (pto == NULL)
+             {
+               M4ERROR ((warning_status, 0,
+                         "WARNING: strange range: [%c-%c] = %s",
+                         from, to, pfrom));
+               obstack_grow (obs, pfrom+1, strlen(pfrom+1));
+               free (pfrom);
+             }
+             else if (pto >= pfrom)
+             {
+               obstack_grow (obs, pfrom+1, pto-pfrom);
+             }
+             else
+             {
+               do
+               {
+                 obstack_1grow (obs, *--pfrom);
+               } while (pfrom != pto);
+             }
            }
        }
       else




reply via email to

[Prev in Thread] Current Thread [Next in Thread]