bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: printing big decimals


From: Paul Eggert
Subject: Re: printing big decimals
Date: 26 Mar 2003 11:45:17 -0800
User-agent: Gnus/5.0808 (Gnus v5.8.8) Emacs/20.3

Aharon Robbins <address@hidden> writes:

> I've chosen for now to leave things as they are, since this
> is documented in the gawk.texi file.  I have added a lint
> warning in the code in case this happens.

On many hosts, the current Gawk behavior causes problems when using
'gawk' to parse the output of 'ls', and of other programs that print
sizes of files.  This is because many files have sizes larger than
2**31 bytes these days, but 'long' is still only 32 bits on many
hosts.

How about the following patch to help fix this problem?  It causes
Gawk to use 64-bit integers if they are available, which they almost
always are these days.

I noticed a few other related problems while I was fixing this.
Bitwise ops have the same problem as printf, so I altered them too.  I
also fixed another off-by-1 portability bug that I noticed: if you
shift an N-bit int by exactly N bits, the resulting behavior is
undefined in C.  Also, CHAR_BIT is now the portable way to get the
number of bits in a byte.

2003-03-26  Paul Eggert  <address@hidden>

        * builtin.c [HAVE_INTTYPES_H]: Include <inttypes.h>.
        [!HAVE_INTTYPES_H && HAVE_STDINT_H]: Include <stdint.h>.
        (CHAR_BIT, INTMAX_MIN, UINTMAX_MAX): Define if the system does not.
        (TYPE_SIGNED, TYPE_MINIMUM, TYPE_MAXIMUM): New macros, taken from
        coreutils and many other GNU utilities.
        (format_tree): When formatting, use widest possible integers
        rather than settling for 'long'.
        (do_lshift, do_rshift, do_and, do_or, do_xor, do_compl): Likewise,
        when doing bitwise operations.
        * configure.in (jm_AC_TYPE_LONG_LONG, jm_AC_TYPE_UNSIGNED_LONG_LONG,
        jm_AC_TYPE_INTMAX_T, jm_AC_TYPE_UINTMAX_T): Add, since the mainline
        code now needs this.
        * doc/gawk.texi (Control Letters, Bitwise Functions): Document this.
        * m4/intmax_t.m4: New file, taken from coreutils (but renamed to
        avoid collision with our m4/inttypes.m4).
        * m4/longlong.m4: New file, taken from coreutils.
        * m4/uintmax_t.m4, m4/ulonglong.m4: Remove; superseded by the above
        new m4 files.

        * builtin.c (BITS_PER_BYTE): Remove; use CHAR_BIT instead, since
        it's the standard name.
        (do_lshift, do_rshift): Complain if the shift width is exactly equal
        to the word size, too.

diff -Naurp gawk-3.1.1n/builtin.c gawk-3.1.1n-fix/builtin.c
--- gawk-3.1.1n/builtin.c       2003-02-28 01:04:08.000000000 -0800
+++ gawk-3.1.1n-fix/builtin.c   2003-03-26 10:54:09.825480000 -0800
@@ -31,9 +31,35 @@
 #undef HUGE
 #undef CHARBITS
 #undef INTBITS
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
 #include <math.h>
 #include "random.h"
 
+#ifndef CHAR_BIT
+# define CHAR_BIT 8
+#endif
+
+/* The extra casts work around common compiler bugs.  */
+#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
+/* The outer cast is needed to work around a bug in Cray C 5.0.3.0.
+   It is necessary at least when t == time_t.  */
+#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
+                             ? ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1) : (t) 0))
+#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))
+
+#ifndef INTMAX_MIN
+# define INTMAX_MIN TYPE_MINIMUM (intmax_t)
+#endif
+#ifndef UINTMAX_MAX
+# define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
+#endif
+
 #ifndef SIZE_MAX       /* C99 constant, can't rely on it everywhere */
 #define SIZE_MAX ((size_t) -1)
 #endif
@@ -528,7 +554,7 @@ format_tree(
 #ifdef sun386          /* Can't cast unsigned (int/long) from ptr->value */
        long tmp_uval;  /* on 386i 4.0.1 C compiler -- it just hangs */
 #endif
-       unsigned long uval;
+       uintmax_t uval;
        int sgn;
        int base = 0;
        char cpbuf[30];         /* if we have numbers bigger than 30 */
@@ -843,7 +869,7 @@ check_pos:
                                tmp_uval = arg->numbr; 
                                uval = (unsigned long) tmp_uval;
 #else
-                               uval = (unsigned long) arg->numbr;
+                               uval = (uintmax_t) arg->numbr;
 #endif
                                cpbuf[0] = uval;
                                prec = 1;
@@ -880,17 +906,16 @@ check_pos:
                                goto pr_tail;
 
                        if (tmpval < 0) {
-                               if (tmpval < LONG_MIN)
+                               if (tmpval < INTMAX_MIN)
                                        goto out_of_range;
                                sgn = TRUE;
-                               uval = - (unsigned long) (long) tmpval;
+                               uval = - (uintmax_t) (intmax_t) tmpval;
                        } else {
-                               /* Use !, so that NaNs are out of range.
-                                  The cast avoids a SunOS 4.1.x cc bug.  */
-                               if (! (tmpval <= (unsigned long) ULONG_MAX))
+                               /* Use !, so that NaNs are out of range.  */
+                               if (! (tmpval <= UINTMAX_MAX))
                                        goto out_of_range;
                                sgn = FALSE;
-                               uval = (unsigned long) tmpval;
+                               uval = (uintmax_t) tmpval;
                        }
                        do {
                                *--cp = (char) ('0' + uval % 10);
@@ -958,15 +983,14 @@ check_pos:
                                goto pr_tail;
 
                        if (tmpval < 0) {
-                               if (tmpval < LONG_MIN)
+                               if (tmpval < INTMAX_MIN)
                                        goto out_of_range;
-                               uval = (unsigned long) (long) tmpval;
+                               uval = (uintmax_t) (intmax_t) tmpval;
                        } else {
-                               /* Use !, so that NaNs are out of range.
-                                  The cast avoids a SunOS 4.1.x cc bug.  */
-                               if (! (tmpval <= (unsigned long) ULONG_MAX))
+                               /* Use !, so that NaNs are out of range.  */
+                               if (! (tmpval <= UINTMAX_MAX))
                                        goto out_of_range;
-                               uval = (unsigned long) tmpval;
+                               uval = (uintmax_t) tmpval;
                        }
                        /*
                         * When to fill with zeroes is of course not simple.
@@ -2354,15 +2378,13 @@ sgfmt(char *buf,        /* return buffer; assum
 }
 #endif /* GFMT_WORKAROUND */
 
-#define BITS_PER_BYTE  8       /* if not true, you lose. too bad. */
-
 /* do_lshift --- perform a << operation */
 
 NODE *
 do_lshift(NODE *tree)
 {
        NODE *s1, *s2;
-       unsigned long uval, ushift, res;
+       uintmax_t uval, ushift, res;
        AWKNUM val, shift;
 
        s1 = tree_eval(tree->lnode);
@@ -2379,15 +2401,15 @@ do_lshift(NODE *tree)
                        lintwarn(_("lshift(%lf, %lf): negative values will give 
strange results"), val, shift);
                if (double_to_int(val) != val || double_to_int(shift) != shift)
                        lintwarn(_("lshift(%lf, %lf): fractional values will be 
truncated"), val, shift);
-               if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
+               if (shift >= sizeof(uintmax_t) * CHAR_BIT)
                        lintwarn(_("lshift(%lf, %lf): too large shift value 
will give strange results"), val, shift);
        }
 
        free_temp(s1);
        free_temp(s2);
 
-       uval = (unsigned long) val;
-       ushift = (unsigned long) shift;
+       uval = (uintmax_t) val;
+       ushift = (uintmax_t) shift;
 
        res = uval << ushift;
        return tmp_number((AWKNUM) res);
@@ -2399,7 +2421,7 @@ NODE *
 do_rshift(NODE *tree)
 {
        NODE *s1, *s2;
-       unsigned long uval, ushift, res;
+       uintmax_t uval, ushift, res;
        AWKNUM val, shift;
 
        s1 = tree_eval(tree->lnode);
@@ -2416,15 +2438,15 @@ do_rshift(NODE *tree)
                        lintwarn(_("rshift(%lf, %lf): negative values will give 
strange results"), val, shift);
                if (double_to_int(val) != val || double_to_int(shift) != shift)
                        lintwarn(_("rshift(%lf, %lf): fractional values will be 
truncated"), val, shift);
-               if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
+               if (shift >= sizeof(uintmax_t) * CHAR_BIT)
                        lintwarn(_("rshift(%lf, %lf): too large shift value 
will give strange results"), val, shift);
        }
 
        free_temp(s1);
        free_temp(s2);
 
-       uval = (unsigned long) val;
-       ushift = (unsigned long) shift;
+       uval = (uintmax_t) val;
+       ushift = (uintmax_t) shift;
 
        res = uval >> ushift;
        return tmp_number((AWKNUM) res);
@@ -2436,7 +2458,7 @@ NODE *
 do_and(NODE *tree)
 {
        NODE *s1, *s2;
-       unsigned long uleft, uright, res;
+       uintmax_t uleft, uright, res;
        AWKNUM left, right;
 
        s1 = tree_eval(tree->lnode);
@@ -2458,8 +2480,8 @@ do_and(NODE *tree)
        free_temp(s1);
        free_temp(s2);
 
-       uleft = (unsigned long) left;
-       uright = (unsigned long) right;
+       uleft = (uintmax_t) left;
+       uright = (uintmax_t) right;
 
        res = uleft & uright;
        return tmp_number((AWKNUM) res);
@@ -2471,7 +2493,7 @@ NODE *
 do_or(NODE *tree)
 {
        NODE *s1, *s2;
-       unsigned long uleft, uright, res;
+       uintmax_t uleft, uright, res;
        AWKNUM left, right;
 
        s1 = tree_eval(tree->lnode);
@@ -2493,8 +2515,8 @@ do_or(NODE *tree)
        free_temp(s1);
        free_temp(s2);
 
-       uleft = (unsigned long) left;
-       uright = (unsigned long) right;
+       uleft = (uintmax_t) left;
+       uright = (uintmax_t) right;
 
        res = uleft | uright;
        return tmp_number((AWKNUM) res);
@@ -2506,7 +2528,7 @@ NODE *
 do_xor(NODE *tree)
 {
        NODE *s1, *s2;
-       unsigned long uleft, uright, res;
+       uintmax_t uleft, uright, res;
        AWKNUM left, right;
 
        s1 = tree_eval(tree->lnode);
@@ -2528,8 +2550,8 @@ do_xor(NODE *tree)
        free_temp(s1);
        free_temp(s2);
 
-       uleft = (unsigned long) left;
-       uright = (unsigned long) right;
+       uleft = (uintmax_t) left;
+       uright = (uintmax_t) right;
 
        res = uleft ^ uright;
        return tmp_number((AWKNUM) res);
@@ -2542,7 +2564,7 @@ do_compl(NODE *tree)
 {
        NODE *tmp;
        double d;
-       unsigned long uval;
+       uintmax_t uval;
 
        tmp = tree_eval(tree->lnode);
        d = force_number(tmp);
@@ -2557,7 +2579,7 @@ do_compl(NODE *tree)
                        lintwarn(_("compl(%lf): fractional value will be 
truncated"), d);
        }
 
-       uval = (unsigned long) d;
+       uval = (uintmax_t) d;
        uval = ~ uval;
        return tmp_number((AWKNUM) uval);
 }
diff -Naurp gawk-3.1.1n/configure.in gawk-3.1.1n-fix/configure.in
--- gawk-3.1.1n/configure.in    2003-03-03 05:38:23.000000000 -0800
+++ gawk-3.1.1n-fix/configure.in        2003-03-26 11:04:27.373044000 -0800
@@ -148,6 +148,10 @@ AC_TYPE_PID_T
 AC_TYPE_SIGNAL
 AC_SIZE_T
 AC_TYPE_GETGROUPS
+jm_AC_TYPE_LONG_LONG
+jm_AC_TYPE_UNSIGNED_LONG_LONG
+jm_AC_TYPE_INTMAX_T
+jm_AC_TYPE_UINTMAX_T
 AC_CHECK_TYPE(ssize_t, int)
 AC_EGREP_HEADER([int.*sprintf], stdio.h,
        AC_DEFINE(SPRINTF_RET, int, [return type of sprintf]),
diff -Naurp gawk-3.1.1n/doc/gawk.texi gawk-3.1.1n-fix/doc/gawk.texi
--- gawk-3.1.1n/doc/gawk.texi   2003-03-04 02:29:04.000000000 -0800
+++ gawk-3.1.1n-fix/doc/gawk.texi       2003-03-26 11:20:22.873436000 -0800
@@ -6206,7 +6206,7 @@ argument and it ignores any modifiers.
 @cindex @command{gawk}, format-control characters
 @strong{Note:}
 When using the integer format-control letters for values that are outside
-the range of a C @code{long} integer, @command{gawk} switches to the
+the range of the widest C integer type, @command{gawk} switches to the
 @samp{%g} format specifier. Other versions of @command{awk} may print
 invalid values or do something else entirely.
 @value{DARKCORNER}
@@ -13772,7 +13772,7 @@ Return the value of @var{val}, shifted r
 @end multitable
 
 For all of these functions, first the double-precision floating-point value is
-converted to a C @code{unsigned long}, then the bitwise operation is
+converted to the widest C unsigned integer type, then the bitwise operation is
 performed and then the result is converted back into a C @code{double}. (If
 you don't understand this paragraph, don't worry about it.)
 
diff -Naurp gawk-3.1.1n/m4/intmax_t.m4 gawk-3.1.1n-fix/m4/intmax_t.m4
--- gawk-3.1.1n/m4/intmax_t.m4  1969-12-31 16:00:00.000000000 -0800
+++ gawk-3.1.1n-fix/m4/intmax_t.m4      2003-03-26 11:03:38.922447000 -0800
@@ -0,0 +1,32 @@
+#serial 6
+
+dnl From Paul Eggert.
+
+AC_PREREQ(2.52)
+
+# Define intmax_t to long or long long if <inttypes.h> doesn't define.
+
+AC_DEFUN([jm_AC_TYPE_INTMAX_T],
+[
+  AC_REQUIRE([jm_AC_TYPE_LONG_LONG])
+  AC_CHECK_TYPE(intmax_t, ,
+    [test $ac_cv_type_long_long = yes \
+       && ac_type='long long' \
+       || ac_type='long'
+     AC_DEFINE_UNQUOTED(intmax_t, $ac_type,
+       [Define to widest signed type if <inttypes.h> doesn't define.])])
+])
+
+# Define uintmax_t to unsigned long or unsigned long long
+# if <inttypes.h> doesn't define.
+
+AC_DEFUN([jm_AC_TYPE_UINTMAX_T],
+[
+  AC_REQUIRE([jm_AC_TYPE_UNSIGNED_LONG_LONG])
+  AC_CHECK_TYPE(uintmax_t, ,
+    [test $ac_cv_type_unsigned_long_long = yes \
+       && ac_type='unsigned long long' \
+       || ac_type='unsigned long'
+     AC_DEFINE_UNQUOTED(uintmax_t, $ac_type,
+       [Define to widest unsigned type if <inttypes.h> doesn't define.])])
+])
diff -Naurp gawk-3.1.1n/m4/longlong.m4 gawk-3.1.1n-fix/m4/longlong.m4
--- gawk-3.1.1n/m4/longlong.m4  1969-12-31 16:00:00.000000000 -0800
+++ gawk-3.1.1n-fix/m4/longlong.m4      2003-03-26 11:01:25.420743000 -0800
@@ -0,0 +1,35 @@
+#serial 2
+
+dnl From Paul Eggert.
+
+# Define HAVE_LONG_LONG if 'long long' works.
+
+AC_DEFUN([jm_AC_TYPE_LONG_LONG],
+[
+  AC_CACHE_CHECK([for long long], ac_cv_type_long_long,
+  [AC_TRY_LINK([long long ll = 1; int i = 63;],
+    [long long llmax = (long long) -1;
+     return ll << i | ll >> i | llmax / ll | llmax % ll;],
+    ac_cv_type_long_long=yes,
+    ac_cv_type_long_long=no)])
+  if test $ac_cv_type_long_long = yes; then
+    AC_DEFINE(HAVE_LONG_LONG, 1,
+      [Define if you have the long long type.])
+  fi
+])
+
+# Define HAVE_UNSIGNED_LONG_LONG if 'unsigned long long' works.
+
+AC_DEFUN([jm_AC_TYPE_UNSIGNED_LONG_LONG],
+[
+  AC_CACHE_CHECK([for unsigned long long], ac_cv_type_unsigned_long_long,
+  [AC_TRY_LINK([unsigned long long ull = 1; int i = 63;],
+    [unsigned long long ullmax = (unsigned long long) -1;
+     return ull << i | ull >> i | ullmax / ull | ullmax % ull;],
+    ac_cv_type_unsigned_long_long=yes,
+    ac_cv_type_unsigned_long_long=no)])
+  if test $ac_cv_type_unsigned_long_long = yes; then
+    AC_DEFINE(HAVE_UNSIGNED_LONG_LONG, 1,
+      [Define if you have the unsigned long long type.])
+  fi
+])
diff -Naurp gawk-3.1.1n/m4/uintmax_t.m4 gawk-3.1.1n-fix/m4/uintmax_t.m4
--- gawk-3.1.1n/m4/uintmax_t.m4 2002-09-19 01:00:08.000000000 -0700
+++ gawk-3.1.1n-fix/m4/uintmax_t.m4     1969-12-31 16:00:00.000000000 -0800
@@ -1,29 +0,0 @@
-# uintmax_t.m4 serial 6 (gettext-0.11)
-dnl Copyright (C) 1997-2002 Free Software Foundation, Inc.
-dnl This file is free software, distributed under the terms of the GNU
-dnl General Public License.  As a special exception to the GNU General
-dnl Public License, this file may be distributed as part of a program
-dnl that contains a configuration script generated by Autoconf, under
-dnl the same distribution terms as the rest of that program.
-
-dnl From Paul Eggert.
-
-AC_PREREQ(2.13)
-
-# Define uintmax_t to `unsigned long' or `unsigned long long'
-# if <inttypes.h> does not exist.
-
-AC_DEFUN([jm_AC_TYPE_UINTMAX_T],
-[
-  AC_REQUIRE([jm_AC_HEADER_INTTYPES_H])
-  AC_REQUIRE([jm_AC_HEADER_STDINT_H])
-  if test $jm_ac_cv_header_inttypes_h = no && test $jm_ac_cv_header_stdint_h = 
no; then
-    AC_REQUIRE([jm_AC_TYPE_UNSIGNED_LONG_LONG])
-    test $ac_cv_type_unsigned_long_long = yes \
-      && ac_type='unsigned long long' \
-      || ac_type='unsigned long'
-    AC_DEFINE_UNQUOTED(uintmax_t, $ac_type,
-  [Define to unsigned long or unsigned long long
-   if <inttypes.h> and <stdint.h> don't define.])
-  fi
-])
diff -Naurp gawk-3.1.1n/m4/ulonglong.m4 gawk-3.1.1n-fix/m4/ulonglong.m4
--- gawk-3.1.1n/m4/ulonglong.m4 2002-09-19 01:00:08.000000000 -0700
+++ gawk-3.1.1n-fix/m4/ulonglong.m4     1969-12-31 16:00:00.000000000 -0800
@@ -1,23 +0,0 @@
-# ulonglong.m4 serial 2 (fileutils-4.0.32, gettext-0.10.40)
-dnl Copyright (C) 1999-2002 Free Software Foundation, Inc.
-dnl This file is free software, distributed under the terms of the GNU
-dnl General Public License.  As a special exception to the GNU General
-dnl Public License, this file may be distributed as part of a program
-dnl that contains a configuration script generated by Autoconf, under
-dnl the same distribution terms as the rest of that program.
-
-dnl From Paul Eggert.
-
-AC_DEFUN([jm_AC_TYPE_UNSIGNED_LONG_LONG],
-[
-  AC_CACHE_CHECK([for unsigned long long], ac_cv_type_unsigned_long_long,
-  [AC_TRY_LINK([unsigned long long ull = 1; int i = 63;],
-    [unsigned long long ullmax = (unsigned long long) -1;
-     return ull << i | ull >> i | ullmax / ull | ullmax % ull;],
-    ac_cv_type_unsigned_long_long=yes,
-    ac_cv_type_unsigned_long_long=no)])
-  if test $ac_cv_type_unsigned_long_long = yes; then
-    AC_DEFINE(HAVE_UNSIGNED_LONG_LONG, 1,
-      [Define if you have the unsigned long long type.])
-  fi
-])




reply via email to

[Prev in Thread] Current Thread [Next in Thread]