bug-gawk
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

GNU Awk 5.1.0 - minor bugs


From: Michael Builov
Subject: GNU Awk 5.1.0 - minor bugs
Date: Fri, 24 Apr 2020 11:36:21 +0300

Hello, dear GNU Awk maintainers.

Here is a list of 11 minor bugs I found in gawk sources.
The patches presented demonstrate possible ways to fix detected bugs.


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1) awkgram.y - 2 issues:
- wrong size of "rule_block" (sizeof(ruletab) >= 24)
- get_comment(): isspace() asserts on END_FILE (END_FILE == -1000)


diff --git a/awkgram.y b/awkgram.y
index 00333ea0..ea119706 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -167,7 +167,7 @@ const char awk_namespace[] = "awk";
 const char *current_namespace = awk_namespace;
 bool namespace_changed = false;

-static INSTRUCTION *rule_block[sizeof(ruletab)];
+static INSTRUCTION *rule_block[sizeof(ruletab)/sizeof(ruletab[0])];

 static INSTRUCTION *ip_rec;
 static INSTRUCTION *ip_newfile;
@@ -2773,7 +2773,7 @@ parse_program(INSTRUCTION **pcode, bool from_eval)
        lexeof = false;
        lexptr = NULL;
        lasttok = 0;
-       memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
+       memset(rule_block, 0, sizeof(rule_block));
        errcount = 0;
        tok = tokstart != NULL ? tokstart : tokexpand();

@@ -3454,7 +3454,7 @@ get_comment(enum commenttype flag, INSTRUCTION
**comment_instruction)
                                        sourceline++;
                                        tokadd(c);
                                }
-                       } while (isspace(c) && c != END_FILE);
+                       } while (c != END_FILE && isspace(c));
                        if (c == END_FILE)
                                break;
                        else if (c != '#') {


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2) debug.c - 2 issues:
- set_breakpoint(): NULL pointer dereference (b may be NULL)
- serialize_list(): writing past the allocated buffer memory:
"buflen" is the allocated buffer length - 1, see:

        if (buf == NULL) {      /* first time */
                buflen = SERIALIZE_BUFSIZ;
                emalloc(buf, char *, buflen + 1, "serialize");
        }

or

                if (buflen - bl < SERIALIZE_BUFSIZ/2) {
enlarge_buffer:
                        buflen *= 2;
                        erealloc(buf, char *, buflen + 1, "serialize");
                }

Next check also assumes that buflen == allocated buffer length - 1:

                if (nchar == 0) /* skip empty history lines etc.*/
                        ;
                else if (nchar > 0 && nchar  < buflen - bl) {
                        bl += nchar;
                        buf[bl] = RSEP; /* record */
                        buf[++bl] = '\0';
                } else
                        goto enlarge_buffer;

But later, it is assumed that "buflen" is the allocated buffer length - 3:

                switch (type) {
                case BREAK:
                case WATCH:
                ..........
                        bl--;   /* undo RSEP from above */
                ..........
                        buf[bl++] = FSEP;               /* field */
                        buf[bl++] = RSEP;               /* record */
invalid write ---->     buf[bl] = '\0';


diff --git a/debug.c b/debug.c
index e0822605..fa3c9061 100644
--- a/debug.c
+++ b/debug.c
@@ -2403,9 +2403,11 @@ func:
                if ((b = set_breakpoint_at(rp, rp->source_line, false)) ==
NULL)
                        fprintf(out_fp, _("cannot set breakpoint in
function `%s'\n"),
                                                func->vname);
-               else if (temporary)
-                       b->flags |= BP_TEMP;
-               lineno = b->bpi->source_line;
+               else {
+                       if (temporary)
+                               b->flags |= BP_TEMP;
+                       lineno = b->bpi->source_line;
+               }
                break;

        default:
@@ -4605,10 +4607,10 @@ enlarge_buffer:
                        }

                        if (nchar > 0) {        /* non-empty commands list
*/
-                               nchar += (strlen("commands ") + 20 +
strlen("end") + 1); /* 20 for cnum (an int) */
-                               if (nchar > buflen - bl) {
-                                       buflen = bl + nchar;
-                                       erealloc(buf, char *, buflen + 3,
"serialize_list");
+                               nchar += (strlen("commands ") + 20/*cnum*/
+ 1/*CSEP*/ + strlen("end") + 1/*FSEP*/);
+                               if (nchar >= buflen - bl) {
+                                       buflen = bl + nchar + 1/*RSEP*/;
+                                       erealloc(buf, char *, buflen + 1,
"serialize_list");
                                }
                                nchar = sprintf(buf + bl, "commands %d",
cnum);
                                bl += nchar;
@@ -4634,8 +4636,8 @@ enlarge_buffer:
                                nchar = strlen("end");  /* end of
'commands' */
                                memcpy(buf + bl, "end", nchar);
                                bl += nchar;
+                               buf[bl++] = FSEP;               /* field */
                        }
-                       buf[bl++] = FSEP;               /* field */
                        buf[bl++] = RSEP;               /* record */
                        buf[bl] = '\0';

@@ -4643,9 +4645,9 @@ enlarge_buffer:
                        if (cndn->expr) {
                                bl--;   /* undo RSEP from above */
                                nchar = strlen(cndn->expr);
-                               if (nchar > buflen - bl) {
-                                       buflen = bl + nchar;
-                                       erealloc(buf, char *, buflen + 3,
"serialize_list");
+                               if (nchar + 1/*FSEP*/ >= buflen - bl) {
+                                       buflen = bl + nchar + 1/*FSEP*/ +
1/*RSEP*/;
+                                       erealloc(buf, char *, buflen + 1,
"serialize_list");
                                }
                                memcpy(buf + bl, cndn->expr, nchar);
                                bl += nchar;


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3) node.c - 4 issues:
- parse_escape(): isxdigit/isdigit/isupper called with possible negative int
- parse_escape(): dead code under "if (do_lint && j > 2)" - j cannot be > 2.
- init_btowc_cache(): wide-char at index 255 is not initialized
- GAWKDEBUG: fscaret.awk test fails with use-after-free:
set_field() allocates node:

        n->flags = (STRCUR|STRING|USER_INPUT); /* do not set MALLOC */

r_interpret() increments its reference counter:

                case Op_field_spec:
                ..........
                        UPREF(r);

and then r_interpret() calls unref() for this node:

                case Op_store_field:
                ..........
                        unref(*lhs);

but r_unref() frees it (with number of references == 2) instead of just
decrementing the reference counter.



diff --git a/node.c b/node.c
index c99af12a..bcb9a4a5 100644
--- a/node.c
+++ b/node.c
@@ -304,6 +304,7 @@ r_dupnode(NODE *n)
        assert(n->type == Node_val);

 #ifdef GAWKDEBUG
+       /* Do the same as in awk.h:dupnode().  */
        if ((n->flags & MALLOC) != 0) {
                n->valref++;
                return n;
@@ -496,20 +497,14 @@ void
 r_unref(NODE *tmp)
 {
 #ifdef GAWKDEBUG
-       if (tmp == NULL)
+       /* Do the same as in awk.h:unref().  */
+       assert(tmp == NULL || tmp->valref > 0);
+       if (tmp == NULL || --tmp->valref > 0)
                return;
-       if ((tmp->flags & MALLOC) != 0) {
-               if (tmp->valref > 1) {
-                       tmp->valref--;
-                       return;
-               }
-               if ((tmp->flags & STRCUR) != 0)
-                       efree(tmp->stptr);
-       }
-#else
+#endif
+
        if ((tmp->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
                efree(tmp->stptr);
-#endif

        mpfr_unset(tmp);

@@ -615,7 +610,7 @@ parse_escape(const char **string_ptr)
                start = *string_ptr;
                for (i = j = 0; j < 2; j++) {
                        /* do outside test to avoid multiple side effects */
-                       c = *(*string_ptr)++;
+                       c = (unsigned char) *(*string_ptr)++;
                        if (isxdigit(c)) {
                                i *= 16;
                                if (isdigit(c))
@@ -629,8 +624,8 @@ parse_escape(const char **string_ptr)
                                break;
                        }
                }
-               if (do_lint && j > 2)
-                       lintwarn(_("hex escape \\x%.*s of %d characters
probably not interpreted the way you expect"), j, start, j);
+               if (do_lint && j == 2 && isxdigit((unsigned
char)*(*string_ptr)))
+                       lintwarn(_("hex escape \\x%.*s of %d characters
probably not interpreted the way you expect"), 3, start, 3);
                return i;
        case '\\':
        case '"':
@@ -1018,7 +1013,7 @@ void init_btowc_cache()
 {
        int i;

-       for (i = 0; i < 255; i++) {
+       for (i = 0; i <= 255; i++) {
                btowc_cache[i] = btowc(i);
        }
 }


diff --git a/awk.h b/awk.h
index 70d132f4..e9225364 100644
--- a/awk.h
+++ b/awk.h
@@ -1265,8 +1265,11 @@ static inline void
 DEREF(NODE *r)
 {
        assert(r->valref > 0);
-       if (--r->valref == 0)
-               r_unref(r);
+#ifndef GAWKDEBUG
+       if (--r->valref > 0)
+               return;
+#endif
+       r_unref(r);
 }

 #define POP_NUMBER() force_number(POP_SCALAR())
@@ -1924,6 +1927,7 @@ force_string_fmt(NODE *s, const char *fmtstr, int
fmtidx)
 static inline void
 unref(NODE *r)
 {
+       assert(r == NULL || r->valref > 0);
        if (r != NULL && --r->valref <= 0)
                r_unref(r);
 }


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4) io.c - 1 issue:
- Windows: gawk_popen(): os_close_on_exec() already done for p[0]


diff --git a/io.c b/io.c
index f954d6e3..7c69fcf3 100644
--- a/io.c
+++ b/io.c
@@ -2681,8 +2681,8 @@ gawk_popen(const char *cmd, struct redirect *rp)
                close(p[0]);
                fatal(_("close of pipe failed: %s"), strerror(errno));
        }
-#endif
        os_close_on_exec(p[0], cmd, "pipe", "from");
+#endif
        if ((BINMODE & BINMODE_INPUT) != 0)
                os_setbinmode(p[0], O_BINARY);
        rp->iop = iop_alloc(p[0], cmd, 0);


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5) builtin.c - 1 issue
- !ENABLE_NLS: do_bindtextdomain(): strlen() called with NULL pointer.
"directory" may be NULL, "bindtextdomain(NULL)" returns NULL, so
"the_result" also may be NULL.


diff --git a/builtin.c b/builtin.c
index dc8d1a8e..7ef2acd8 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3997,6 +3997,9 @@ do_bindtextdomain(int nargs)
                DEREF(t2);
        }

+       if (the_result == NULL)
+               the_result = "";
+
        return make_string(the_result, strlen(the_result));
 }


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6) missing_d/fnmatch.c - 4 issues:
missing_d/fnmatch.c:401 "cold = c;"                 - "cold" should be
FOLD'ed.
missing_d/fnmatch.c:356 "if (c < 'a' || c >= 'z')"  - ignoring 'z'. Why?
missing_d/fnmatch.c:391 "#endif"                    - need to add "c =
*p++;" after "#endif"
missing_d/fnmatch.c:332 "goto matched"              - need to add "c =
*p++;" after "goto matched"

But, I think the missing_d/fnmatch.c code is too old and incomplete to fix
bugs in it.
So I rewrote most of this file to port gawk to Windows.


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7) extension/gawkfts.c - 2 issues:
- fts_open(): memory leak: fts_sort() allocates "sp->fts_array", but it's
not freed if next fts_alloc() fails.
- fts_build(): memory leak: need to free "head" list before returning NULL
on failure.


diff --git a/extension/gawkfts.c b/extension/gawkfts.c
index c7fbc17d..421318aa 100644
--- a/extension/gawkfts.c
+++ b/extension/gawkfts.c
@@ -228,7 +228,7 @@ fts_open(char * const *argv, int options,
         * so that everything about the "current" node is ignored.
         */
        if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
-               goto mem3;
+               goto mem4;
        sp->fts_cur->fts_link = root;
        sp->fts_cur->fts_info = FTS_INIT;

@@ -252,6 +252,8 @@ fts_open(char * const *argv, int options,

        return (sp);

+mem4:   if (sp->fts_array != NULL)
+               free(sp->fts_array);
 mem3:  fts_lfree(root);
        fts_free(parent);
 mem2:  free(sp->fts_path);
@@ -916,6 +918,7 @@ mem1:                               saved_errno = errno;
            (cur->fts_level == FTS_ROOTLEVEL ?
            FCHDIR(sp, sp->fts_rfd) :
            fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
+               fts_lfree(head);
                cur->fts_info = FTS_ERR;
                SET(FTS_STOP);
                return (NULL);


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8) extension/readdir.c - 1 issue:
- Windows: get_inode(): handle leak: opened handle do not closed.


diff --git a/extension/readdir.c b/extension/readdir.c
index cc1cd505..fb739754 100644
--- a/extension/readdir.c
+++ b/extension/readdir.c
@@ -144,6 +144,7 @@ get_inode(struct dirent *entry, const char *dirname)
 #ifdef __MINGW32__
        char fname[PATH_MAX];
        HANDLE fh;
+       BOOL ok;
        BY_HANDLE_FILE_INFORMATION info;

        sprintf(fname, "%s\\%s", dirname, entry->d_name);
@@ -151,7 +152,9 @@ get_inode(struct dirent *entry, const char *dirname)
                        FILE_FLAG_BACKUP_SEMANTICS, NULL);
        if (fh == INVALID_HANDLE_VALUE)
                return 0;
-       if (GetFileInformationByHandle(fh, &info)) {
+       ok = GetFileInformationByHandle(fh, &info);
+       CloseHandle(fh);
+       if (ok) {
                long long inode = info.nFileIndexHigh;

                inode <<= 32;


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9) extension/filefuncs.c - 4 issues:
- Windows: get_inode(): handle leak: opened handle do not closed,
- Windows: fill_stat_array(): wrong block size calculation,
- do_fts(): conflicting flags: FTS_SKIP == FTS_NOCHDIR:
According to the documentation (
http://man7.org/linux/man-pages/man3/fts.3.html),
setting the FTS_NOCHDIR flag should only disable the optimization of
changing the current directory when scanning directories.
However, now setting this flag in the gawk script also includes setting
FTS_SKIP, which prevents recursive directory traversal.


diff --git a/extension/filefuncs.c b/extension/filefuncs.c
index 86e60321..83141685 100644
--- a/extension/filefuncs.c
+++ b/extension/filefuncs.c
@@ -126,13 +126,16 @@ static long long
 get_inode(const char *fname)
 {
        HANDLE fh;
+       BOOL ok;
        BY_HANDLE_FILE_INFORMATION info;

        fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING,
                        FILE_FLAG_BACKUP_SEMANTICS, NULL);
        if (fh == INVALID_HANDLE_VALUE)
                return 0;
-       if (GetFileInformationByHandle(fh, &info)) {
+       ok = GetFileInformationByHandle(fh, &info);
+       CloseHandle(fh);
+       if (ok) {
                long long inode = info.nFileIndexHigh;

                inode <<= 32;
@@ -403,7 +406,8 @@ fill_stat_array(const char *name, awk_array_t array,
struct stat *sbuf)
        array_set_numeric(array, "gid", sbuf->st_gid);
        array_set_numeric(array, "size", sbuf->st_size);
 #ifdef __MINGW32__
-       array_set_numeric(array, "blocks", (sbuf->st_size + 4095) / 4096);
+       array_set_numeric(array, "blocks", (double)((sbuf->st_size +
+               device_blocksize() - 1) / device_blocksize()));
 #else
        array_set_numeric(array, "blocks", sbuf->st_blocks);
 #endif
@@ -568,6 +572,7 @@ init_filefuncs(void)

 #ifndef __MINGW32__
        /* at least right now, only FTS needs initializing */
+#define FTS_NON_RECURSIVE      FTS_STOP        /* Don't step into
directories.  */
        static struct flagtab {
                const char *name;
                int value;
@@ -579,7 +584,7 @@ init_filefuncs(void)
                ENTRY(FTS_PHYSICAL),
                ENTRY(FTS_SEEDOT),
                ENTRY(FTS_XDEV),
-               ENTRY(FTS_SKIP),
+               {"FTS_SKIP", FTS_NON_RECURSIVE},
                { NULL, 0 }
        };

@@ -836,7 +841,7 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
        int ret = -1;
        static const int mask = (
                  FTS_COMFOLLOW | FTS_LOGICAL | FTS_NOCHDIR | FTS_PHYSICAL
-               | FTS_SEEDOT | FTS_XDEV | FTS_SKIP);
+               | FTS_SEEDOT | FTS_XDEV | FTS_NON_RECURSIVE);

        assert(result != NULL);
        fts_errors = 0;         /* ensure a fresh start */
@@ -886,6 +891,9 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
        }
        flags &= mask;  /* turn off anything else */

+       if (flags & FTS_NON_RECURSIVE)
+               flags |= FTS_NOCHDIR;
+
        /* make pathvector */
        count = path_array->count + 1;
        ezalloc(pathvector, char **, count * sizeof(char *), "do_fts");
@@ -900,8 +908,11 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
        assert(clear_array(dest.array_cookie));

        /* let's do it! */
-       if ((hierarchy = fts_open(pathvector, flags, NULL)) != NULL) {
-               process(hierarchy, dest.array_cookie, (flags & FTS_SEEDOT)
!= 0, (flags & FTS_SKIP) != 0);
+       hierarchy = fts_open(pathvector, flags & ~FTS_NON_RECURSIVE, NULL);
+       if (hierarchy != NULL) {
+               process(hierarchy, dest.array_cookie,
+                       (flags & FTS_SEEDOT) != 0,
+                       (flags & FTS_NON_RECURSIVE) != 0);
                fts_close(hierarchy);

                if (fts_errors == 0)


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10) extension/rwarray0.c - 1 issue:
- result of read_array() is awk_bool_t, not error code.


diff --git a/extension/rwarray0.c b/extension/rwarray0.c
index 5298bea3..72a376c1 100644
--- a/extension/rwarray0.c
+++ b/extension/rwarray0.c
@@ -425,7 +425,7 @@ read_value(int fd, awk_value_t *value)
        if (code == 2) {
                awk_array_t array = create_array();

-               if (read_array(fd, array) != 0)
+               if (! read_array(fd, array))
                        return awk_false;

                /* hook into value */


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11) test/Gentests - 1 issue:
- incorrect locale key name for ja_JP.UTF-8, should be "JP", not "JA".
Because of this bug, mbprintf2 test is run without "GAWKLOCALE=ja_JP.UTF-8".
See:

Makefile.in:1770
NEED_LOCALE_JP = mbprintf2

Makefile.in:4990
mbprintf2:
        @echo $@ $(ZOS_FAIL)
        @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=; export GAWKLOCALE; \
        AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE:
$$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@


diff --git a/test/Gentests b/test/Gentests
index 78833f0f..392cbf2a 100755
--- a/test/Gentests
+++ b/test/Gentests
@@ -16,7 +16,7 @@ BEGIN {
        Locale["EN"] = "en_US.UTF-8"
        Locale["FR"] = "fr_FR.UTF-8"
        Locale["GR"] = "el_GR.iso88597"
-       Locale["JA"] = "ja_JP.UTF-8"
+       Locale["JP"] = "ja_JP.UTF-8"
        Locale["RU"] = "ru_RU.UTF-8"
 }



Best regards,
Michael Builov.


reply via email to

[Prev in Thread] Current Thread [Next in Thread]