>From e7e61aa05a66340f6c6e50efe03f1749b9944357 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 01:42:46 -0600 Subject: [PATCH 1/6] sed: do not close stderr on exit Not needed, and prevents address-sanitizing from working. * sed/utils.c (ck_fclose): Do not close stderr. --- sed/utils.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sed/utils.c b/sed/utils.c index a662b30..ccf1c1e 100644 --- a/sed/utils.c +++ b/sed/utils.c @@ -267,10 +267,7 @@ ck_fclose(FILE *stream) last output operations might fail and it is important to signal this as an error (perhaps to make). */ if (!stream) - { - do_ck_fclose (stdout); - do_ck_fclose (stderr); - } + do_ck_fclose (stdout); } /* Close a single file. */ -- 2.11.0 >From ffcaaeee90c54b1c56a72021058a21fe2a0125b8 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 02:13:12 -0600 Subject: [PATCH 2/6] sed: free allocated memory under DEBUG_LEAKS Under normal operation there is no need for explicit freeing, as all memory will be released with sed terminates. During development (and specifically, valgrind and address-sanitizing) enabling DEBUG_LEAKS prevents false-positive warnings about memory leaks. * sed/sed.h (struct subst): Add member variable to keep the address of allocated bufferin compile.c:setup_replacement(). (release_regex): Add declaration. (finish_program): Function now takes an argument: the sed program vector. * sed/sed.c (main): Adjust call to finish_program. * sed/compile.c (finish_program): Release program allocations. (setup_replacement): Remember the allocated buffer address. (compile_program): Free temporary array in 'y' command. * sed/execute.c (execute_program): Free allocated but unused buffer in 'R' command when reaching EOF. * sed/regexp.c (release_regex): Remove 'static', free the allocated dfa struct. * sed/utils.c (panic): Free open files linked-list elements. --- sed/compile.c | 32 +++++++++++++++++++++++++++++++- sed/execute.c | 9 +++++++++ sed/regexp.c | 6 ++++++ sed/sed.c | 2 +- sed/sed.h | 5 ++++- sed/utils.c | 7 +++++++ 6 files changed, 58 insertions(+), 3 deletions(-) diff --git a/sed/compile.c b/sed/compile.c index 06a750d..a7e7fe2 100644 --- a/sed/compile.c +++ b/sed/compile.c @@ -763,6 +763,10 @@ setup_replacement (struct subst *sub, const char *text, size_t length) base = MEMDUP(text, length, char); length = normalize_text(base, length, TEXT_REPLACEMENT); + #ifdef DEBUG_LEAKS + sub->replacement_buffer = base; + #endif + text_end = base + length; tail = &root; @@ -1337,6 +1341,10 @@ compile_program(struct vector *vector) trans_pairs[2 * i] = NULL; if (idx != dest_len) bad_prog(_(Y_CMD_LEN)); + + #ifdef DEBUG_LEAKS + free (src_lens); + #endif } else { @@ -1686,7 +1694,7 @@ rewind_read_files(void) /* Release all resources which were allocated in this module. */ void -finish_program(void) +finish_program(struct vector *program) { /* close all files... */ { @@ -1717,6 +1725,28 @@ finish_program(void) } #ifdef DEBUG_LEAKS + for (int i = 0; i < program->v_length; ++i) + { + const struct sed_cmd *sc = &program->v[i]; + + if (sc->a1 && sc->a1->addr_regex) + release_regex (sc->a1->addr_regex); + if (sc->a2 && sc->a2->addr_regex) + release_regex (sc->a2->addr_regex); + + switch (sc->cmd) + { + case 's': + free (sc->x.cmd_subst->replacement_buffer); + if (sc->x.cmd_subst->regx) + release_regex (sc->x.cmd_subst->regx); + break; + } + } + obstack_free (&obs, NULL); +#else + (void)program; #endif /*DEBUG_LEAKS*/ + } diff --git a/sed/execute.c b/sed/execute.c index 35c2eae..5587bc0 100644 --- a/sed/execute.c +++ b/sed/execute.c @@ -1503,6 +1503,15 @@ execute_program(struct vector *vec, struct input *input) aq->text = text; aq->textlen = result; } + #ifdef DEBUG_LEAKS + else + { + /* The external input file (for R command) reached EOF, + the 'text' buffer will not be added to the append queue + so release it */ + free (text); + } + #endif } break; diff --git a/sed/regexp.c b/sed/regexp.c index 3b060ec..fbe53cf 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -430,6 +430,12 @@ match_regex(struct regex *regex, char *buf, size_t buflen, void release_regex(struct regex *regex) { + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } regfree(®ex->pattern); free(regex); } diff --git a/sed/sed.c b/sed/sed.c index 23b2153..0250406 100644 --- a/sed/sed.c +++ b/sed/sed.c @@ -375,7 +375,7 @@ main (int argc, char **argv) return_code = process_files(the_program, argv+optind); - finish_program(); + finish_program(the_program); ck_fclose(NULL); return return_code; diff --git a/sed/sed.h b/sed/sed.h index 290f0ce..730b02d 100644 --- a/sed/sed.h +++ b/sed/sed.h @@ -126,6 +126,9 @@ struct subst { unsigned print : 2; /* 'p' option given (before/after eval) */ unsigned eval : 1; /* 'e' option given */ unsigned max_id : 4; /* maximum backreference on the RHS */ + #ifdef DEBUG_LEAKS + char* replacement_buffer; + #endif }; #ifdef REG_PERL @@ -191,7 +194,7 @@ struct vector *compile_string (struct vector *, char *str, size_t len); struct vector *compile_file (struct vector *, const char *cmdfile); void check_final_program (struct vector *); void rewind_read_files (void); -void finish_program (void); +void finish_program (struct vector *); struct regex *compile_regex (struct buffer *b, int flags, int needed_sub); int match_regex (struct regex *regex, diff --git a/sed/utils.c b/sed/utils.c index ccf1c1e..5a2fb70 100644 --- a/sed/utils.c +++ b/sed/utils.c @@ -74,7 +74,14 @@ panic(const char *str, ...) strerror (errno)); } + #ifdef DEBUG_LEAKS + struct open_file *next = open_files->link; + free (open_files->name); + free (open_files); + open_files = next; + #else open_files = open_files->link; + #endif } exit(EXIT_PANIC); -- 2.11.0 >From 00d0376408b0d8ed19d7c3bad9e568dc42e5b558 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 11:25:35 -0600 Subject: [PATCH 3/6] sed: fix memory leak (TODO: REVIEW) TODO: should this be enabled regardless of DEBUG_LEAK ? * sed/regexp.c (): Free the previously allocated regex struct before re-building the regex during program execution. --- sed/regexp.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/sed/regexp.c b/sed/regexp.c index fbe53cf..ab5ed46 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -271,7 +271,25 @@ match_regex(struct regex *regex, char *buf, size_t buflen, return (ret == 0); #else if (regex->pattern.no_sub && regsize) - compile_regex_1 (regex, regsize); + { + #ifdef DEBUG_LEAKS + /* Re-compiling an existing regex, free the previously allocated + structures. + + TODO: should these be freed regardless of DEBUG_LEAKS? + otherwise sed leaks memory proportionally to number + of executed regexes (O(n) of input size?). */ + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } + regfree(®ex->pattern); + #endif + + compile_regex_1 (regex, regsize); + } regex->pattern.regs_allocated = REGS_REALLOCATE; -- 2.11.0 >From c2e196134f2ee7e12c81667023318818fca34c2b Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 20:15:08 -0600 Subject: [PATCH 4/6] gnulib: update to latest (for regex memory leaks) Specifically for gnulib commits 66b99e5259,c5e76a9560. see https://lists.gnu.org/r/bug-gnulib/2018-07/msg00127.html --- gnulib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnulib b/gnulib index ad52c65..c5e76a9 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit ad52c652a64ebe3523437fb9bb714f99c1ffa9e6 +Subproject commit c5e76a95605b1ef70559f90ebb656e0a4efb8fd9 -- 2.11.0 >From d83f198b99780a89688c3134b6adc7f1385088c9 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 22:23:12 -0600 Subject: [PATCH 5/6] maint: add address-sanitizer build target use 'make build-asan' to rebuild sed with gcc's address sanitizer. * cfg.mk (build-asan): New target. --- cfg.mk | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cfg.mk b/cfg.mk index 7d78796..011339d 100644 --- a/cfg.mk +++ b/cfg.mk @@ -202,3 +202,15 @@ static-analysis-make: .PHONY: static-analysis static-analysis: static-analysis-init static-analysis-config \ static-analysis-make + +ASAN_FLAGS=-fsanitize=address -fno-omit-frame-pointer +ASAN_CFLAGS=-O0 -g -DDEBUG_LEAKS $(ASAN_FLAGS) +ASAN_LDFLAGS=$(ASAN_FLAGS) + +.PHONY: build-asan +build-asan: + test -x ./configure || \ + { echo "./configure script not found" >&2; exit 1; } + ./configure CFLAGS="$(ASAN_CFLAGS)" LDFLAGS="$(ASAN_LDFLAGS)" + make + -- 2.11.0 >From c380a31e83827469b3b8ccbebcf50cd155512dd8 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 23:40:48 -0600 Subject: [PATCH 6/6] maint: add undefined-behavior build target Using gcc-specific options, a recent gcc is required. build with: make build-ubsan CC=gcc-8.2 * cfg.mk (build-ubsan): New target. --- cfg.mk | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cfg.mk b/cfg.mk index 011339d..35436fc 100644 --- a/cfg.mk +++ b/cfg.mk @@ -214,3 +214,31 @@ build-asan: ./configure CFLAGS="$(ASAN_CFLAGS)" LDFLAGS="$(ASAN_LDFLAGS)" make +# NOTE: These options require a recent gcc (tested with gcc 8.2) +UBSAN_FLAGS=-fsanitize=undefined \ + -fsanitize=signed-integer-overflow \ + -fsanitize=bounds-strict \ + -fsanitize=shift \ + -fsanitize=shift-exponent \ + -fsanitize=shift-base \ + -fsanitize=integer-divide-by-zero \ + -fsanitize=null \ + -fsanitize=return \ + -fsanitize=alignment \ + -fsanitize=object-size \ + -fsanitize=nonnull-attribute \ + -fsanitize=returns-nonnull-attribute \ + -fsanitize=bool \ + -fsanitize=enum \ + -fsanitize=pointer-overflow \ + -fsanitize=builtin +UBSAN_CFLAGS=-O0 -g -DDEUG_LEAKS $(UBSAN_FLAGS) +UBSAN_LDFLAGS=$(UBSAN_FLAGS) + +.PHONY: build-ubsan +build-ubsan: + test -x ./configure || \ + { echo "./configure script not found" >&2; exit 1; } + ./configure CFLAGS="$(UBSAN_CFLAGS)" LDFLAGS="$(UBSAN_LDFLAGS)" + make + -- 2.11.0