From 5d63e17e7ff25badfaad23ce01737b9b4dde2ab2 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Dec 2021 15:38:02 -0800 Subject: [PATCH] gzip: gzip -l now outputs accurate size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gzip -l now decompresses to see how long the uncompressed file was. This fixes what is by far the most common bug report for gzip. It has a significant performance cost, but it’s worth it nowadays. * gzip.c (main): -l now sets 'test' too. All uses of 'test' changed. (treat_stdin, treat_file): Call do_list after decompressing, so that the length is known. (do_list): Omit arg IFD, since it is no longer needed. All callers changed. Get the CRC and uncompressed size from input_crc and bytes_out instead of using lseek. * tests/list-big: New test. * unzip.c (unzip): Set unzip_crc before returning. * util.c (write_buf): If 'test', output nothing. Update bytes_out with output byte count, regardless of 'test'. All callers changed. 
--- gzip.c | 66 ++++++++++++++++++----------------------------- gzip.h | 1 + tests/Makefile.am | 1 + tests/list-big | 31 ++++++++++++++++++++++ unlzh.c | 5 ++-- unlzw.c | 17 ++++-------- unzip.c | 3 +++ util.c | 18 ++++++------- 8 files changed, 76 insertions(+), 66 deletions(-) create mode 100755 tests/list-big diff --git a/gzip.c b/gzip.c index 735ee0a..ecb19da 100644 --- a/gzip.c +++ b/gzip.c @@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags); local int make_ofname (void); local void shorten_name (char *name); local int get_method (int in); -local void do_list (int ifd, int method); +local void do_list (int method); local int check_ofname (void); local void copy_stat (struct stat *ifstat); local void install_signal_handlers (void); @@ -535,7 +535,7 @@ int main (int argc, char **argv) case 'k': keep = 1; break; case 'l': - list = decompress = to_stdout = 1; break; + list = decompress = test = to_stdout = 1; break; case 'L': license (); finish_out (); break; case 'm': /* undocumented, may change later */ @@ -655,7 +655,7 @@ int main (int argc, char **argv) /* And get to work */ if (file_count != 0) { - if (to_stdout && !test && !list && (!decompress || !ascii)) { + if (to_stdout && !test && (!decompress || !ascii)) { SET_BINARY_MODE (STDOUT_FILENO); } while (optind < argc) { @@ -673,7 +673,7 @@ int main (int argc, char **argv) { /* Output any totals, and check for output errors. 
*/ if (!quiet && 1 < file_count) - do_list (-1, -1); + do_list (-1); if (fflush (stdout) != 0) write_error (); } @@ -759,7 +759,7 @@ local void treat_stdin() if (decompress || !ascii) { SET_BINARY_MODE (STDIN_FILENO); } - if (!test && !list && (!decompress || !ascii)) { + if (!test && (!decompress || !ascii)) { SET_BINARY_MODE (STDOUT_FILENO); } strcpy(ifname, "stdin"); @@ -786,10 +786,6 @@ local void treat_stdin() do_exit(exit_code); /* error message already emitted */ } } - if (list) { - do_list(ifd, method); - return; - } /* Actually do the compression/decompression. Loop over zipped members. */ @@ -805,6 +801,12 @@ local void treat_stdin() bytes_out = 0; /* required for length check */ } + if (list) + { + do_list (method); + return; + } + if (verbose) { if (test) { fprintf(stderr, " OK\n"); @@ -949,7 +951,7 @@ local void treat_file(iname) /* Generate output file name. For -r and (-t or -l), skip files * without a valid gzip suffix (check done in make_ofname). */ - if (to_stdout && !list && !test) { + if (to_stdout && !test) { strcpy(ofname, "stdout"); } else if (make_ofname() != OK) { @@ -967,12 +969,6 @@ local void treat_file(iname) return; /* error message already emitted */ } } - if (list) { - do_list(ifd, method); - if (close (ifd) != 0) - read_error (); - return; - } /* If compressing to a file, check if ofname is not ambiguous * because the operating system truncates names. 
Otherwise, generate @@ -992,7 +988,7 @@ local void treat_file(iname) /* Keep the name even if not truncated except with --no-name: */ if (!save_orig_name) save_orig_name = !no_name; - if (verbose) { + if (verbose && !list) { fprintf(stderr, "%s:\t", ifname); } @@ -1015,6 +1011,12 @@ local void treat_file(iname) if (close (ifd) != 0) read_error (); + if (list) + { + do_list (method); + return; + } + if (!to_stdout) { copy_stat (&istat); @@ -1066,7 +1068,7 @@ local void treat_file(iname) } else { display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr); } - if (!test && !to_stdout) + if (!test) fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with", ofname); fprintf(stderr, "\n"); @@ -1395,7 +1397,8 @@ local int make_ofname() /* With -t or -l, try all files (even without .gz suffix) * except with -r (behave as with just -dr). */ - if (!recursive && (list || test)) return OK; + if (!recursive && test) + return OK; /* Avoid annoying messages with -r */ if (verbose || (!recursive && !quiet)) { @@ -1688,7 +1691,6 @@ local int get_method(in) last_member = 1; if (imagic0 != EOF) { write_buf (STDOUT_FILENO, magic, 1); - bytes_out++; } } if (method >= 0) return method; @@ -1724,9 +1726,8 @@ local int get_method(in) * If the given method is < 0, display the accumulated totals. * IN assertions: time_stamp, header_bytes and ifile_size are initialized. */ -local void do_list(ifd, method) - int ifd; /* input file descriptor */ - int method; /* compression method */ +static void +do_list (int method) { ulg crc; /* original crc */ static int first_time = 1; @@ -1768,26 +1769,9 @@ local void do_list(ifd, method) return; } crc = (ulg)~0; /* unknown */ - bytes_out = -1L; - bytes_in = ifile_size; if (method == DEFLATED && !last_member) { - /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files. - * If the lseek fails, we could use read() to get to the end, but - * --list is used to get quick results. 
- * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if - * you are not concerned about speed. - */ - bytes_in = lseek(ifd, (off_t)(-8), SEEK_END); - if (bytes_in != -1L) { - uch buf[8]; - bytes_in += 8L; - if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) { - read_error(); - } - crc = LG(buf); - bytes_out = LG(buf+4); - } + crc = unzip_crc; } if (verbose) diff --git a/gzip.h b/gzip.h index db0305f..ebe3213 100644 --- a/gzip.h +++ b/gzip.h @@ -262,6 +262,7 @@ extern int zip (int in, int out); extern int file_read (char *buf, unsigned size); /* in unzip.c */ +extern ulg unzip_crc; extern int unzip (int in, int out); extern int check_zipfile (int in); diff --git a/tests/Makefile.am b/tests/Makefile.am index 256bbf7..18e7c8a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -21,6 +21,7 @@ TESTS = \ hufts \ keep \ list \ + list-big \ memcpy-abuse \ mixed \ null-suffix-clobber \ diff --git a/tests/list-big b/tests/list-big new file mode 100755 index 0000000..afa3310 --- /dev/null +++ b/tests/list-big @@ -0,0 +1,31 @@ +#!/bin/sh +# Exercise the --list option with a big file. + +# Copyright 2021 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ .. 
+ +truncate --size 4G big || framework_failure_ + +gzip -1 big || fail=1 +gzip -l big.gz >out || fail=1 +case $(cat out) in + *' 4294967296 '*' big') ;; + *) cat out; fail=1;; +esac + +Exit $fail diff --git a/unlzh.c b/unlzh.c index 37084fe..f018922 100644 --- a/unlzh.c +++ b/unlzh.c @@ -390,9 +390,8 @@ int unlzh(in, out) decode_start(); while (!done) { n = decode((unsigned) DICSIZ, window); - if (!test && n > 0) { - write_buf(out, (char*)window, n); - } + if (n > 0) + write_buf (out, window, n); } return OK; } diff --git a/unlzw.c b/unlzw.c index d7714b5..ba824e4 100644 --- a/unlzw.c +++ b/unlzw.c @@ -225,10 +225,8 @@ int unlzw(in, out) "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n", posbits, p[-1],p[0],p[1],p[2],p[3]); #endif - if (!test && outpos > 0) { - write_buf(out, (char*)outbuf, outpos); - bytes_out += (off_t)outpos; - } + if (outpos > 0) + write_buf (out, outbuf, outpos); gzip_error (to_stdout ? "corrupt input." : "corrupt input. Use zcat to recover some data."); @@ -257,10 +255,7 @@ int unlzw(in, out) outpos += i; } if (outpos >= OUTBUFSIZ) { - if (!test) { - write_buf(out, (char*)outbuf, outpos); - bytes_out += (off_t)outpos; - } + write_buf (out, outbuf, outpos); outpos = 0; } stackp+= i; @@ -281,9 +276,7 @@ int unlzw(in, out) } } while (rsize != 0); - if (!test && outpos > 0) { - write_buf(out, (char*)outbuf, outpos); - bytes_out += (off_t)outpos; - } + if (outpos > 0) + write_buf (out, outbuf, outpos); return OK; } diff --git a/unzip.c b/unzip.c index dacfbaf..b52811e 100644 --- a/unzip.c +++ b/unzip.c @@ -51,6 +51,8 @@ /* Globals */ +ulg unzip_crc; /* CRC found by 'unzip'. 
*/ + static int decrypt; /* flag to turn on decryption */ static int pkzip = 0; /* set for a pkzip file */ static int ext_header = 0; /* set if extended local header */ @@ -210,6 +212,7 @@ int unzip(in, out) } } ext_header = pkzip = 0; /* for next file */ + unzip_crc = orig_crc; if (err == OK) return OK; exit_code = ERROR; if (!test) abort_gzip(); diff --git a/util.c b/util.c index 4e73036..cd43886 100644 --- a/util.c +++ b/util.c @@ -112,7 +112,6 @@ int copy(in, out) errno = 0; while (insize > inptr) { write_buf(out, (char*)inbuf + inptr, insize - inptr); - bytes_out += insize - inptr; got = read_buffer (in, (char *) inbuf, INBUFSIZ); if (got == -1) read_error(); @@ -255,9 +254,7 @@ void flush_outbuf() { if (outcnt == 0) return; - if (!test) - write_buf (ofd, outbuf, outcnt); - bytes_out += (off_t)outcnt; + write_buf (ofd, outbuf, outcnt); outcnt = 0; } @@ -270,16 +267,13 @@ void flush_window() if (outcnt == 0) return; updcrc(window, outcnt); - if (!test) { - write_buf(ofd, (char *)window, outcnt); - } - bytes_out += (off_t)outcnt; + write_buf (ofd, window, outcnt); outcnt = 0; } /* =========================================================================== - * Does the same as write(), but also handles partial pipe writes and checks - * for error return. + * Update the count of output bytes. If testing, do not do any + * output. Otherwise, write the buffer, checking for errors. */ void write_buf(fd, buf, cnt) int fd; @@ -288,6 +282,10 @@ void write_buf(fd, buf, cnt) { unsigned n; + bytes_out += cnt; + if (test) + return; + while ((n = write_buffer (fd, buf, cnt)) != cnt) { if (n == (unsigned)(-1)) { write_error(); -- 2.33.1