[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
decompress.c now also compresses
From: |
Juan José García-Ripoll |
Subject: |
decompress.c now also compresses |
Date: |
Sun, 29 Mar 2020 17:52:03 +0200 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/26.3 (windows-nt) |
Hi,
I attach a patch that adds support for compressing buffers using
zlib. It is a minor extension to the file src/decompress.c, but it may be
useful for two reasons: (i) on Windows, Emacs is shipped without
g[un]zip.exe; (ii) the whole process of compression takes about 20 times
less time than calling gzip.
(benchmark 1
'(mapc 'simple-zlib-compress
(directory-files "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 2.602588s (0.014894s in 1 GCs)
(benchmark 1
'(mapc 'simple-gzip-compress
(directory-files "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 61.986128s (0.039815s in 3 GCs)
I attach a patch that was produced against emacs-27 but also seems to
work against emacs-28 (at least the decompress.c part, I am unsure about
how NEWS should be edited).
I also attach a Lisp file that tests and benchmarks
compression/decompression using Emacs' Lisp sources.
Best
Juanjo
--
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es
diff --git a/etc/NEWS b/etc/NEWS
index d3f27e3..96ddf5c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -3656,6 +3656,10 @@ easier to undo immediately afterwards.
** When called interactively, 'next-buffer' and 'previous-buffer' now
signal 'user-error' if there is no buffer to switch to.
+---
+** New function 'zlib-compress-region' compresses a region of a unibyte
+buffer with the zlib library, producing gzip format by default.
+
* Changes in Emacs 27.1 on Non-Free Operating Systems
diff --git a/src/decompress.c b/src/decompress.c
index 5d24638..68178db 100644
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -1,247 +1,394 @@
-/* Interface to zlib.
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GNU Emacs.
-
-GNU Emacs is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or (at
-your option) any later version.
-
-GNU Emacs is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#ifdef HAVE_ZLIB
-
-#include <zlib.h>
-
-#include "lisp.h"
-#include "buffer.h"
-#include "composite.h"
-
-#include <verify.h>
-
-#ifdef WINDOWSNT
-# include <windows.h>
-# include "w32common.h"
-# include "w32.h"
-
-DEF_DLL_FN (int, inflateInit2_,
- (z_streamp strm, int windowBits, const char *version,
- int stream_size));
-DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
-DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
-
-static bool zlib_initialized;
-
-static bool
-init_zlib_functions (void)
-{
- HMODULE library = w32_delayed_load (Qzlib);
-
- if (!library)
- return false;
-
- LOAD_DLL_FN (library, inflateInit2_);
- LOAD_DLL_FN (library, inflate);
- LOAD_DLL_FN (library, inflateEnd);
- return true;
-}
-
-# undef inflate
-# undef inflateEnd
-# undef inflateInit2_
-
-# define inflate fn_inflate
-# define inflateEnd fn_inflateEnd
-# define inflateInit2_ fn_inflateInit2_
-
-#endif /* WINDOWSNT */
-
-
-struct decompress_unwind_data
-{
- ptrdiff_t old_point, orig, start, nbytes;
- z_stream *stream;
-};
-
-static void
-unwind_decompress (void *ddata)
-{
- struct decompress_unwind_data *data = ddata;
- inflateEnd (data->stream);
-
- /* Delete any uncompressed data already inserted on error, but
- without calling the change hooks. */
- if (data->start)
- {
- del_range_2 (data->start, data->start, /* byte, char offsets the same */
- data->start + data->nbytes, data->start + data->nbytes,
- 0);
- update_compositions (data->start, data->start, CHECK_HEAD);
- /* "Balance" the before-change-functions call, which would
- otherwise be left "hanging". */
- signal_after_change (data->orig, data->start - data->orig,
- data->start - data->orig);
- }
- /* Put point where it was, or if the buffer has shrunk because the
- compressed data is bigger than the uncompressed, at
- point-max. */
- SET_PT (min (data->old_point, ZV));
-}
-
-DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
- doc: /* Return t if zlib decompression is available in this instance of
Emacs. */)
- (void)
-{
-#ifdef WINDOWSNT
- Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
- if (CONSP (found))
- return XCDR (found);
- else
- {
- Lisp_Object status;
- zlib_initialized = init_zlib_functions ();
- status = zlib_initialized ? Qt : Qnil;
- Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache);
- return status;
- }
-#else
- return Qt;
-#endif
-}
-
-DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
- Szlib_decompress_region,
- 2, 3, 0,
- doc: /* Decompress a gzip- or zlib-compressed region.
-Replace the text in the region by the decompressed data.
-
-If optional parameter ALLOW-PARTIAL is nil or omitted, then on
-failure, return nil and leave the data in place. Otherwise, return
-the number of bytes that were not decompressed and replace the region
-text by whatever data was successfully decompressed (similar to gzip).
-If decompression is completely successful return t.
-
-This function can be called only in unibyte buffers. */)
- (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
-{
- ptrdiff_t istart, iend, pos_byte;
- z_stream stream;
- int inflate_status;
- struct decompress_unwind_data unwind_data;
- ptrdiff_t count = SPECPDL_INDEX ();
-
- validate_region (&start, &end);
-
- if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
- error ("This function can be called only in unibyte buffers");
-
-#ifdef WINDOWSNT
- if (!zlib_initialized)
- zlib_initialized = init_zlib_functions ();
- if (!zlib_initialized)
- {
- message1 ("zlib library not found");
- return Qnil;
- }
-#endif
-
- /* This is a unibyte buffer, so character positions and bytes are
- the same. */
- istart = XFIXNUM (start);
- iend = XFIXNUM (end);
-
- /* Do the following before manipulating the gap. */
- modify_text (istart, iend);
-
- move_gap_both (iend, iend);
-
- stream.zalloc = Z_NULL;
- stream.zfree = Z_NULL;
- stream.opaque = Z_NULL;
- stream.avail_in = 0;
- stream.next_in = Z_NULL;
-
- /* The magic number 32 apparently means "autodetect both the gzip and
- zlib formats" according to zlib.h. */
- if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
- return Qnil;
-
- unwind_data.orig = istart;
- unwind_data.start = iend;
- unwind_data.stream = &stream;
- unwind_data.old_point = PT;
- unwind_data.nbytes = 0;
- record_unwind_protect_ptr (unwind_decompress, &unwind_data);
-
- /* Insert the decompressed data at the end of the compressed data. */
- SET_PT (iend);
-
- pos_byte = istart;
-
- /* Keep calling 'inflate' until it reports an error or end-of-input. */
- do
- {
- /* Maximum number of bytes that one 'inflate' call should read and write.
- Do not make avail_out too large, as that might unduly delay C-g.
- zlib requires that avail_in and avail_out not exceed UINT_MAX. */
- ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
- int avail_out = 16 * 1024;
- int decompressed;
-
- if (GAP_SIZE < avail_out)
- make_gap (avail_out - GAP_SIZE);
- stream.next_in = BYTE_POS_ADDR (pos_byte);
- stream.avail_in = avail_in;
- stream.next_out = GPT_ADDR;
- stream.avail_out = avail_out;
- inflate_status = inflate (&stream, Z_NO_FLUSH);
- pos_byte += avail_in - stream.avail_in;
- decompressed = avail_out - stream.avail_out;
- insert_from_gap (decompressed, decompressed, 0);
- unwind_data.nbytes += decompressed;
- maybe_quit ();
- }
- while (inflate_status == Z_OK);
-
- Lisp_Object ret = Qt;
- if (inflate_status != Z_STREAM_END)
- {
- if (!NILP (allow_partial))
- ret = make_int (iend - pos_byte);
- else
- return unbind_to (count, Qnil);
- }
-
- unwind_data.start = 0;
-
- /* Delete the compressed data. */
- del_range_2 (istart, istart, /* byte and char offsets are the same. */
- iend, iend, 0);
-
- signal_after_change (istart, iend - istart, unwind_data.nbytes);
- update_compositions (istart, istart, CHECK_HEAD);
-
- return unbind_to (count, ret);
-}
-
-
-/***********************************************************************
- Initialization
- ***********************************************************************/
-void
-syms_of_decompress (void)
-{
- defsubr (&Szlib_decompress_region);
- defsubr (&Szlib_available_p);
-}
-
-#endif /* HAVE_ZLIB */
+/* Interface to zlib.
+ Copyright (C) 2013-2020 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#ifdef HAVE_ZLIB
+
+#include <zlib.h>
+
+#include "lisp.h"
+#include "buffer.h"
+#include "composite.h"
+
+#include <verify.h>
+
+#ifdef WINDOWSNT
+# include <windows.h>
+# include "w32common.h"
+# include "w32.h"
+
+/* Declare the zlib entry points that init_zlib_functions resolves
+   from the DLL.  inflateInit2_ and deflateInit2_ (note the trailing
+   underscore) are imported because inflateInit2 and deflateInit2,
+   which the code below calls, are macros that zlib.h defines on top
+   of these symbols.  */
+DEF_DLL_FN (int, inflateInit2_,
+            (z_streamp strm, int windowBits, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
+DEF_DLL_FN (int, deflateInit2_,
+            (z_streamp strm, int level, int method, int windowBits,
+             int memLevel, int strategy, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, deflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, deflateEnd, (z_streamp strm));
+
+static bool zlib_initialized;
+
+static bool
+init_zlib_functions (void)
+{ /* Obtain the zlib library handle and run LOAD_DLL_FN for every zlib
+ entry point this file uses.  Return false if no library handle is
+ obtained, true after the last LOAD_DLL_FN.
+ NOTE(review): LOAD_DLL_FN is defined elsewhere; presumably it
+ returns false from this function when a symbol is missing --
+ confirm. */
+ HMODULE library = w32_delayed_load (Qzlib);
+
+ if (!library)
+ return false;
+
+ LOAD_DLL_FN (library, inflateInit2_);
+ LOAD_DLL_FN (library, inflate);
+ LOAD_DLL_FN (library, inflateEnd);
+ LOAD_DLL_FN (library, deflateInit2_); /* Needed by zlib-compress-region. */
+ LOAD_DLL_FN (library, deflate);
+ LOAD_DLL_FN (library, deflateEnd);
+ return true;
+}
+
+# undef inflate
+# undef inflateEnd
+# undef inflateInit2_
+# undef deflate
+# undef deflateEnd
+# undef deflateInit2_
+
+# define inflate fn_inflate
+# define inflateEnd fn_inflateEnd
+# define inflateInit2_ fn_inflateInit2_
+# define deflate fn_deflate
+# define deflateEnd fn_deflateEnd
+# define deflateInit2_ fn_deflateInit2_
+
+#endif /* WINDOWSNT */
+
+
+/* State handed to unwind_zlib so that it can clean up after a
+   compression or decompression that did not run to completion.  */
+struct decompress_unwind_data
+{
+  /* OLD_POINT is the value of point before the operation began.
+     ORIG is the start of the region being replaced.  START is the
+     position where output is inserted (the end of the input); it is
+     reset to zero on success so that the cleanup keeps the output.
+     NBYTES is the number of output bytes inserted so far.  */
+  ptrdiff_t old_point, orig, start, nbytes;
+
+  /* The zlib stream to release.  */
+  z_stream *stream;
+
+  /* True if STREAM must be released with deflateEnd, false if it
+     must be released with inflateEnd.  */
+  bool deflating;
+};
+
+/* Unwind handler shared by zlib-decompress-region and
+   zlib-compress-region.  DDATA points to a struct
+   decompress_unwind_data describing the operation in progress.  */
+static void
+unwind_zlib (void *ddata)
+{
+  struct decompress_unwind_data *state = ddata;
+
+  /* Release the stream with the call that matches its direction.  */
+  if (state->deflating)
+    deflateEnd (state->stream);
+  else
+    inflateEnd (state->stream);
+
+  /* A nonzero START means the operation did not complete: remove
+     whatever output was already inserted, without calling the change
+     hooks.  */
+  if (state->start)
+    {
+      /* Byte and char offsets are the same here.  */
+      ptrdiff_t out_beg = state->start;
+      ptrdiff_t out_end = out_beg + state->nbytes;
+      ptrdiff_t in_len = out_beg - state->orig;
+
+      del_range_2 (out_beg, out_beg, out_end, out_end, 0);
+      update_compositions (out_beg, out_beg, CHECK_HEAD);
+      /* "Balance" the before-change-functions call, which would
+         otherwise be left "hanging".  */
+      signal_after_change (state->orig, in_len, in_len);
+    }
+
+  /* Restore point, clamping it to point-max in case the buffer is now
+     shorter than it was.  */
+  SET_PT (min (state->old_point, ZV));
+}
+
+DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
+ doc: /* Return t if zlib compression and decompression are available in
+this instance of Emacs. */)
+ (void)
+{
+#ifdef WINDOWSNT
+ Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
+ if (CONSP (found)) /* An earlier call already recorded the result. */
+ return XCDR (found);
+ else
+ {
+ Lisp_Object status;
+ zlib_initialized = init_zlib_functions ();
+ status = zlib_initialized ? Qt : Qnil;
+ Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache); /* Remember the result for later calls. */
+ return status;
+ }
+#else
+ return Qt;
+#endif
+}
+
+DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
+ Szlib_decompress_region,
+ 2, 3, 0,
+ doc: /* Decompress a gzip- or zlib-compressed region.
+Replace the text in the region by the decompressed data.
+
+If optional parameter ALLOW-PARTIAL is nil or omitted, then on
+failure, return nil and leave the data in place. Otherwise, return
+the number of bytes that were not decompressed and replace the region
+text by whatever data was successfully decompressed (similar to gzip).
+If decompression is completely successful return t.
+
+This function can be called only in unibyte buffers. */)
+ (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
+{
+ ptrdiff_t istart, iend, pos_byte;
+ z_stream stream;
+ int inflate_status;
+ struct decompress_unwind_data unwind_data;
+ ptrdiff_t count = SPECPDL_INDEX ();
+
+ validate_region (&start, &end);
+
+ if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+ error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+ if (!zlib_initialized)
+ zlib_initialized = init_zlib_functions ();
+ if (!zlib_initialized)
+ {
+ message1 ("zlib library not found");
+ return Qnil;
+ }
+#endif
+
+ /* This is a unibyte buffer, so character positions and bytes are
+ the same. */
+ istart = XFIXNUM (start);
+ iend = XFIXNUM (end);
+
+ /* Do the following before manipulating the gap. */
+ modify_text (istart, iend);
+
+ move_gap_both (iend, iend);
+
+ stream.zalloc = Z_NULL;
+ stream.zfree = Z_NULL;
+ stream.opaque = Z_NULL;
+ stream.avail_in = 0;
+ stream.next_in = Z_NULL;
+
+ /* Adding 32 to the window bits makes zlib detect the gzip and zlib
+ header formats automatically, according to zlib.h.
+ NOTE(review): a failure here returns before unwind_zlib is
+ registered, so nothing balances the before-change call that
+ modify_text above presumably made -- confirm this is acceptable. */
+ if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
+ return Qnil;
+
+ unwind_data.orig = istart;
+ unwind_data.start = iend;
+ unwind_data.stream = &stream;
+ unwind_data.old_point = PT;
+ unwind_data.nbytes = 0;
+ unwind_data.deflating = 0;
+ record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+ /* Insert the decompressed data at the end of the compressed data. */
+ SET_PT (iend);
+
+ pos_byte = istart;
+
+ /* Keep calling 'inflate' until it reports an error or end-of-input. */
+ do
+ {
+ /* Maximum number of bytes that one 'inflate' call should read and write.
+ Do not make avail_out too large, as that might unduly delay C-g.
+ zlib requires that avail_in and avail_out not exceed UINT_MAX. */
+ ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+ int avail_out = 16 * 1024;
+ int decompressed;
+
+ if (GAP_SIZE < avail_out)
+ make_gap (avail_out - GAP_SIZE);
+ stream.next_in = BYTE_POS_ADDR (pos_byte);
+ stream.avail_in = avail_in;
+ stream.next_out = GPT_ADDR;
+ stream.avail_out = avail_out;
+ inflate_status = inflate (&stream, Z_NO_FLUSH);
+ pos_byte += avail_in - stream.avail_in;
+ decompressed = avail_out - stream.avail_out;
+ insert_from_gap (decompressed, decompressed, 0);
+ unwind_data.nbytes += decompressed;
+ maybe_quit ();
+ }
+ while (inflate_status == Z_OK);
+
+ Lisp_Object ret = Qt;
+ if (inflate_status != Z_STREAM_END)
+ {
+ if (!NILP (allow_partial))
+ ret = make_int (iend - pos_byte);
+ else
+ return unbind_to (count, Qnil); /* unwind_zlib removes the partial output. */
+ }
+
+ unwind_data.start = 0; /* A zero START tells unwind_zlib to keep the inserted data. */
+
+ /* Delete the compressed data. */
+ del_range_2 (istart, istart, /* byte and char offsets are the same. */
+ iend, iend, 0);
+
+ signal_after_change (istart, iend - istart, unwind_data.nbytes);
+ update_compositions (istart, istart, CHECK_HEAD);
+
+ return unbind_to (count, ret);
+}
+
+
+DEFUN ("zlib-compress-region", Fzlib_compress_region,
+       Szlib_compress_region,
+       2, 3, 0,
+       doc: /* Compress the region to a gzip or zlib stream.
+Replace the text between START and END by the compressed data.
+
+If optional parameter ZLIB is nil or omitted, use the gzip wrapper
+format; otherwise, use the zlib wrapper format.
+
+Return t if compression is completely successful.  On failure,
+return nil and leave the original text in place.
+
+This function can be called only in unibyte buffers.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object zlib)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int deflate_status;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+  bool gzipp = NILP (zlib);
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap.  */
+  modify_text (istart, iend);
+
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* Start the deflate process at the highest compression level.
+     Adding 16 to the window bits selects the gzip wrapper instead of
+     the zlib one; 8 is zlib's default memLevel.  */
+  if (deflateInit2 (&stream, Z_BEST_COMPRESSION, Z_DEFLATED,
+                    MAX_WBITS + (gzipp ? 16 : 0),
+                    8, Z_DEFAULT_STRATEGY) != Z_OK)
+    return Qnil;
+
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 1;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the compressed data at the end of the uncompressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'deflate' until it reports an error or the end of
+     the stream.  */
+  do
+    {
+      /* Maximum number of bytes that one 'deflate' call should read
+         and write.  Do not make avail_out too large, as that might
+         unduly delay C-g.  zlib requires that avail_in and avail_out
+         not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int compressed;
+
+      /* Once this call hands zlib the last of the input, ask it to
+         finish the stream.  While the output does not fit in
+         avail_out, deflate keeps returning Z_OK and is called again
+         with Z_FINISH and whatever input it left unconsumed.  */
+      int flush = (pos_byte + avail_in == iend) ? Z_FINISH : Z_NO_FLUSH;
+
+      if (GAP_SIZE < avail_out)
+        make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      deflate_status = deflate (&stream, flush);
+
+      pos_byte += avail_in - stream.avail_in;
+      compressed = avail_out - stream.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      unwind_data.nbytes += compressed;
+      maybe_quit ();
+    }
+  while (deflate_status == Z_OK);
+
+  /* When compression did not succeed, let unwind_zlib delete the
+     partial output; the original text stays in place.  */
+  if (deflate_status != Z_STREAM_END)
+    return unbind_to (count, Qnil);
+
+  /* Success: keep the inserted data when unwinding.  */
+  unwind_data.start = 0;
+
+  /* Delete the uncompressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same.  */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, Qt);
+}
+
+
+/***********************************************************************
+ Initialization
+
+ syms_of_decompress hands each subr object defined in this file
+ (Szlib_decompress_region, Szlib_compress_region and
+ Szlib_available_p) to defsubr.
+ ***********************************************************************/
+void
+syms_of_decompress (void)
+{
+ defsubr (&Szlib_decompress_region);
+ defsubr (&Szlib_compress_region);
+ defsubr (&Szlib_available_p);
+}
+
+#endif /* HAVE_ZLIB */
test.lisp
Description: Binary data
- decompress.c now also compresses,
Juan José García-Ripoll <=