emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

decompress.c now also compresses


From: Juan José García-Ripoll
Subject: decompress.c now also compresses
Date: Sun, 29 Mar 2020 17:52:03 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (windows-nt)

Hi,

I attach a patch that adds support for compressing buffers using
zlib. It is a minor extension to the file src/decompress.c, but it may be
useful for two reasons: (i) on Windows, Emacs is shipped without
g[un]zip.exe, and (ii) the whole compression process takes about 20 times
less time than calling gzip.

(benchmark 1
  '(mapc 'simple-zlib-compress
      (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 2.602588s (0.014894s in 1 GCs)

(benchmark 1
  '(mapc 'simple-gzip-compress
       (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 61.986128s (0.039815s in 3 GCs)

I attach a patch that was produced against emacs-27 but also seems to
work against emacs-28 (at least the decompress.c part, I am unsure about
how NEWS should be edited).

I also attach a Lisp file that tests and benchmarks
compression/decompression using Emacs' Lisp sources.

Best

Juanjo

-- 
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es
diff --git a/etc/NEWS b/etc/NEWS
index d3f27e3..96ddf5c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -3656,6 +3656,10 @@ easier to undo immediately afterwards.
 ** When called interactively, 'next-buffer' and 'previous-buffer' now
 signal 'user-error' if there is no buffer to switch to.
 
+---
+** New function 'zlib-compress-region' compresses a unibyte buffer region
+in gzip or zlib format, using the zlib library.
+
 
 * Changes in Emacs 27.1 on Non-Free Operating Systems
 
diff --git a/src/decompress.c b/src/decompress.c
index 5d24638..68178db 100644
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -1,247 +1,394 @@
-/* Interface to zlib.
-   Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GNU Emacs.
-
-GNU Emacs is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or (at
-your option) any later version.
-
-GNU Emacs is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-#ifdef HAVE_ZLIB
-
-#include <zlib.h>
-
-#include "lisp.h"
-#include "buffer.h"
-#include "composite.h"
-
-#include <verify.h>
-
-#ifdef WINDOWSNT
-# include <windows.h>
-# include "w32common.h"
-# include "w32.h"
-
-DEF_DLL_FN (int, inflateInit2_,
-           (z_streamp strm, int windowBits, const char *version,
-            int stream_size));
-DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
-DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
-
-static bool zlib_initialized;
-
-static bool
-init_zlib_functions (void)
-{
-  HMODULE library = w32_delayed_load (Qzlib);
-
-  if (!library)
-    return false;
-
-  LOAD_DLL_FN (library, inflateInit2_);
-  LOAD_DLL_FN (library, inflate);
-  LOAD_DLL_FN (library, inflateEnd);
-  return true;
-}
-
-# undef inflate
-# undef inflateEnd
-# undef inflateInit2_
-
-# define inflate fn_inflate
-# define inflateEnd fn_inflateEnd
-# define inflateInit2_ fn_inflateInit2_
-
-#endif /* WINDOWSNT */
-
-
-struct decompress_unwind_data
-{
-  ptrdiff_t old_point, orig, start, nbytes;
-  z_stream *stream;
-};
-
-static void
-unwind_decompress (void *ddata)
-{
-  struct decompress_unwind_data *data = ddata;
-  inflateEnd (data->stream);
-
-  /* Delete any uncompressed data already inserted on error, but
-     without calling the change hooks.  */
-  if (data->start)
-    {
-      del_range_2 (data->start, data->start, /* byte, char offsets the same */
-                   data->start + data->nbytes, data->start + data->nbytes,
-                   0);
-      update_compositions (data->start, data->start, CHECK_HEAD);
-      /* "Balance" the before-change-functions call, which would
-         otherwise be left "hanging". */
-      signal_after_change (data->orig, data->start - data->orig,
-                           data->start - data->orig);
-    }
-  /* Put point where it was, or if the buffer has shrunk because the
-     compressed data is bigger than the uncompressed, at
-     point-max.  */
-  SET_PT (min (data->old_point, ZV));
-}
-
-DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
-       doc: /* Return t if zlib decompression is available in this instance of 
Emacs.  */)
-     (void)
-{
-#ifdef WINDOWSNT
-  Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
-  if (CONSP (found))
-    return XCDR (found);
-  else
-    {
-      Lisp_Object status;
-      zlib_initialized = init_zlib_functions ();
-      status = zlib_initialized ? Qt : Qnil;
-      Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache);
-      return status;
-    }
-#else
-  return Qt;
-#endif
-}
-
-DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
-       Szlib_decompress_region,
-       2, 3, 0,
-       doc: /* Decompress a gzip- or zlib-compressed region.
-Replace the text in the region by the decompressed data.
-
-If optional parameter ALLOW-PARTIAL is nil or omitted, then on
-failure, return nil and leave the data in place.  Otherwise, return
-the number of bytes that were not decompressed and replace the region
-text by whatever data was successfully decompressed (similar to gzip).
-If decompression is completely successful return t.
-
-This function can be called only in unibyte buffers.  */)
-  (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
-{
-  ptrdiff_t istart, iend, pos_byte;
-  z_stream stream;
-  int inflate_status;
-  struct decompress_unwind_data unwind_data;
-  ptrdiff_t count = SPECPDL_INDEX ();
-
-  validate_region (&start, &end);
-
-  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
-    error ("This function can be called only in unibyte buffers");
-
-#ifdef WINDOWSNT
-  if (!zlib_initialized)
-    zlib_initialized = init_zlib_functions ();
-  if (!zlib_initialized)
-    {
-      message1 ("zlib library not found");
-      return Qnil;
-    }
-#endif
-
-  /* This is a unibyte buffer, so character positions and bytes are
-     the same.  */
-  istart = XFIXNUM (start);
-  iend = XFIXNUM (end);
-
-  /* Do the following before manipulating the gap. */
-  modify_text (istart, iend);
-
-  move_gap_both (iend, iend);
-
-  stream.zalloc = Z_NULL;
-  stream.zfree = Z_NULL;
-  stream.opaque = Z_NULL;
-  stream.avail_in = 0;
-  stream.next_in = Z_NULL;
-
-  /* The magic number 32 apparently means "autodetect both the gzip and
-     zlib formats" according to zlib.h.  */
-  if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
-    return Qnil;
-
-  unwind_data.orig = istart;
-  unwind_data.start = iend;
-  unwind_data.stream = &stream;
-  unwind_data.old_point = PT;
-  unwind_data.nbytes = 0;
-  record_unwind_protect_ptr (unwind_decompress, &unwind_data);
-
-  /* Insert the decompressed data at the end of the compressed data.  */
-  SET_PT (iend);
-
-  pos_byte = istart;
-
-  /* Keep calling 'inflate' until it reports an error or end-of-input.  */
-  do
-    {
-      /* Maximum number of bytes that one 'inflate' call should read and write.
-        Do not make avail_out too large, as that might unduly delay C-g.
-        zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
-      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
-      int avail_out = 16 * 1024;
-      int decompressed;
-
-      if (GAP_SIZE < avail_out)
-       make_gap (avail_out - GAP_SIZE);
-      stream.next_in = BYTE_POS_ADDR (pos_byte);
-      stream.avail_in = avail_in;
-      stream.next_out = GPT_ADDR;
-      stream.avail_out = avail_out;
-      inflate_status = inflate (&stream, Z_NO_FLUSH);
-      pos_byte += avail_in - stream.avail_in;
-      decompressed = avail_out - stream.avail_out;
-      insert_from_gap (decompressed, decompressed, 0);
-      unwind_data.nbytes += decompressed;
-      maybe_quit ();
-    }
-  while (inflate_status == Z_OK);
-
-  Lisp_Object ret = Qt;
-  if (inflate_status != Z_STREAM_END)
-    {
-      if (!NILP (allow_partial))
-        ret = make_int (iend - pos_byte);
-      else
-        return unbind_to (count, Qnil);
-    }
-
-  unwind_data.start = 0;
-
-  /* Delete the compressed data.  */
-  del_range_2 (istart, istart, /* byte and char offsets are the same. */
-               iend, iend, 0);
-
-  signal_after_change (istart, iend - istart, unwind_data.nbytes);
-  update_compositions (istart, istart, CHECK_HEAD);
-
-  return unbind_to (count, ret);
-}
-
-
-/***********************************************************************
-                           Initialization
- ***********************************************************************/
-void
-syms_of_decompress (void)
-{
-  defsubr (&Szlib_decompress_region);
-  defsubr (&Szlib_available_p);
-}
-
-#endif /* HAVE_ZLIB */
+/* Interface to zlib.
+   Copyright (C) 2013-2020 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#ifdef HAVE_ZLIB
+
+#include <zlib.h>
+
+#include "lisp.h"
+#include "buffer.h"
+#include "composite.h"
+
+#include <verify.h>
+
+#ifdef WINDOWSNT
+# include <windows.h>
+# include "w32common.h"
+# include "w32.h"
+
+/* Entry points of the zlib DLL, resolved at run time on MS-Windows.
+   Declare each one exactly once; init_zlib_functions must load every
+   function declared here.  We import inflateInit2_ and deflateInit2_
+   rather than inflateInit2 and deflateInit2 because the latter are
+   macros that zlib.h defines on top of the former, so only the
+   underscore-suffixed symbols exist in the library.  */
+DEF_DLL_FN (int, inflateInit2_,
+           (z_streamp strm, int windowBits, const char *version,
+            int stream_size));
+DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
+DEF_DLL_FN (int, deflateInit2_,
+           (z_streamp strm, int level, int method, int windowBits,
+            int memLevel, int strategy, const char *version,
+            int stream_size));
+DEF_DLL_FN (int, deflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, deflateEnd, (z_streamp strm));
+
+static bool zlib_initialized;  /* Result of the last init_zlib_functions.  */
+/* Load the zlib DLL and look up the functions this file calls.  */
+static bool
+init_zlib_functions (void)
+{
+  HMODULE library = w32_delayed_load (Qzlib);
+
+  if (!library)
+    return false;
+  /* NOTE(review): LOAD_DLL_FN is defined outside this file -- confirm.  */
+  LOAD_DLL_FN (library, inflateInit2_);
+  LOAD_DLL_FN (library, inflate);
+  LOAD_DLL_FN (library, inflateEnd);
+  LOAD_DLL_FN (library, deflateInit2_);
+  LOAD_DLL_FN (library, deflate);
+  LOAD_DLL_FN (library, deflateEnd);
+  return true;
+}
+
+# undef inflate
+# undef inflateEnd
+# undef inflateInit2_
+# undef deflate
+# undef deflateEnd
+# undef deflateInit2_
+/* From here on the plain zlib names expand to the fn_* names.  */
+# define inflate fn_inflate
+# define inflateEnd fn_inflateEnd
+# define inflateInit2_ fn_inflateInit2_
+# define deflate fn_deflate
+# define deflateEnd fn_deflateEnd
+# define deflateInit2_ fn_deflateInit2_
+
+#endif /* WINDOWSNT */
+
+
+struct decompress_unwind_data  /* Cleanup state passed to unwind_zlib.  */
+{ /* OLD_POINT: point to restore.  ORIG, START: input and output start.  */
+  ptrdiff_t old_point, orig, start, nbytes; /* NBYTES: output inserted.  */
+  z_stream *stream;  /* Stream to release with inflateEnd or deflateEnd.  */
+  int deflating;     /* Nonzero selects deflateEnd, zero inflateEnd.  */
+};
+
+static void
+unwind_zlib (void *ddata)
+{
+  struct decompress_unwind_data *state = ddata;
+  ptrdiff_t out_beg = state->start, out_end = out_beg + state->nbytes;
+  ptrdiff_t in_len = out_beg - state->orig;
+
+  /* Release the stream with the function matching how it was set up.  */
+  if (state->deflating)
+    deflateEnd (state->stream);
+  else
+    inflateEnd (state->stream);
+  /* A nonzero START means the output inserted there has to go; remove
+     it without running the change hooks.  */
+  if (out_beg)
+    {
+      del_range_2 (out_beg, out_beg, out_end, out_end, 0);
+      update_compositions (out_beg, out_beg, CHECK_HEAD);
+      /* Pair up with the before-change-functions call already made.  */
+      signal_after_change (state->orig, in_len, in_len);
+    }
+  /* Put point back, or at point-max if the buffer is now shorter.  */
+  SET_PT (min (state->old_point, ZV));
+}
+
+DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
+       doc: /* Return t if zlib (de)compression is available in this instance of Emacs.  */)
+     (void)
+{
+#ifdef WINDOWSNT
+  Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
+  if (CONSP (found))  /* Already probed: reuse the cached status.  */
+    return XCDR (found);
+  else
+    {
+      Lisp_Object status;
+      zlib_initialized = init_zlib_functions ();
+      status = zlib_initialized ? Qt : Qnil;
+      Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache);
+      return status;  /* Cached above, so the DLL is probed only once.  */
+    }
+#else
+  return Qt;  /* NOTE(review): presumably zlib is linked in directly here.  */
+#endif
+}
+
+DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
+       Szlib_decompress_region,
+       2, 3, 0,
+       doc: /* Decompress a gzip- or zlib-compressed region.
+Replace the text in the region by the decompressed data.
+
+If optional parameter ALLOW-PARTIAL is nil or omitted, then on
+failure, return nil and leave the data in place.  Otherwise, return
+the number of bytes that were not decompressed and replace the region
+text by whatever data was successfully decompressed (similar to gzip).
+If decompression is completely successful return t.
+
+This function can be called only in unibyte buffers.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int inflate_status;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap. */
+  modify_text (istart, iend);
+  /* Put the gap right after the region; output is written into it.  */
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* The magic number 32 apparently means "autodetect both the gzip and
+     zlib formats" according to zlib.h.  */
+  if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
+    return Qnil;
+  /* Arm unwind_zlib so that every later exit ends the stream.  */
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 0;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'inflate' until it reports an error or end-of-input.  */
+  do
+    {
+      /* Maximum number of bytes that one 'inflate' call should read and write.
+        Do not make avail_out too large, as that might unduly delay C-g.
+        zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int decompressed;
+
+      if (GAP_SIZE < avail_out)
+       make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      inflate_status = inflate (&stream, Z_NO_FLUSH);
+      pos_byte += avail_in - stream.avail_in;
+      decompressed = avail_out - stream.avail_out;
+      insert_from_gap (decompressed, decompressed, 0);
+      unwind_data.nbytes += decompressed;
+      maybe_quit ();
+    }
+  while (inflate_status == Z_OK);
+
+  Lisp_Object ret = Qt;
+  if (inflate_status != Z_STREAM_END)
+    {
+      if (!NILP (allow_partial))
+        ret = make_int (iend - pos_byte);
+      else
+        return unbind_to (count, Qnil);
+    }
+  /* Zero START so that unwind_zlib keeps the inserted output.  */
+  unwind_data.start = 0;
+
+  /* Delete the compressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same. */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, ret);
+}
+
+
+DEFUN ("zlib-compress-region", Fzlib_compress_region,
+       Szlib_compress_region,
+       2, 3, 0,
+       doc: /* Compress the region into a gzip or zlib stream.
+Replace the text in the region by the compressed data.
+
+If optional parameter ZLIB is nil or omitted, use the gzip wrapper
+format; otherwise use the zlib wrapper format.  Return t if compression
+succeeds; on failure, return nil and leave the text in place.
+
+This function can be called only in unibyte buffers.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object zlib)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int deflate_status, flush;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+  int window_bits = NILP (zlib) ? MAX_WBITS + 16 : MAX_WBITS;
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap. */
+  modify_text (istart, iend);
+
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* Adding 16 to the window bits selects the gzip wrapper, plain
+     MAX_WBITS the zlib wrapper; 8 is zlib's default memLevel.  */
+  if (deflateInit2 (&stream, Z_BEST_COMPRESSION, Z_DEFLATED, window_bits,
+                    8, Z_DEFAULT_STRATEGY) != Z_OK)
+    return Qnil;
+
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 1;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the compressed data at the end of the uncompressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'deflate' until it reports an error or end-of-input.  */
+  flush = Z_NO_FLUSH;
+  do
+    {
+      /* Maximum number of bytes that one 'deflate' call should read and write.
+        Do not make avail_out too large, as that might unduly delay C-g.
+        zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int compressed;
+
+      if (GAP_SIZE < avail_out)
+       make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      deflate_status = deflate (&stream, flush);
+
+      pos_byte += avail_in - stream.avail_in;
+      compressed = avail_out - stream.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      unwind_data.nbytes += compressed;
+      if (deflate_status == Z_BUF_ERROR && flush == Z_NO_FLUSH)
+        {
+          /* Out of input: not an error, but time to flush all output.  */
+          flush = Z_FINISH;
+          deflate_status = Z_OK;
+        }
+      maybe_quit ();
+    }
+  while (deflate_status == Z_OK);
+
+  /* If compression did not succeed, bail out: unwind_zlib then deletes
+     the partial output and leaves the original text in place.  */
+  if (deflate_status != Z_STREAM_END)
+    return unbind_to (count, Qnil);
+
+  /* Success: zero START so that unwind_zlib keeps the output it would
+     otherwise delete.  */
+  unwind_data.start = 0;
+
+  /* Delete the uncompressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same. */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, Qt);
+}
+
+
+/***********************************************************************
+                           Initialization
+ ***********************************************************************/
+void
+syms_of_decompress (void)  /* Hand this file's DEFUNs to defsubr.  */
+{
+  defsubr (&Szlib_decompress_region);
+  defsubr (&Szlib_compress_region);
+  defsubr (&Szlib_available_p);
+}
+
+#endif /* HAVE_ZLIB */

Attachment: test.lisp
Description: Binary data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]