[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] pickles: make jojodiff.pk to be compliant with upstream impl
From: Jose E. Marchesi
Subject: Re: [PATCH] pickles: make jojodiff.pk to be compliant with upstream impl
Date: Fri, 15 Mar 2024 11:26:59 +0100
User-agent: Gnus/5.13 (Gnus v5.13)
OK. Thanks.
> 2024-03-15 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
>
> * pickles/jojodiff.pk (Jojo_Length2): Add initializer to
> constraint.
> (Jojo_Length2): Likewise.
> (Jojo_Length3): Likewise.
> (Jojo_Length5): Likewise.
> (Jojo_Length9): Likewise.
> (jojo_length_encode): New function to encode length according to
> the Jojo format.
> (Jojo_EscapedByte): Remove.
> (Jojo_Datum): Auxiliary type for `Jojo_Bytes'.
> (Jojo_Bytes): Type to describe sequence of data bytes in Jojo
> format.
> (Jojo_Bytes_Callback): Auxiliary type for `jojo_bytes_map' function.
> (jojo_bytes_map): Add function to run the provided callback
> for each decoded byte in `Jojo_Bytes'.
> (jojo_bytes_encode): Add function to encode bytes as `Jojo_Bytes'.
> (Jojo_MOD): Use `Jojo_Bytes' instead of `Jojo_Byte[]'.
> (Jojo_INS): Likewise.
> (Jojo_MOD.get_count): Update accordingly.
> (Jojo_INS.get_count): Likewise.
> (Jojo_Hunk): Add `Jojo_Bytes' field to comply with the latest
> version of JojoDiff generator.
> (Jojo_Hunk_Orig): Add type to describe original format of JojoDiff.
> (jojo_patch_from_orig): Add conversion function to transform
> from `Jojo_Patch_Orig' to `Jojo_Patch'.
> (jojo_patch_apply): Update to support new `Jojo_Patch' and make
> verbosity more fine-grained.
> * testsuite/poke.pickles/jojodiff-test.pk: Add tests for JojoDiff
> pickle.
> * testsuite/Makefile.am (EXTRA_DIST): Add new test.
> ---
>
> Hi Jose,
>
> I'd like to add this patch to master and maint/poke-4; WDYT?
>
>
> Regards,
> Mohammad-Reza
>
>
> ChangeLog | 33 +++
> pickles/jojodiff.pk | 281 +++++++++++++++++++-----
> testsuite/Makefile.am | 1 +
> testsuite/poke.pickles/jojodiff-test.pk | 131 +++++++++++
> 4 files changed, 394 insertions(+), 52 deletions(-)
> create mode 100644 testsuite/poke.pickles/jojodiff-test.pk
>
> diff --git a/ChangeLog b/ChangeLog
> index 257c6c22..0f3c1ce8 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,36 @@
> +2024-03-15 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
> +
> + * pickles/jojodiff.pk (Jojo_Length2): Add initializer to
> + constraint.
> + (Jojo_Length2): Likewise.
> + (Jojo_Length3): Likewise.
> + (Jojo_Length5): Likewise.
> + (Jojo_Length9): Likewise.
> + (jojo_length_encode): New function to encode length according to
> + the Jojo format.
> + (Jojo_EscapedByte): Remove.
> + (Jojo_Datum): Auxiliary type for `Jojo_Bytes'.
> + (Jojo_Bytes): Type to describe sequence of data bytes in Jojo
> + format.
> + (Jojo_Bytes_Callback): Auxiliary type for `jojo_bytes_map' function.
> + (jojo_bytes_map): Add function to run the provided callback
> + for each decoded byte in `Jojo_Bytes'.
> + (jojo_bytes_encode): Add function to encode bytes as `Jojo_Bytes'.
> + (Jojo_MOD): Use `Jojo_Bytes' instead of `Jojo_Byte[]'.
> + (Jojo_INS): Likewise.
> + (Jojo_MOD.get_count): Update accordingly.
> + (Jojo_INS.get_count): Likewise.
> + (Jojo_Hunk): Add `Jojo_Bytes' field to comply with the latest
> + version of JojoDiff generator.
> + (Jojo_Hunk_Orig): Add type to describe original format of JojoDiff.
> + (jojo_patch_from_orig): Add conversion function to transform
> + from `Jojo_Patch_Orig' to `Jojo_Patch'.
> + (jojo_patch_apply): Update to support new `Jojo_Patch' and make
> + verbosity more fine-grained.
> + * testsuite/poke.pickles/jojodiff-test.pk: Add tests for JojoDiff
> + pickle.
> + * testsuite/Makefile.am (EXTRA_DIST): Add new test.
> +
> 2024-03-05 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
>
> * autogen.sh: Updated by `./bootstrap --bootstrap-sync'.
> diff --git a/pickles/jojodiff.pk b/pickles/jojodiff.pk
> index 20bfb52f..714a4f52 100644
> --- a/pickles/jojodiff.pk
> +++ b/pickles/jojodiff.pk
> @@ -19,10 +19,9 @@
> /* Reference:
> - https://jojodiff.sourceforge.net/
> - https://github.com/vibhorkalley/jojodiff
> - - https://github.com/janjongboom/janpatch
> + - https://github.com/janjongboom/janpatch (see issue 16)
> */
>
> -
> load ios;
>
> var JOJO_ESC = 0xa7UB, /* Escape. */
> @@ -51,7 +50,7 @@ type Jojo_Length1 =
> type Jojo_Length2 =
> struct
> {
> - uint<8> prefix : prefix == 252UB;
> + uint<8> prefix == 252UB;
> uint<8> x;
>
> computed Jojo_Offset value;
> @@ -65,7 +64,7 @@ type Jojo_Length2 =
> type Jojo_Length3 =
> struct
> {
> - uint<8> prefix : prefix == 253UB;
> + uint<8> prefix == 253UB;
> big uint<16> x;
>
> computed Jojo_Offset value;
> @@ -79,7 +78,7 @@ type Jojo_Length3 =
> type Jojo_Length5 =
> struct
> {
> - uint<8> prefix : prefix == 254UB;
> + uint<8> prefix == 254UB;
> big uint<32> x;
>
> computed Jojo_Offset value;
> @@ -93,7 +92,7 @@ type Jojo_Length5 =
> type Jojo_Length9 =
> struct
> {
> - uint<8> prefix : prefix == 255UB;
> + uint<8> prefix == 255UB;
> big uint<64> x;
>
> computed Jojo_Offset value;
> @@ -130,32 +129,104 @@ type Jojo_Length =
> }
> };
>
> -type Jojo_EscapedByte =
> - struct
> - {
> - uint<8> prefix1 == JOJO_ESC;
> - uint<8> prefix2 == JOJO_ESC;
> - uint<8> value : value in [
> - JOJO_ESC, JOJO_MOD, JOJO_MOD, JOJO_INS, JOJO_DEL, JOJO_EQL, JOJO_BKT];
> - };
> +/* Encode the length in Jojo format.
> + *
> + * 0 < x <= 252 1 byte: 0-251
> + * 252 < x <= 508 2 bytes: 252, x-253
> + * 508 < x < 0x10000 3 bytes: 253, xx
> + * 0x10000 <= x < 0x100000000 5 bytes: 254, xxxx
> + * 0x100000000 <= x 9 bytes: 255, xxxxxxxx
> + */
>
> -type Jojo_Byte =
> +fun jojo_length_encode = (Jojo_Offset len) Jojo_Length:
> +{
> + assert (len != 0#B);
> +
> + return len <= 252#B ? Jojo_Length {len1 = Jojo_Length1 {x = len/#B - 1}}
> + : len <= 508#B ? Jojo_Length {len2 = Jojo_Length2 {x = len/#B - 253}}
> + : len < 0x10000#B ? Jojo_Length {len3 = Jojo_Length3 {x = len/#B}}
> + : len < 0x100000000#B ? Jojo_Length {len5 = Jojo_Length5 {x = len/#B}}
> + : Jojo_Length {len9 = Jojo_Length9 {x = len/#B}};
> +}
> +
> +type Jojo_Datum =
> union
> {
> - Jojo_EscapedByte escaped;
> + uint<8>[2] escaped_escape : escaped_escape == [JOJO_ESC, JOJO_ESC];
> + uint<8>[2] pair : pair[0] == JOJO_ESC
> + && !(pair[1] in [JOJO_MOD, JOJO_INS, JOJO_DEL, JOJO_EQL, JOJO_BKT]);
> uint<8> value : value != JOJO_ESC;
> };
>
> -fun jojo_bytes = (Jojo_Byte[] jbytes) uint<8>[]:
> +/* Jojo-encoded bytes. */
> +
> +type Jojo_Bytes =
> + struct
> + {
> + Jojo_Datum[] data : data'length > 0;
> +
> + method get_count = Jojo_Offset:
> + {
> + var len = 0UL#B;
> +
> + for (d in data)
> + len += !(d.pair ?! E_elem) ? 2#B : 1#B;
> + return len;
> + }
> + };
> +
> +type Jojo_Bytes_Callback = (uint<64>/*index*/,uint<8>/*byte*/)void;
> +
> +/* Traverse Jojo_Bytes's data (Jojo_Datum) and for each byte call provided
> + DO callback. */
> +
> +fun jojo_bytes_map = (Jojo_Bytes jbytes, Jojo_Bytes_Callback do) void:
> {
> - var len = jbytes'length,
> - bytes = uint<8>[len] ();
> + var i = 0UL;
> +
> + for (d in jbytes.data)
> + if (!(d.value ?! E_elem))
> + do (/*index*/ i++, /*byte*/ d.value);
> + else if (!(d.pair ?! E_elem))
> + {
> + do (/*index*/ i++, /*byte*/ d.pair[0]);
> + do (/*index*/ i++, /*byte*/ d.pair[1]);
> + }
> + else
> + do (/*index*/ i++, /*byte*/ JOJO_ESC);
> +}
> +
> +/* Decode Jojo-encoded bytes into normal byte array. */
>
> - for (var i = 0UL; i != len; ++i)
> - bytes[i] = jbytes[i].value;
> +fun jojo_bytes = (Jojo_Bytes jbytes) uint<8>[]:
> +{
> + var bytes = uint<8>[jbytes.get_count] ();
> +
> + jojo_bytes_map
> + :jbytes jbytes
> + :do lambda (uint<64> i, uint<8> byte) void:
> + {
> + bytes[i] = byte;
> + };
> return bytes;
> }
>
> +/* Encode provided BYTES in Jojo format. */
> +
> +fun jojo_bytes_encode = (uint<8>[] bytes) Jojo_Bytes:
> +{
> + var ds = Jojo_Datum[] ();
> +
> + for (b in bytes)
> + {
> + if (b == JOJO_ESC)
> + apush (ds, Jojo_Datum {escaped_escape=[JOJO_ESC, JOJO_ESC]});
> + else
> + apush (ds, Jojo_Datum {value=b});
> + }
> + return Jojo_Bytes {data=ds};
> +}
> +
> /* MOD patch instruction
>
> Add the following bytes to the new file. Advance both cursors. */
> @@ -165,12 +236,12 @@ type Jojo_MOD =
> {
> uint<8> prefix == JOJO_ESC;
> uint<8> opcode == JOJO_MOD;
> - Jojo_Byte[] bytes;
> + Jojo_Bytes jbytes;
>
> computed Jojo_Offset count;
>
> method get_count = Jojo_Offset:
> - { return bytes'length#B; } /* Don't use `size' attribute. */
> + { return jbytes.get_count; }
>
> method _print = void:
> { printf ("#<MOD:%v>", get_count); }
> @@ -185,12 +256,12 @@ type Jojo_INS =
> {
> uint<8> prefix == JOJO_ESC;
> uint<8> opcode == JOJO_INS;
> - Jojo_Byte[] bytes;
> + Jojo_Bytes jbytes;
>
> computed Jojo_Offset count;
>
> method get_count = Jojo_Offset:
> - { return bytes'length#B; } /* Don't use `size' attribute. */
> + { return jbytes.get_count; } /* Don't use `size' attribute. */
>
> method _print = void:
> { printf ("#<INS:%v>", get_count); }
> @@ -256,7 +327,28 @@ type Jojo_EQL =
> { printf ("#<EQL:%v>", get_count); }
> };
>
> +/* In commit 75e4450b2ff661b9d3d0222ca84865e4131f714c (jojodiff.sf.net),
> + the author changed the format of output in a backward-incompatible way.
> + With that commit, ESC-MOD sequence is the default operation when a new
> + operation sequence is needed:
> + - at the start of a file
> + - after an EQL, DEL or BKT operation
> +
> + Use Jojo_{Hunk,Patch} for new data, and Jojo_{Hunk,Patch}_Orig for the
> + original data. */
> +
> type Jojo_Hunk =
> + union
> + {
> + Jojo_MOD mod;
> + Jojo_INS ins;
> + Jojo_DEL del;
> + Jojo_EQL eql;
> + Jojo_BKT bkt;
> + Jojo_Bytes jbytes;
> + };
> +
> +type Jojo_Hunk_Orig =
> union
> {
> Jojo_MOD mod;
> @@ -267,6 +359,36 @@ type Jojo_Hunk =
> };
>
> type Jojo_Patch = Jojo_Hunk[];
> +type Jojo_Patch_Orig = Jojo_Hunk_Orig[];
> +
> +/* Convert original Jojo patch to Jojo_Patch type.
> +
> + Useful as a pre-processing step before `jojo_patch_apply'. */
> +
> +fun jojo_patch_from_orig = (Jojo_Patch_Orig orig) Jojo_Patch:
> +{
> + var hs = Jojo_Hunk[] ();
> +
> + for (h in orig)
> + {
> + if (!(h.mod ?! E_elem))
> + apush (hs, Jojo_Hunk {mod=h.mod});
> + else if (!(h.ins ?! E_elem))
> + apush (hs, Jojo_Hunk {ins=h.ins});
> + else if (!(h.del ?! E_elem))
> + apush (hs, Jojo_Hunk {del=h.del});
> + else if (!(h.eql ?! E_elem))
> + apush (hs, Jojo_Hunk {eql=h.eql});
> + else if (!(h.bkt ?! E_elem))
> + apush (hs, Jojo_Hunk {bkt=h.bkt});
> + else
> + assert (0, "unreachable reached!");
> + }
> + return hs;
> +}
> +
> +/* Apply provided Jojo PATCH to construct the new content in NEW_IOS from
> + content of ORIG_IOS. */
>
> fun jojo_patch_apply = (Jojo_Patch patch,
> int<32> orig_ios,
> @@ -278,55 +400,82 @@ fun jojo_patch_apply = (Jojo_Patch patch,
> for (hunk in patch)
> {
> if (verbosity > 0)
> - printf ("Hunk:%v\n", hunk);
> + {
> + var oacutoff = vm_oacutoff;
> +
> + if (verbosity == 1) vm_set_oacutoff (5);
> + printf ("Hunk:%v\n", hunk);
> + if (verbosity == 1) vm_set_oacutoff (oacutoff);
> + }
>
> if (!(hunk.mod ?! E_elem))
> {
> - var mod = hunk.mod,
> - len = mod.bytes'length;
> + var jbytes = hunk.mod.jbytes;
>
> if (verbosity > 1)
> - printf ("MOD :to %v :from %v :bytes %v\n", new_off, orig_off,
> - jojo_bytes (mod.bytes));
> + {
> + printf ("MOD :to %v :from %v", new_off, orig_off);
> + if (verbosity > 2)
> + {
> + var bytes = jojo_bytes (jbytes);
> +
> + printf (" :nbytes %v :bytes %v", bytes'size, bytes);
> + }
> + print ("\n");
> + }
>
> /* Add the `bytes' to the new file.
> Advance both cursors. */
> - for (b in mod.bytes)
> - {
> - uint<8> @ new_ios : new_off = b.value;
> - new_off++;
> - orig_off++;
> - }
> + jojo_bytes_map
> + :jbytes jbytes
> + :do lambda (uint<64> index, uint<8> byte) void:
> + {
> + uint<8> @ new_ios : new_off = byte;
> + new_off++;
> + orig_off++;
> + };
> }
> else if (!(hunk.ins ?! E_elem))
> {
> - var o = orig_off,
> - ins = hunk.ins;
> + var jbytes = hunk.ins.jbytes;
>
> if (verbosity > 1)
> - printf ("INS :to %v :bytes %v\n", new_off, jojo_bytes (ins.bytes));
> + {
> + printf ("INS :to %v", new_off);
> + if (verbosity > 2)
> + {
> + var bytes = jojo_bytes (jbytes);
> +
> + printf (" :nbytes %v :bytes %v", bytes'size, bytes);
> + }
> + print ("\n");
> + }
>
> /* Add the following bytes to the new file.
> Advance cursor in new file. */
> - for (b in ins.bytes)
> - {
> - uint<8> @ new_ios : new_off = b.value;
> - new_off++;
> - o++;
> - }
> + jojo_bytes_map
> + :jbytes jbytes
> + :do lambda (uint<64> index, uint<8> byte) void:
> + {
> + uint<8> @ new_ios : new_off = byte;
> + new_off++;
> + };
> }
> else if (!(hunk.del ?! E_elem))
> {
> + var len = hunk.del.jojo_length.get_length;
> +
> if (verbosity > 1)
> {
> - printf ("DEL :from %v\n", orig_off);
> - ios_dump_bytes :ios orig_ios
> - :from orig_off
> - :size hunk.del.jojo_length.get_length
> - :ascii_p 1;
> + printf ("DEL :from %v :size %v\n", orig_off, len);
> + if (verbosity > 2)
> + ios_dump_bytes :ios orig_ios
> + :from orig_off
> + :size len
> + :ascii_p 1;
> }
>
> - orig_off += hunk.del.jojo_length.get_length;
> + orig_off += len;
> }
> else if (!(hunk.eql ?! E_elem))
> {
> @@ -334,7 +483,7 @@ fun jojo_patch_apply = (Jojo_Patch patch,
>
> if (verbosity > 1)
> {
> - printf ("EQL :from %v\n", orig_off);
> + printf ("EQL :from %v :size %v\n", orig_off, len);
> ios_dump_bytes :ios orig_ios
> :from orig_off
> :size len
> @@ -356,6 +505,34 @@ fun jojo_patch_apply = (Jojo_Patch patch,
> assert (orig_off >= len);
> orig_off -= len;
> }
> + else if (!(hunk.jbytes ?! E_elem))
> + {
> + /* Like Jojo_MOD. */
> + var jbytes = hunk.jbytes;
> +
> + if (verbosity > 1)
> + {
> + printf ("MOD(implicit) :to %v :from %v", new_off, orig_off);
> + if (verbosity > 2)
> + {
> + var bytes = jojo_bytes (jbytes);
> +
> + printf (" :nbytes %v :bytes %v", bytes'size, bytes);
> + }
> + print ("\n");
> + }
> +
> + /* Add the `bytes' to the new file.
> + Advance both cursors. */
> + jojo_bytes_map
> + :jbytes jbytes
> + :do lambda (uint<64> index, uint<8> byte) void:
> + {
> + uint<8> @ new_ios : new_off = byte;
> + new_off++;
> + orig_off++;
> + };
> + }
> else
> assert (0, "unreachable reached!");
> }
> diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
> index 7f94d05f..916ff8a4 100644
> --- a/testsuite/Makefile.am
> +++ b/testsuite/Makefile.am
> @@ -618,6 +618,7 @@ EXTRA_DIST = \
> poke.pickles/coff-test.pk \
> poke.pickles/id3v2-test.pk \
> poke.pickles/jffs-test.pk \
> + poke.pickles/jojodiff-test.pk \
> poke.pickles/leb128-test.pk \
> poke.pickles/mcr-test.pk \
> poke.pickles/openpgp-test.pk \
> diff --git a/testsuite/poke.pickles/jojodiff-test.pk b/testsuite/poke.pickles/jojodiff-test.pk
> new file mode 100644
> index 00000000..fbdee69d
> --- /dev/null
> +++ b/testsuite/poke.pickles/jojodiff-test.pk
> @@ -0,0 +1,131 @@
> +/* jojodiff-test.pk - Tests for the JojoDiff pickle. */
> +
> +/* Copyright (C) 2024 The poke authors */
> +
> +/* This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 3 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +load pktest;
> +load jojodiff;
> +
> +/* TODO(@mnabipoor): Add tests for patch data. */
> +
> +var tests = [
> + PkTest {
> + name = "jojo_length_encode",
> + func = lambda (string name) void:
> + {
> + {
> + var l = jojo_length_encode (10#B);
> +
> + assert (l isa Jojo_Length);
> +
> + assert (!(l.len1 ?! E_elem));
> + assert (l.len1.x == 9);
> + assert (l.len1.value == 10#B);
> + assert (l.get_length == 10#B);
> + }
> +
> + {
> + var l = jojo_length_encode (252#B);
> +
> + assert (!(l.len1 ?! E_elem));
> + assert (l.len1.x == 251);
> + assert (l.len1.value == 252#B);
> + assert (l.get_length == 252#B);
> + }
> +
> + {
> + var l = jojo_length_encode (253#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (!(l.len2 ?! E_elem));
> + assert (l.len2.x == 0);
> + assert (l.len2.value == 253#B);
> + assert (l.get_length == 253#B);
> + }
> +
> + {
> + var l = jojo_length_encode (508#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (!(l.len2 ?! E_elem));
> + assert (l.len2.x == 255);
> + assert (l.len2.value == 508#B);
> + assert (l.get_length == 508#B);
> + }
> +
> + {
> + var l = jojo_length_encode (509#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (l.len2 ?! E_elem);
> + assert (!(l.len3 ?! E_elem));
> + assert (l.len3.x == 509);
> + assert (l.len3.value == 509#B);
> + assert (l.get_length == 509#B);
> + }
> +
> + {
> + var l = jojo_length_encode (0x10000#B - 1#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (l.len2 ?! E_elem);
> + assert (!(l.len3 ?! E_elem));
> + assert (l.len3.x == 0xffff);
> + assert (l.len3.value == 0xffff#B);
> + assert (l.get_length == 0xffff#B);
> + }
> +
> + {
> + var l = jojo_length_encode (0x10000#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (l.len2 ?! E_elem);
> + assert (l.len3 ?! E_elem);
> + assert (!(l.len5 ?! E_elem));
> + assert (l.len5.x == 0x10000);
> + assert (l.len5.value == 0x10000#B);
> + assert (l.get_length == 0x10000#B);
> + }
> +
> + {
> + var l = jojo_length_encode (0x100000000#B - 1#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (l.len2 ?! E_elem);
> + assert (l.len3 ?! E_elem);
> + assert (!(l.len5 ?! E_elem));
> + assert (l.len5.x == 0xffffffff);
> + assert (l.len5.value == 0xffffffff#B);
> + assert (l.get_length == 0xffffffff#B);
> + }
> +
> + {
> + var l = jojo_length_encode (0x100000000#B);
> +
> + assert (l.len1 ?! E_elem);
> + assert (l.len2 ?! E_elem);
> + assert (l.len3 ?! E_elem);
> + assert (l.len5 ?! E_elem);
> + assert (!(l.len9 ?! E_elem));
> + assert (l.len9.x == 0x100000000);
> + assert (l.len9.value == 0x100000000#B);
> + assert (l.get_length == 0x100000000#B);
> + }
> + },
> + },
> +];
> +
> +exit (pktest_run (tests) ? 0 : 1);