[Qemacs-commit] qemacs charset.c charsetmore.c kmaptoqe.c ligto...

qemacs-commit
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemacs-commit] qemacs charset.c charsetmore.c kmaptoqe.c ligto...

From:	Charlie Gordon
Subject:	[Qemacs-commit] qemacs charset.c charsetmore.c kmaptoqe.c ligto...
Date:	Sat, 01 Mar 2014 22:37:26 +0000
CVSROOT:        /sources/qemacs
Module name:    qemacs
Changes by:     Charlie Gordon <chqrlie>        14/03/01 22:37:26

Modified files:
        .              : charset.c charsetmore.c kmaptoqe.c ligtoqe.c 
                         charsetjis.c cptoqe.c ligatures qe.h 

Log message:
        Add charset probing functions
        
        * add QECharset method probe_func() to test charset likelihood
        * probe function is not used for automatic charset detection yet
        * fix kmaptoqe.c to use stable sorting and make kmaps reproducible
        * fix ligtoqe.c to use stable sorting and make ligatures reproducible

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/qemacs/charset.c?cvsroot=qemacs&r1=1.32&r2=1.33
http://cvs.savannah.gnu.org/viewcvs/qemacs/charsetmore.c?cvsroot=qemacs&r1=1.18&r2=1.19
http://cvs.savannah.gnu.org/viewcvs/qemacs/kmaptoqe.c?cvsroot=qemacs&r1=1.11&r2=1.12
http://cvs.savannah.gnu.org/viewcvs/qemacs/ligtoqe.c?cvsroot=qemacs&r1=1.9&r2=1.10
http://cvs.savannah.gnu.org/viewcvs/qemacs/charsetjis.c?cvsroot=qemacs&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/qemacs/cptoqe.c?cvsroot=qemacs&r1=1.16&r2=1.17
http://cvs.savannah.gnu.org/viewcvs/qemacs/ligatures?cvsroot=qemacs&rev=1.2
http://cvs.savannah.gnu.org/viewcvs/qemacs/qe.h?cvsroot=qemacs&r1=1.142&r2=1.143

Patches:
Index: charset.c
===================================================================
RCS file: /sources/qemacs/qemacs/charset.c,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -b -r1.32 -r1.33
--- charset.c   10 Feb 2014 20:29:26 -0000      1.32
+++ charset.c   1 Mar 2014 22:37:26 -0000       1.33
@@ -119,6 +119,7 @@
 QECharset charset_raw = {
     "raw",
     "binary|none",
+    NULL,
     decode_raw_init,
     decode_8bit,
     encode_raw,
@@ -132,6 +133,46 @@
 /********************************************************/
 /* 8859-1 */
 
+static int probe_8859_1(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + size;
+    uint32_t c;
+    int count_spaces, count_lines, count_high;
+
+    count_spaces = count_lines = count_high = 0;
+
+    while (p < p_end) {
+        c = p[0];
+        p += 1;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c < 0x7F) {
+            continue;
+        } else
+        if (c < 0x80) {
+            return 0;
+        } else {
+            count_high++;
+        }
+    }
+    if (count_spaces | count_lines)
+        return 1;
+    else
+        return 0;
+}
+
 static void decode_8859_1_init(CharsetDecodeState *s)
 {
     s->table = table_idem;
@@ -150,6 +191,7 @@
 QECharset charset_8859_1 = {
     "8859-1",
     "ISO-8859-1|iso-ir-100|latin1|l1|819",
+    probe_8859_1,
     decode_8859_1_init,
     decode_8bit,
     encode_8859_1,
@@ -181,6 +223,7 @@
 QECharset charset_vt100 = {
     "vt100",
     NULL,
+    NULL,
     decode_vt100_init,
     decode_8bit,
     encode_vt100,
@@ -207,6 +250,7 @@
 static QECharset charset_7bit = {
     "7bit",
     "us-ascii|ascii|7-bit|iso-ir-6|ANSI_X3.4|646",
+    NULL,
     decode_8859_1_init,
     decode_8bit,
     encode_7bit,
@@ -316,6 +360,80 @@
     return uq - dest;
 }
 
+static int probe_utf8(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + size;
+    uint32_t c;
+    int count_spaces, count_lines, count_utf8;
+
+    count_spaces = count_lines = count_utf8 = 0;
+
+    while (p < p_end) {
+        c = p[0];
+        p += 1;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c < 0x7F) {
+            continue;
+        } else
+        if (c < 0xc0) {
+            return 0;
+        } else
+        if (c < 0xe0) {
+            if (p[0] < 0x80 || p[0] > 0xbf)
+                return 0;
+            count_utf8++;
+            p += 1;
+        } else
+        if (c < 0xf0) {
+            if (p[0] < 0x80 || p[0] > 0xbf || p[1] < 0x80 || p[1] > 0xbf)
+                return 0;
+            count_utf8++;
+            p += 2;
+        } else
+        if (c < 0xf8) {
+            if (p[0] < 0x80 || p[0] > 0xbf || p[1] < 0x80 || p[1] > 0xbf
+            ||  p[2] < 0x80 || p[2] > 0xbf)
+                return 0;
+            count_utf8++;
+            p += 3;
+        } else
+        if (c < 0xfc) {
+            if (p[0] < 0x80 || p[0] > 0xbf || p[1] < 0x80 || p[1] > 0xbf
+            ||  p[2] < 0x80 || p[2] > 0xbf || p[3] < 0x80 || p[3] > 0xbf)
+                return 0;
+            count_utf8++;
+            p += 4;
+        } else
+        if (c < 0xfe) {
+            if (p[0] < 0x80 || p[0] > 0xbf || p[1] < 0x80 || p[1] > 0xbf
+            ||  p[2] < 0x80 || p[2] > 0xbf || p[3] < 0x80 || p[3] > 0xbf
+            ||  p[4] < 0x80 || p[4] > 0xbf)
+                return 0;
+            count_utf8++;
+            p += 5;
+        } else {
+            return 0;
+        }
+    }
+    if (count_spaces | count_lines | count_utf8)
+        return 1;
+    else
+        return 0;
+}
+
 static void decode_utf8_init(CharsetDecodeState *s)
 {
     s->table = table_utf8;
@@ -432,6 +550,7 @@
 QECharset charset_utf8 = {
     "utf-8",
     "utf8",
+    probe_utf8,
     decode_utf8_init,
     decode_utf8_func,
     encode_utf8,
@@ -445,6 +564,44 @@
 /********************************************************/
 /* UCS2/UCS4 */
 
+static int probe_ucs2le(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + (size & ~1);
+    uint32_t c;
+    int count_spaces, count_lines;
+
+    if (size & 1)
+        return 0;
+
+    count_spaces = count_lines = 0;
+
+    while (p < p_end) {
+        c = (p[0] << 0) | (p[1] << 8);
+        p += 2;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c >= 0x10000) {
+            return 0;
+        }
+    }
+    if (count_spaces | count_lines)
+        return 1;
+    else
+        return 0;
+}
+
 static void decode_ucs_init(CharsetDecodeState *s)
 {
     s->table = table_none;
@@ -543,6 +700,44 @@
     return (const u8 *)lp - buf;
 }
 
+static int probe_ucs2be(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + (size & ~1);
+    uint32_t c;
+    int count_spaces, count_lines;
+
+    if (size & 1)
+        return 0;
+
+    count_spaces = count_lines = 0;
+
+    while (p < p_end) {
+        c = (p[0] << 8) | (p[1] << 0);
+        p += 2;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c >= 0x10000) {
+            return 0;
+        }
+    }
+    if (count_spaces | count_lines)
+        return 1;
+    else
+        return 0;
+}
+
 static int decode_ucs2be(CharsetDecodeState *s)
 {
     /* XXX: should handle surrogates */
@@ -624,6 +819,7 @@
 QECharset charset_ucs2le = {
     "ucs2le",
     "utf16le|utf-16le",
+    probe_ucs2le,
     decode_ucs_init,
     decode_ucs2le,
     encode_ucs2le,
@@ -637,6 +833,7 @@
 QECharset charset_ucs2be = {
     "ucs2be",
     "ucs2|utf16|utf-16|utf16be|utf-16be",
+    probe_ucs2be,
     decode_ucs_init,
     decode_ucs2be,
     encode_ucs2be,
@@ -647,6 +844,44 @@
     2, 0, 0, 10, 0, 0, NULL, NULL,
 };
 
+static int probe_ucs4le(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + (size & ~3);
+    uint32_t c;
+    int count_spaces, count_lines;
+
+    if (size & 3)
+        return 0;
+
+    count_spaces = count_lines = 0;
+
+    while (p < p_end) {
+        c = (p[0] << 0) | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+        p += 4;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c >= 0x10000) {
+            return 0;
+        }
+    }
+    if (count_spaces | count_lines)
+        return 1;
+    else
+        return 0;
+}
+
 static int decode_ucs4le(CharsetDecodeState *s)
 {
     const u8 *p;
@@ -739,6 +974,44 @@
     return (const u8 *)lp - buf;
 }
 
+static int probe_ucs4be(__unused__ QECharset *charset, const u8 *buf, int size)
+{
+    static const uint32_t magic = (1 << '\b') | (1 << '\t') | (1 << '\f') |
+                                  (1 << '\n') | (1 << '\r') | (1 << '\033') |
+                                  (1 << 0x0e) | (1 << 0x0f) | (1 << 0x1f);
+    const u8 *p = buf;
+    const u8 *p_end = p + (size & ~3);
+    uint32_t c;
+    int count_spaces, count_lines;
+
+    if (size & 3)
+        return 0;
+
+    count_spaces = count_lines = 0;
+
+    while (p < p_end) {
+        c = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3] << 0);
+        p += 4;
+        if (c <= 32) {
+            if (c == ' ')
+                count_spaces++;
+            else
+            if (c == '\n')
+                count_lines++;
+            else
+            if (!(magic & (1 << c)))
+                return 0;
+        } else
+        if (c >= 0x10000) {
+            return 0;
+        }
+    }
+    if (count_spaces | count_lines)
+        return 1;
+    else
+        return 0;
+}
+
 static int decode_ucs4be(CharsetDecodeState *s)
 {
     const u8 *p;
@@ -818,6 +1091,7 @@
 QECharset charset_ucs4le = {
     "ucs4le",
     "utf32le|utf-32le",
+    probe_ucs4le,
     decode_ucs_init,
     decode_ucs4le,
     encode_ucs4le,
@@ -831,6 +1105,7 @@
 QECharset charset_ucs4be = {
     "ucs4be",
     "ucs4|utf32|utf-32|utf32be|utf-32be",
+    probe_ucs4be,
     decode_ucs_init,
     decode_ucs4be,
     encode_ucs4be,
@@ -1100,9 +1375,37 @@
     *eol_typep = eol_type;
 }
 
-/* detect the charset. Actually only UTF8 is detected */
 QECharset *detect_charset(const u8 *buf, int size, EOLType *eol_typep)
 {
+#if 0
+    QECharset *charset;
+
+    /* Try and determine charset */
+    /* CG: should iterate over charsets with probe function and score */
+    charset = &charset_utf8;
+    if (size > 0) {
+        if (charset_utf8.probe_func(&charset_utf8, buf, size))
+            charset = &charset_utf8;
+        else
+        if (charset_ucs4le.probe_func(&charset_ucs4le, buf, size))
+            charset = &charset_ucs4le;
+        else
+        if (charset_ucs4be.probe_func(&charset_ucs4be, buf, size))
+            charset = &charset_ucs4be;
+        else
+        if (charset_ucs2le.probe_func(&charset_ucs2le, buf, size))
+            charset = &charset_ucs2le;
+        else
+        if (charset_ucs2be.probe_func(&charset_ucs2be, buf, size))
+            charset = &charset_ucs2be;
+        else
+            charset = &charset_8859_1;
+        /* CG: should distinguish charset_8859_1, charset_raw and
+         * charset_auto */
+    }
+    return charset;
+#else
+    /* detect the charset. Actually only UTF8 is detected */
     int i, l, c, has_utf8, has_binary;
 
     has_utf8 = 0;
@@ -1211,6 +1514,7 @@
 #endif
     /* XXX: should use a state variable for default charset */
     return &charset_utf8;
+#endif
 }
 
 /********************************************************/

Index: charsetmore.c
===================================================================
RCS file: /sources/qemacs/qemacs/charsetmore.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -b -r1.18 -r1.19
--- charsetmore.c       1 Mar 2014 21:57:28 -0000       1.18
+++ charsetmore.c       1 Mar 2014 22:37:26 -0000       1.19
@@ -48,6 +48,7 @@
 static QECharset charset_8859_2 = {
     "8859-2",
     "ISO-8859-2|iso-ir-101|latin2|l2|iso-latin2",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -90,6 +91,7 @@
 static QECharset charset_8859_3 = {
     "8859-3",
     "ISO-8859-3",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -132,6 +134,7 @@
 static QECharset charset_8859_4 = {
     "8859-4",
     "ISO-8859-4|iso-ir-110|latin4|l4|iso-latin4",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -174,6 +177,7 @@
 static QECharset charset_8859_5 = {
     "8859-5",
     "ISO-8859-5",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -213,6 +217,7 @@
 static QECharset charset_8859_6 = {
     "8859-6",
     "ISO-8859-6",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -255,6 +260,7 @@
 static QECharset charset_8859_7 = {
     "8859-7",
     "ISO-8859-7|iso-ir-126|elot-928",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -296,6 +302,7 @@
 static QECharset charset_8859_8 = {
     "8859-8",
     "ISO-8859-8",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -332,6 +339,7 @@
 static QECharset charset_8859_9 = {
     "8859-9",
     "ISO-8859-9",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -374,6 +382,7 @@
 static QECharset charset_8859_10 = {
     "8859-10",
     "ISO-8859-10",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -416,6 +425,7 @@
 static QECharset charset_8859_11 = {
     "8859-11",
     "ISO-8859-11",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -458,6 +468,7 @@
 static QECharset charset_8859_13 = {
     "8859-13",
     "ISO-8859-13|iso-ir-179|latin7|l7|iso-latin7",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -500,6 +511,7 @@
 static QECharset charset_8859_14 = {
     "8859-14",
     "ISO-8859-14",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -534,6 +546,7 @@
 static QECharset charset_8859_15 = {
     "8859-15",
     "ISO-8859-15|latin9|l9|latin0|l0",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -576,6 +589,7 @@
 static QECharset charset_8859_16 = {
     "8859-16",
     "ISO-8859-16|latin10|l10",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -622,6 +636,7 @@
 static QECharset charset_cp437 = {
     "cp437",
     "437",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -668,6 +683,7 @@
 static QECharset charset_cp737 = {
     "cp737",
     "737",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -714,6 +730,7 @@
 static QECharset charset_cp850 = {
     "cp850",
     "850",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -760,6 +777,7 @@
 static QECharset charset_cp852 = {
     "cp852",
     "852",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -806,6 +824,7 @@
 static QECharset charset_cp866 = {
     "cp866",
     "866",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -852,6 +871,7 @@
 static QECharset charset_cp1125 = {
     "cp1125",
     "x-cp866-u|ruscii|1125",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -898,6 +918,7 @@
 static QECharset charset_cp1250 = {
     "cp1250",
     "windows-1250|1250",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -944,6 +965,7 @@
 static QECharset charset_cp1251 = {
     "cp1251",
     "windows-1251|1251",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -978,6 +1000,7 @@
 static QECharset charset_cp1252 = {
     "cp1252",
     "windows-1252|1252",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1024,6 +1047,7 @@
 static QECharset charset_cp1256 = {
     "cp1256",
     "windows-1256|1256",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1070,6 +1094,7 @@
 static QECharset charset_cp1257 = {
     "cp1257",
     "windows-1257|1257",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1116,6 +1141,7 @@
 static QECharset charset_mac_latin2 = {
     "mac-latin2",
     "x-mac-ce|mac-ce|macce|10029",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1162,6 +1188,7 @@
 QECharset charset_mac_roman = {
     "mac-roman",
     "x-mac|mac",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1208,6 +1235,7 @@
 static QECharset charset_kamen = {
     "kamen",
     "x-kam-cs|kam|867|869",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1254,6 +1282,7 @@
 static QECharset charset_koi8_r = {
     "koi8-r",
     "",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1300,6 +1329,7 @@
 static QECharset charset_koi8_u = {
     "koi8-u",
     "",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1362,6 +1392,7 @@
 static QECharset charset_tcvn = {
     "tcvn",
     "TCVN-5712|TCVN|TCVN-0|TCVN-1",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1424,6 +1455,7 @@
 static QECharset charset_viscii = {
     "viscii",
     "VISCII|VISCII-1|VISCII 1.1|VISCII-1.1|VISCII1.1-1",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1486,6 +1518,7 @@
 static QECharset charset_cp037 = {
     "cp037",
     "037|ebcdic-037",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1548,6 +1581,7 @@
 static QECharset charset_cp424 = {
     "cp424",
     "424|ebcdic-424",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1610,6 +1644,7 @@
 static QECharset charset_cp500 = {
     "cp500",
     "500|ebcdic-500",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1672,6 +1707,7 @@
 static QECharset charset_cp875 = {
     "cp875",
     "875|ebcdic-875",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1734,6 +1770,7 @@
 static QECharset charset_cp1026 = {
     "cp1026",
     "1026|ebcdic-1026",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,
@@ -1780,6 +1817,7 @@
 static QECharset charset_atarist = {
     "atarist",
     "atari-st",
+    NULL,
     decode_8bit_init,
     decode_8bit,
     encode_8bit,

Index: kmaptoqe.c
===================================================================
RCS file: /sources/qemacs/qemacs/kmaptoqe.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- kmaptoqe.c  20 Apr 2008 14:18:18 -0000      1.11
+++ kmaptoqe.c  1 Mar 2014 22:37:26 -0000       1.12
@@ -2,7 +2,7 @@
  * Convert Yudit kmap files to QEmacs binary internal format
  *
  * Copyright (c) 2002 Fabrice Bellard.
- * Copyright (c) 2007-2008 Charlie Gordon.
+ * Copyright (c) 2007-2014 Charlie Gordon.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -40,6 +40,7 @@
     int len;
     unsigned short output[20];
     int olen;
+    int order;
 } InputEntry;
 
 static InputEntry inputs[NB_MAX];
@@ -55,6 +56,7 @@
 static FILE *outfile;
 static unsigned char outbuf[100000], *outbuf_ptr;
 
+/* sort function implements strict order, making qsort stable */
 static int sort_func(const void *a1, const void *b1)
 {
     const InputEntry *a = a1;
@@ -66,7 +68,10 @@
         if (val != 0)
             return val;
     }
-    return a->output[0] - b->output[0];
+    val = a->output[0] - b->output[0];
+    if (val != 0)
+        return val;
+    return a->order - b->order;
 }
 
 
@@ -496,8 +501,9 @@
     InputEntry *ip;
 
     if (argc < 3) {
-        printf("usage: kmaptoqe outfile kmaps...\n"
-               "Convert yudit keyboard maps to qemacs compressed format\n");
+        printf("kmaptoqe -- Convert yudit keyboard maps to qemacs compressed format\n"
+               "usage: kmaptoqe outfile kmaps...\n"
+               "       kmaptoqe --dump outfile\n");
         exit(1);
     }
     if (!strcmp(argv[1], "--dump")) {
@@ -588,6 +594,7 @@
                 if (*p == '"')
                     break;
             }
+            ip->order = ip - inputs;
             ip++;
             nb_inputs++;
             continue;

Index: ligtoqe.c
===================================================================
RCS file: /sources/qemacs/qemacs/ligtoqe.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -b -r1.9 -r1.10
--- ligtoqe.c   20 Apr 2008 14:18:19 -0000      1.9
+++ ligtoqe.c   1 Mar 2014 22:37:26 -0000       1.10
@@ -92,9 +92,13 @@
     int val;
 
     val = a->buf_in[0] - b->buf_in[0];
-    if (val == 0 &&
-        a->buf_in_size >= 2 && b->buf_in_size >= 2) {
+    if (val == 0
+    &&  a->buf_in_size >= 2 && b->buf_in_size >= 2) {
         val = a->buf_in[1] - b->buf_in[1];
+        if (val == 0
+        &&  a->buf_in_size >= 3 && b->buf_in_size >= 3) {
+            val = a->buf_in[2] - b->buf_in[2];
+        }
     }
     return val;
 }

Index: charsetjis.c
===================================================================
RCS file: /sources/qemacs/qemacs/charsetjis.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- charsetjis.c        2 May 2008 17:05:39 -0000       1.6
+++ charsetjis.c        1 Mar 2014 22:37:26 -0000       1.7
@@ -145,6 +145,7 @@
 static QECharset charset_euc_jp = {
     "euc-jp",
     NULL,
+    NULL,
     decode_euc_jp_init,
     decode_euc_jp_func,
     encode_euc_jp,
@@ -224,6 +225,7 @@
 static QECharset charset_sjis = {
     "sjis",
     NULL,
+    NULL,
     decode_sjis_init,
     decode_sjis_func,
     encode_sjis,

Index: cptoqe.c
===================================================================
RCS file: /sources/qemacs/qemacs/cptoqe.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- cptoqe.c    1 Mar 2014 21:57:29 -0000       1.16
+++ cptoqe.c    1 Mar 2014 22:37:26 -0000       1.17
@@ -231,7 +231,8 @@
     }
     printf("\",\n");
 
-    printf("    decode_8bit_init,\n"
+    printf("    NULL,\n"
+           "    decode_8bit_init,\n"
            "    decode_8bit,\n"
            "    encode_8bit,\n"
            "    charset_get_pos_8bit,\n"

Index: ligatures
===================================================================
RCS file: /sources/qemacs/qemacs/ligatures,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -b -r1.1.1.1 -r1.2
Binary files /tmp/cvsNd7gZL and /tmp/cvsQrNYdn differ

Index: qe.h
===================================================================
RCS file: /sources/qemacs/qemacs/qe.h,v
retrieving revision 1.142
retrieving revision 1.143
diff -u -b -r1.142 -r1.143
--- qe.h        10 Feb 2014 21:27:53 -0000      1.142
+++ qe.h        1 Mar 2014 22:37:26 -0000       1.143
@@ -485,6 +485,7 @@
 struct QECharset {
     const char *name;
     const char *aliases;
+    int (*probe_func)(QECharset *charset, const u8 *buf, int size);
     void (*decode_init)(CharsetDecodeState *s);
     int (*decode_func)(CharsetDecodeState *s);
     /* return NULL if cannot encode. Currently no state since speed is
[Prev in Thread]
Current Thread
[Next in Thread]
[Qemacs-commit] qemacs charset.c charsetmore.c kmaptoqe.c ligto..., Charlie Gordon <=
Prev by Date: [Qemacs-commit] qemacs charsetmore.c cptoqe.c cp/kamen.cp
Next by Date: [Qemacs-commit] qemacs makemode.c script.c
Previous by thread: [Qemacs-commit] qemacs charsetmore.c cptoqe.c cp/kamen.cp
Next by thread: [Qemacs-commit] qemacs makemode.c script.c
Index(es):
- Date
- Thread