[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] emacs/src coding.c
From: |
Kenichi Handa |
Subject: |
[Emacs-diffs] emacs/src coding.c |
Date: |
Wed, 14 Jan 2009 12:17:53 +0000 |
CVSROOT: /cvsroot/emacs
Module name: emacs
Changes by: Kenichi Handa <handa> 09/01/14 12:17:53
Modified files:
src : coding.c
Log message:
(TWO_MORE_BYTES): New macro.
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
ONE_MORE_BYTE.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/emacs/src/coding.c?cvsroot=emacs&r1=1.406&r2=1.407
Patches:
Index: coding.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/coding.c,v
retrieving revision 1.406
retrieving revision 1.407
diff -u -b -r1.406 -r1.407
--- coding.c 10 Jan 2009 10:40:45 -0000 1.406
+++ coding.c 14 Jan 2009 12:17:52 -0000 1.407
@@ -743,6 +743,47 @@
consumed_chars++; \
} while (0)
+/* Safely get two bytes from the source text pointed to by SRC, which
+ ends at SRC_END, and set C1 and C2 to those bytes. If there are no
+ bytes left for C1, jump to `no_more_source'. If there are no bytes
+ left for C2, set C2 to -1. If multibytep is nonzero and a byte with
+ the 0x80 bit set is found: a lead byte 0xC0 or 0xC1 is combined with
+ the following byte into one value; any other such byte yields -1
+ (for both C1 and C2 when it is found at C1). The caller should
+ declare and set these variables appropriately in advance:
+ src, src_end, multibytep
+ It is intended that this macro is used in detect_coding_utf_16. */
+
+#define TWO_MORE_BYTES(c1, c2) \
+ do { \
+ if (src == src_end) /* nothing left even for C1 */ \
+ goto no_more_source; \
+ c1 = *src++; \
+ if (multibytep && (c1 & 0x80)) \
+ { \
+ if ((c1 & 0xFE) == 0xC0) /* lead byte is 0xC0 or 0xC1 */ \
+ c1 = ((c1 & 1) << 6) | *src++; /* NOTE(review): next byte is read without a src_end check; presumably relies on well-formed multibyte text -- confirm */ \
+ else \
+ { \
+ c1 = c2 = -1; /* any other high-bit byte: report both as invalid */ \
+ break; /* leaves the do-while, skipping the C2 fetch */ \
+ } \
+ } \
+ if (src == src_end) \
+ c2 = -1; /* source ended after C1 */ \
+ else \
+ { \
+ c2 = *src++; \
+ if (multibytep && (c2 & 0x80)) \
+ { \
+ if ((c2 & 0xFE) == 0xC0) /* same 0xC0/0xC1 handling as for C1 */ \
+ c2 = ((c2 & 1) << 6) | *src++; \
+ else \
+ c2 = -1; \
+ } \
+ } \
+ } while (0)
+
#define ONE_MORE_BYTE_NO_CHECK(c) \
do { \
@@ -1575,8 +1616,7 @@
return 0;
}
- ONE_MORE_BYTE (c1);
- ONE_MORE_BYTE (c2);
+ TWO_MORE_BYTES (c1, c2);
if ((c1 == 0xFF) && (c2 == 0xFE))
{
detect_info->found |= (CATEGORY_MASK_UTF_16_LE
@@ -1593,6 +1633,11 @@
| CATEGORY_MASK_UTF_16_BE_NOSIG
| CATEGORY_MASK_UTF_16_LE_NOSIG);
}
+ else if (c1 < 0 || c2 < 0)
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_16;
+ return 0;
+ }
else
{
/* We check the dispersion of Eth and Oth bytes where E is even and
@@ -1610,8 +1655,9 @@
while (1)
{
- ONE_MORE_BYTE (c1);
- ONE_MORE_BYTE (c2);
+ TWO_MORE_BYTES (c1, c2);
+ if (c1 < 0 || c2 < 0)
+ break;
if (! e[c1])
{
e[c1] = 1;