[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemacs-commit] qemacs charset.c
From: Charlie Gordon
Subject: [Qemacs-commit] qemacs charset.c
Date: Sun, 06 Mar 2016 17:31:52 +0000
CVSROOT: /sources/qemacs
Module name: qemacs
Changes by: Charlie Gordon <chqrlie> 16/03/06 17:31:52
Modified files:
. : charset.c
Log message:
charset: fixed scanning bug for DOS eol modes
- goto_char had infinite loops for EOL_DOS in 8-, 16- and 32-bit modes.
- this bug caused qemacs to hang when searching DOS/Windows files.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/qemacs/charset.c?cvsroot=qemacs&r1=1.42&r2=1.43
Patches:
Index: charset.c
===================================================================
RCS file: /sources/qemacs/qemacs/charset.c,v
retrieving revision 1.42
retrieving revision 1.43
diff -u -b -r1.42 -r1.43
--- charset.c 16 Sep 2015 22:18:23 -0000 1.42
+++ charset.c 6 Mar 2016 17:31:44 -0000 1.43
@@ -529,22 +529,22 @@
nb_chars = 0;
buf_ptr = buf;
buf_end = buf_ptr + size;
- while (buf_ptr < buf_end) {
+ for (; buf_ptr < buf_end; buf_ptr++) {
c = *buf_ptr;
+ if (c >= 0x80 && c < 0xc0) {
+ /* Test done here to skip initial trailing bytes if any */
+ continue;
+ }
if (c == '\n' && s->eol_type == EOL_DOS) {
/* ignore \n in EOL_DOS scan, but count \r.
* see comment above.
*/
continue;
}
- if (c < 0x80 || c >= 0xc0) {
- /* Test done here to skip initial trailing bytes if any */
if (nb_chars >= pos)
break;
nb_chars++;
}
- buf_ptr++;
- }
return buf_ptr - buf;
}
@@ -761,17 +761,17 @@
const u8 *buf, int size)
{
/* XXX: should handle surrogates */
- int nb_skip;
+ int count = size >> 1; /* convert byte count to char16 count */
const uint16_t *buf_end, *buf_ptr;
uint16_t nl;
union { uint16_t n; char c[2]; } u;
if (s->eol_type != EOL_DOS)
- return size >> 1;
+ return count;
- nb_skip = 0;
buf_ptr = (const uint16_t *)(const void *)buf;
- buf_end = buf_ptr + (size >> 1);
+ buf_end = buf_ptr + count;
+ // XXX: undefined behavior
u.n = 0;
u.c[s->charset == &charset_ucs2be] = '\n';
nl = u.n;
@@ -779,10 +779,10 @@
while (buf_ptr < buf_end) {
if (*buf_ptr++ == nl) {
/* ignore \n in EOL_DOS scan, but count \r. (see above) */
- nb_skip++;
+ count--;
}
}
- return (size >> 1) - nb_skip;
+ return count;
}
static int charset_goto_char_ucs2(CharsetDecodeState *s,
@@ -800,11 +800,12 @@
nb_chars = 0;
buf_ptr = (const uint16_t *)(const void *)buf;
buf_end = buf_ptr + (size >> 1);
+ // XXX: undefined behavior
u.n = 0;
u.c[s->charset == &charset_ucs2be] = '\n';
nl = u.n;
- while (buf_ptr < buf_end) {
+ for (; buf_ptr < buf_end; buf_ptr++) {
if (*buf_ptr == nl) {
/* ignore \n in EOL_DOS scan, but count \r. (see above) */
continue;
@@ -812,7 +813,6 @@
if (nb_chars >= pos)
break;
nb_chars++;
- buf_ptr++;
}
return (const u8*)buf_ptr - buf;
}
@@ -1034,17 +1034,17 @@
static int charset_get_chars_ucs4(CharsetDecodeState *s,
const u8 *buf, int size)
{
- int nb_skip;
+ int count = size >> 2; /* convert byte count to char32 count */
const uint32_t *buf_end, *buf_ptr;
uint32_t nl;
union { uint32_t n; char c[4]; } u;
if (s->eol_type != EOL_DOS)
- return size >> 2;
+ return count;
- nb_skip = 0;
buf_ptr = (const uint32_t *)(const void *)buf;
- buf_end = buf_ptr + (size >> 2);
+ buf_end = buf_ptr + count;
+ // XXX: undefined behavior
u.n = 0;
u.c[(s->charset == &charset_ucs4be) * 3] = '\n';
nl = u.n;
@@ -1052,10 +1052,10 @@
while (buf_ptr < buf_end) {
if (*buf_ptr++ == nl) {
/* ignore \n in EOL_DOS scan, but count \r. (see above) */
- nb_skip++;
+ count--;
}
}
- return (size >> 2) - nb_skip;
+ return count;
}
static int charset_goto_char_ucs4(CharsetDecodeState *s,
@@ -1072,11 +1072,12 @@
nb_chars = 0;
buf_ptr = (const uint32_t *)(const void *)buf;
buf_end = buf_ptr + (size >> 2);
+ // XXX: undefined behavior
u.n = 0;
u.c[(s->charset == &charset_ucs4be) * 3] = '\n';
nl = u.n;
- while (buf_ptr < buf_end) {
+ for (; buf_ptr < buf_end; buf_ptr++) {
if (*buf_ptr == nl) {
/* ignore \n in EOL_DOS scan, but count \r. (see above) */
continue;
@@ -1084,7 +1085,6 @@
if (nb_chars >= pos)
break;
nb_chars++;
- buf_ptr++;
}
return (const u8*)buf_ptr - buf;
}
@@ -1661,22 +1661,21 @@
int charset_get_chars_8bit(CharsetDecodeState *s,
const u8 *buf, int size)
{
- int nb_skip;
+ int count = size;
const u8 *buf_end, *buf_ptr;
if (s->eol_type != EOL_DOS)
- return size;
+ return count;
- nb_skip = 0;
buf_ptr = buf;
- buf_end = buf_ptr + size;
+ buf_end = buf_ptr + count;
while (buf_ptr < buf_end) {
if (*buf_ptr++ == '\n') {
/* ignore \n in EOL_DOS scan, but count \r. (see above) */
- nb_skip++;
+ count--;
}
}
- return size - nb_skip;
+ return count;
}
int charset_goto_char_8bit(CharsetDecodeState *s,
@@ -1691,7 +1690,7 @@
nb_chars = 0;
buf_ptr = buf;
buf_end = buf_ptr + size;
- while (buf_ptr < buf_end) {
+ for (; buf_ptr < buf_end; buf_ptr++) {
if (*buf_ptr == '\n') {
/* ignore \n in EOL_DOS scan, but count \r. */
continue;
@@ -1699,7 +1698,6 @@
if (nb_chars >= pos)
break;
nb_chars++;
- buf_ptr++;
}
return buf_ptr - buf;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Qemacs-commit] qemacs charset.c,
Charlie Gordon <=