[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Fwd: Gawk FIELDWIDTHS and multibyte characters] --- patch
From: |
Steffen Schuler |
Subject: |
Re: [Fwd: Gawk FIELDWIDTHS and multibyte characters] --- patch |
Date: |
Mon, 17 Mar 2008 20:52:41 +0100 |
Hi,
here is a tested patch for the multibyte character bug in FIELDWIDTHS:
Regards,
Steffen Schuler
-------------------8<-------------------------------------------------------------------
--- gawk-3.1.6a/field.c 2007-09-22 23:34:34.000000000 +0200
+++ gawk-3.1.6a.fix/field.c 2008-03-17 07:45:03.000000000 +0100
@@ -700,16 +700,52 @@
register char *scan = *buf;
register long nf = parse_high_water;
register char *end = scan + len;
+#ifdef MBS_SUPPORT
+ int nmbc;
+ size_t mbclen;
+ size_t mbslen;
+ size_t lenrest;
+ char *mbscan;
+ mbstate_t mbs;
+ if (gawk_mb_cur_max > 1)
+ memset(&mbs, 0, sizeof(mbstate_t));
+#endif
if (up_to == UNLIMITED)
nf = 0;
if (len == 0)
return nf;
for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
- if (len > end - scan)
- len = end - scan;
- (*set)(++nf, scan, (long) len, n);
- scan += len;
+#ifdef MBS_SUPPORT
+ if (gawk_mb_cur_max > 1) {
+ nmbc = 0;
+ mbslen = 0;
+ mbscan = scan;
+ lenrest = end - scan;
+ while (nmbc < len && mbslen < lenrest) {
+ mbclen = 0;
+ mbclen = mbrlen(mbscan, end - mbscan, &mbs);
+ if ((mbclen == 1) || (mbclen == (size_t) -1)
+ || (mbclen == (size_t) -2) || (mbclen == 0)) {
+ /* We treat it as a singlebyte character. */
+ mbclen = 1;
+ }
+ if (mbclen <= end - mbscan) {
+ mbscan += mbclen;
+ mbslen += mbclen;
+ ++nmbc;
+ }
+ }
+ (*set)(++nf, scan, (long) mbslen, n);
+ scan += mbslen;
+ } else
+#endif
+ {
+ if (len > end - scan)
+ len = end - scan;
+ (*set)(++nf, scan, (long) len, n);
+ scan += len;
+ }
}
if (len == -1)
*buf = end;
- Re: [Fwd: Gawk FIELDWIDTHS and multibyte characters] --- patch,
Steffen Schuler <=