bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Fwd: Gawk FIELDWIDTHS and multibyte characters] --- patch


From: Steffen Schuler
Subject: Re: [Fwd: Gawk FIELDWIDTHS and multibyte characters] --- patch
Date: Mon, 17 Mar 2008 20:52:41 +0100

Hi,

here is a tested patch for the multibyte character bug in FIELDWIDTHS:

Regards,

Steffen Schuler

-------------------8<-------------------------------------------------------------------
--- gawk-3.1.6a/field.c    2007-09-22 23:34:34.000000000 +0200
+++ gawk-3.1.6a.fix/field.c    2008-03-17 07:45:03.000000000 +0100
@@ -700,16 +700,52 @@
     register char *scan = *buf;
     register long nf = parse_high_water;
     register char *end = scan + len;
+#ifdef MBS_SUPPORT
+    int nmbc;
+    size_t mbclen;
+    size_t mbslen;
+    size_t lenrest;
+    char *mbscan;
+    mbstate_t mbs;
+    if (gawk_mb_cur_max > 1)
+        memset(&mbs, 0, sizeof(mbstate_t));
+#endif

     if (up_to == UNLIMITED)
         nf = 0;
     if (len == 0)
         return nf;
     for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
-        if (len > end - scan)
-            len = end - scan;
-        (*set)(++nf, scan, (long) len, n);
-        scan += len;
+#ifdef MBS_SUPPORT
+        if (gawk_mb_cur_max > 1) {
+                nmbc = 0;
+            mbslen = 0;
+            mbscan = scan;
+            lenrest = end - scan;
+            while (nmbc < len && mbslen < lenrest) {
+                mbclen = 0;
+                mbclen = mbrlen(mbscan, end - mbscan, &mbs);
+                if ((mbclen == 1) || (mbclen == (size_t) -1)
+                        || (mbclen == (size_t) -2) || (mbclen == 0)) {
+                    /* We treat it as a singlebyte character.  */
+                    mbclen = 1;
+                }
+                if (mbclen <= end - mbscan) {
+                    mbscan += mbclen;
+                    mbslen += mbclen;
+                    ++nmbc;
+                }
+            }
+            (*set)(++nf, scan, (long) mbslen, n);
+            scan += mbslen;
+                } else
+#endif
+        {
+            if (len > end - scan)
+                len = end - scan;
+            (*set)(++nf, scan, (long) len, n);
+            scan += len;
+        }
     }
     if (len == -1)
         *buf = end;


reply via email to

[Prev in Thread] Current Thread [Next in Thread]