[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#7960: [PATCH] fmt: fix formatting multibyte text (bug #7372)
From: |
Kostya Stopani |
Subject: |
bug#7960: [PATCH] fmt: fix formatting multibyte text (bug #7372) |
Date: |
Wed, 2 Feb 2011 17:17:12 +0300 |
User-agent: |
Mutt/1.5.20 (2009-06-14) |
>From b118695b7b614f5f0e371cad885a01306f527d9e Mon Sep 17 00:00:00 2001
From: Kostya Stopani <address@hidden>
Date: Wed, 2 Feb 2011 17:10:05 +0300
Subject: [PATCH] fmt: fix formatting multibyte text (bug #7372)
* src/fmt.c (guess_screen_width): Add function to compute screen width
of a possibly multibyte word to correctly format international
text. If it's not multibyte fall back to byte length.
* src/fmt.c (mbsnrtowcs): Stub function partly implementing the GNU
extension function of the same name for non-GNU platforms.
* src/fmt.c (struct Word): Add a new field "nchar" to hold byte-length
of "text".
* src/fmt.c (get_line, check_punctuation, put_word): Use Word.length
as screen width of a word and Word.nchar as byte-length.
---
src/fmt.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/src/fmt.c b/src/fmt.c
index 7d5aee3..1dcbaaf 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -20,6 +20,10 @@
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
+#include <wchar.h>
+#include <string.h>
+#include <errno.h>
+
/* Redefine. Otherwise, systems (Unicos for one) with headers that define
it to be a type get syntax errors for the variable declaration below. */
@@ -135,6 +139,7 @@ struct Word
const char *text; /* the text of the word */
int length; /* length of this word */
+ int nchar; /* number of char entries in text array */
int space; /* the size of the following space */
unsigned int paren:1; /* starts with open paren */
unsigned int period:1; /* ends in [.?!])* */
@@ -167,6 +172,11 @@ static void put_paragraph (WORD *finish);
static void put_line (WORD *w, int indent);
static void put_word (WORD *w);
static void put_space (int space);
+static unsigned int guess_screen_width (const char *text, size_t b);
+#ifndef __GNU_LIBRARY__
+static size_t mbsnrtowcs (wchar_t *DST, const char **SRC, size_t NMC, size_t
LEN,
+ mbstate_t *restrict PS);
+#endif /* __GNU_LIBRARY__ */
/* Option values. */
@@ -670,7 +680,10 @@ get_line (FILE *f, int c)
c = getc (f);
}
while (c != EOF && !isspace (c));
- in_column += word_limit->length = wptr - word_limit->text;
+ word_limit->nchar = wptr - word_limit->text;
+ word_limit->length = guess_screen_width (word_limit->text,
word_limit->nchar);
+ in_column += word_limit->length;
+
check_punctuation (word_limit);
/* Scan inter-word space. */
@@ -751,7 +764,7 @@ static void
check_punctuation (WORD *w)
{
char const *start = w->text;
- char const *finish = start + (w->length - 1);
+ char const *finish = start + (w->nchar - 1);
unsigned char fin = *finish;
w->paren = isopen (*start);
@@ -982,7 +995,7 @@ put_word (WORD *w)
int n;
s = w->text;
- for (n = w->length; n != 0; n--)
+ for (n = w->nchar; n != 0; n--)
putchar (*s++);
out_column += w->length;
}
@@ -1011,3 +1024,75 @@ put_space (int space)
out_column++;
}
}
+
+/* Count the characters in the B-byte multibyte string TEXT and use that
+ count as its screen width. Return B when the count cannot be used. */
+
+static unsigned int
+guess_screen_width (const char *text, size_t b)
+{
+ size_t c;
+ mbstate_t state;
+
+ memset (&state, 0, sizeof (state)); /* begin with a zeroed conversion state */
+
+ /* DST is NULL, so nothing is stored; only the returned count is used. */
+ c = mbsnrtowcs (NULL, &text, b, b, &state);
+ if (c > 0 && errno != EILSEQ) /* NOTE(review): a (size_t) -1 failure return also passes c > 0, so only the errno test rejects it; errno is not reset before the call, so a stale EILSEQ selects the fallback too -- confirm */
+ return c; /* NOTE(review): a character count, which equals the column count only if every character is one column wide */
+ else
+ return b; /* fall back to the byte length */
+}
+
+
+#ifndef __GNU_LIBRARY__
+
+#define INITBUFSIZE 1024 /* initial scratch buffer size, in bytes */
+#define MAXBUFSIZE 1024*1024 /* the scratch buffer is never grown beyond this */
+/* Stub mbsnrtowcs to be used when GNU extensions are unavailable. It only
+ counts characters: DST, LEN and PS are unused, and NMC is returned on failure. */
+
+size_t mbsnrtowcs (wchar_t *DST, const char **SRC, size_t NMC, size_t LEN,
+ mbstate_t *restrict PS)
+{
+ static char *buf = NULL; /* scratch buffer, kept and reused across calls */
+ static size_t buf_size = INITBUFSIZE; /* current size of buf, in bytes */
+ char *new_buf;
+ size_t new_buf_size, c;
+ mbstate_t state; /* local conversion state; the caller's PS is not consulted */
+
+ if (!buf)
+ {
+ buf = malloc (buf_size * sizeof (char));
+ if (!buf) return NMC; /* allocation failed: report NMC */
+ }
+
+ memset (&state, 0, sizeof (state));
+
+ if (buf_size < NMC + 1) /* need room for NMC bytes plus a terminating NUL */
+ {
+ /* Try to resize the buffer. */
+ new_buf_size = NMC + 1;
+ if (new_buf_size <= MAXBUFSIZE)
+ {
+ new_buf = realloc (buf, new_buf_size * sizeof (char));
+ if (!new_buf) return NMC; /* buf itself is left untouched on this path */
+ buf = new_buf;
+ buf_size = new_buf_size;
+ }
+ else
+ {
+ return NMC; /* input exceeds the buffer cap: report NMC */
+ }
+ }
+
+ strncpy (buf, *SRC, NMC); /* copies at most NMC bytes; *SRC itself is never advanced */
+ buf[NMC] = '\0'; /* in bounds: buf_size >= NMC + 1 is guaranteed above */
+ new_buf = buf; /* mbsrtowcs needs the address of a source pointer; new_buf is reused for that */
+ c = mbsrtowcs (NULL, (const char **restrict) &new_buf, NMC, &state);
+ if (c > 0 && errno != EILSEQ) /* NOTE(review): errno is not reset before the call, and a (size_t) -1 return also passes c > 0 -- confirm the errno test is sufficient */
+ return c;
+ else
+ return NMC;
+}
+#endif /* __GNU_LIBRARY__ */
--
1.7.0.4
- bug#7960: [PATCH] fmt: fix formatting multibyte text (bug #7372),
Kostya Stopani <=