diff --git a/src/fmt.c b/src/fmt.c
index 89d13a6..56f7c0b 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <getopt.h>
+#include <wchar.h>
 
 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
    it to be a type get syntax errors for the variable declaration below.  */
@@ -135,6 +136,7 @@ struct Word
 
     const char *text;           /* the text of the word */
     int length;                 /* length of this word */
+    int width;                  /* display width of this word */
     int space;                  /* the size of the following space */
     unsigned int paren:1;       /* starts with open paren */
     unsigned int period:1;      /* ends in [.?!])* */
@@ -259,6 +261,57 @@ static int next_prefix_indent;
    paragraphs chosen by fmt_paragraph().  */
 static int last_line_length;
 
+/* Return the number of display columns occupied by the bytes in
+   [BEG, END), decoded as multibyte characters in the current locale.
+   The escape sequences ESC [ 0 m and ESC [ 1 m occupy no columns.
+   A byte that does not start a valid character counts as one column,
+   and a character for which wcwidth reports no width counts as zero.
+   Scanning stops early at a NUL byte.  */
+static size_t
+get_display_width (const char *beg, const char *end)
+{
+  const char *ptr = beg;
+  size_t r = 0;
+  mbstate_t ps;
+
+  memset (&ps, 0, sizeof ps);
+
+  /* Check the bound before dereferencing PTR.  */
+  while (ptr < end && *ptr)
+    {
+      wchar_t wc;
+      int w;
+      size_t s = mbrtowc (&wc, ptr, end - ptr, &ps);
+
+      if (s == (size_t) -1 || s == (size_t) -2)
+        {
+          /* Invalid or truncated sequence.  Count the byte as one
+             column and restart from the initial conversion state.  */
+          memset (&ps, 0, sizeof ps);
+          ptr++;
+          r++;
+          continue;
+        }
+      if (s == 0)
+        break;
+      if (wc == '\033' && end - ptr > 3
+          && ptr[1] == '[' && (ptr[2] == '0' || ptr[2] == '1')
+          && ptr[3] == 'm')
+        {
+          ptr += 4;
+          continue;
+        }
+
+      /* wcwidth returns -1 for non-printable characters; adding that
+         to an unsigned total would wrap around.  */
+      w = wcwidth (wc);
+      if (w > 0)
+        r += w;
+      ptr += s;
+    }
+  return r;
+}
+
 void
 usage (int status)
 {
@@ -669,7 +722,9 @@ get_line (FILE *f, int c)
           c = getc (f);
         }
       while (c != EOF && !isspace (c));
-      in_column += word_limit->length = wptr - word_limit->text;
+      word_limit->length = wptr - word_limit->text;
+      in_column += word_limit->width = get_display_width (word_limit->text,
+                                                          wptr);
       check_punctuation (word_limit);
 
       /* Scan inter-word space.  */
@@ -871,13 +926,13 @@ fmt_paragraph (void)
           if (w == word_limit)
             break;
 
-          len += (w - 1)->space + w->length;    /* w > start >= word */
+          len += (w - 1)->space + w->width;    /* w > start >= word */
         }
       while (len < max_width);
       start->best_cost = best + base_cost (start);
     }
 
-  word_limit->length = saved_length;
+  word_limit->width = saved_length;
 }
 
 /* Return the constant component of the cost of breaking before the
@@ -902,13 +957,13 @@ base_cost (WORD *this)
       else if ((this - 1)->punct)
         cost -= PUNCT_BONUS;
       else if (this > word + 1 && (this - 2)->final)
-        cost += WIDOW_COST ((this - 1)->length);
+        cost += WIDOW_COST ((this - 1)->width);
     }
 
   if (this->paren)
     cost -= PAREN_BONUS;
   else if (this->final)
-    cost += ORPHAN_COST (this->length);
+    cost += ORPHAN_COST (this->width);
 
   return cost;
 }
@@ -983,7 +1038,7 @@ put_word (WORD *w)
   s = w->text;
   for (n = w->length; n != 0; n--)
     putchar (*s++);
-  out_column += w->length;
+  out_column += w->width;
 }
 
 /* Output to stdout SPACE spaces, or equivalent tabs.  */