diff --git a/lisp/word-wrap.el b/lisp/word-wrap.el new file mode 100644 index 0000000..6d59a83 --- /dev/null +++ b/lisp/word-wrap.el @@ -0,0 +1,21 @@ +(define-minor-mode word-wrap-char-table-mode + "Toggle wrapping using a look-up in `word-wrap-chars', globally. + +Currently, this allows word wrapping on the characters U+2000 to +U+200B in addition to the default of space and tab, when +`word-wrap' is set to t. + +(Provisional and unstable.) +" + :global t + :lighter "uws " + (if word-wrap-char-table-mode + (progn (setq word-wrap-chars (make-char-table nil nil)) + (set-char-table-range word-wrap-chars 9 t) + (set-char-table-range word-wrap-chars 32 t) + (set-char-table-range word-wrap-chars + '(8192 . 8203) t)) + (setq word-wrap-chars nil))) + +(provide 'word-wrap) + diff --git a/src/character.c b/src/character.c index c8ffa2b..af89a8b 100644 --- a/src/character.c +++ b/src/character.c @@ -1145,4 +1145,10 @@ All Unicode characters have one of the following values (symbol): See The Unicode Standard for the meaning of those values. */); /* The correct char-table is setup in characters.el. */ Vunicode_category_table = Qnil; + + DEFVAR_LISP ("word-wrap-chars", Vword_wrap_chars, + doc: /* A char-table for characters at which word-wrap occurs. +Such characters have a non-nil value in this table. +When nil (the default), wrapping occurs only at space and tab; see `word-wrap-char-table-mode'. */); + Vword_wrap_chars = Qnil; } diff --git a/src/xdisp.c b/src/xdisp.c index 7e47c06..4e8b045 100644 --- a/src/xdisp.c +++ b/src/xdisp.c @@ -348,20 +348,41 @@ static Lisp_Object list_of_error; #endif /* HAVE_WINDOW_SYSTEM */ /* Test if the display element loaded in IT, or the underlying buffer - or string character, is a space or a TAB character. This is used - to determine where word wrapping can occur.
*/ - -#define IT_DISPLAYING_WHITESPACE(it) \ - ((it->what == IT_CHARACTER && (it->c == ' ' || it->c == '\t')) \ - || ((STRINGP (it->string) \ - && (SREF (it->string, IT_STRING_BYTEPOS (*it)) == ' ' \ - || SREF (it->string, IT_STRING_BYTEPOS (*it)) == '\t')) \ - || (it->s \ - && (it->s[IT_BYTEPOS (*it)] == ' ' \ - || it->s[IT_BYTEPOS (*it)] == '\t')) \ - || (IT_BYTEPOS (*it) < ZV_BYTE \ - && (*BYTE_POS_ADDR (IT_BYTEPOS (*it)) == ' ' \ - || *BYTE_POS_ADDR (IT_BYTEPOS (*it)) == '\t')))) \ + or string character, is one at which word wrapping can occur. If + word-wrap-chars is not a char-table (the default), that means a + space or tab, tested directly to avoid a char-table lookup. If + word-wrap-chars is a char-table, it means any character whose + entry in that char-table is non-nil. */ + +static inline bool +char_is_whitespace_p (int c) { + return !NILP (CHAR_TABLE_REF (Vword_wrap_chars, c)); +} + +static inline bool +it_displaying_whitespace (struct it *it) { + if (!CHAR_TABLE_P (Vword_wrap_chars)) { + return ((it->what == IT_CHARACTER && (it->c == ' ' || it->c == '\t')) + || ((STRINGP (it->string) + && (SREF (it->string, IT_STRING_BYTEPOS (*it)) == ' ' + || SREF (it->string, IT_STRING_BYTEPOS (*it)) == '\t')) + || (it->s + && (it->s[IT_BYTEPOS (*it)] == ' ' + || it->s[IT_BYTEPOS (*it)] == '\t')) + || (IT_BYTEPOS (*it) < ZV_BYTE + && (*BYTE_POS_ADDR (IT_BYTEPOS (*it)) == ' ' + || *BYTE_POS_ADDR (IT_BYTEPOS (*it)) == '\t')))); + } else { + return ((it->what == IT_CHARACTER && char_is_whitespace_p (it->c)) + || (STRINGP (it->string) && char_is_whitespace_p + (STRING_CHAR + (SDATA (it->string) + IT_STRING_BYTEPOS (*it)))) + || (it->s && char_is_whitespace_p + (STRING_CHAR(it->s + IT_BYTEPOS (*it)))) + || (IT_BYTEPOS (*it) < ZV_BYTE && char_is_whitespace_p + (FETCH_CHAR (IT_BYTEPOS (*it))))); + } +} /* True means print newline to stdout before next mini-buffer message.
*/ @@ -8785,7 +8806,7 @@ move_it_in_display_line_to (struct it *it, { if (it->line_wrap == WORD_WRAP && it->area == TEXT_AREA) { - if (IT_DISPLAYING_WHITESPACE (it)) + if (it_displaying_whitespace (it)) may_wrap = true; else if (may_wrap) { @@ -8950,7 +8971,7 @@ move_it_in_display_line_to (struct it *it, SAVE_IT (tem_it, *it, tem_data); set_iterator_to_next (it, true); if (get_next_display_element (it) - && IT_DISPLAYING_WHITESPACE (it)) + && it_displaying_whitespace (it)) can_wrap = false; RESTORE_IT (it, &tem_it, tem_data); } @@ -9041,7 +9062,7 @@ move_it_in_display_line_to (struct it *it, wrapped in the middle of whitespace. Therefore, wrap_it _is_ relevant in that case. */ - && !(moved_forward && IT_DISPLAYING_WHITESPACE (it))) + && !(moved_forward && it_displaying_whitespace (it))) { /* If we've found TO_X, go back there, as we now know the last word fits on this screen line. */ @@ -21427,7 +21448,7 @@ display_line (struct it *it, int cursor_vpos) if (it->line_wrap == WORD_WRAP && it->area == TEXT_AREA) { - if (IT_DISPLAYING_WHITESPACE (it)) + if (it_displaying_whitespace (it)) may_wrap = true; else if (may_wrap) { @@ -21571,7 +21592,7 @@ display_line (struct it *it, int cursor_vpos) was a space or tab AND (ii) the current character is not. */ && (!may_wrap - || IT_DISPLAYING_WHITESPACE (it))) + || it_displaying_whitespace (it))) goto back_to_wrap; /* Record the maximum and minimum buffer @@ -21605,7 +21626,7 @@ display_line (struct it *it, int cursor_vpos) was a space or tab AND (ii) the current character is not. */ && (!may_wrap - || IT_DISPLAYING_WHITESPACE (it))) + || it_displaying_whitespace (it))) goto back_to_wrap; }