[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#24603: [PATCHv5 03/11] Add support for title-casing letters (bug#24603)
From: |
Michal Nazarewicz |
Subject: |
bug#24603: [PATCHv5 03/11] Add support for title-casing letters (bug#24603) |
Date: |
Thu, 9 Mar 2017 22:51:42 +0100 |
* src/casefiddle.c (struct casing_context, prepare_casing_context): Add
titlecase_char_table member. It’s set to the ‘titlecase’ Unicode
property table if capitalisation has been requested.
(case_character): Make use of the titlecase_char_table to title-case
initial characters when capitalising.
* test/src/casefiddle-tests.el (casefiddle-tests--characters,
casefiddle-tests-casing): Update test cases which are now passing.
---
etc/NEWS | 2 +-
src/casefiddle.c | 27 +++++++++++++++++++++------
test/src/casefiddle-tests.el | 39 ++++++++++++++++++++++++++-------------
3 files changed, 48 insertions(+), 20 deletions(-)
diff --git a/etc/NEWS b/etc/NEWS
index 32137a79da6..715764accf1 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -351,7 +351,7 @@ same as in modes where the character is not whitespace.
Instead of only checking the modification time, Emacs now also checks
the file's actual content before prompting the user.
-** Title case characters are properly converted to upper case.
+** Title case characters are properly cased (from and into).
'upcase', 'upcase-region' et al. convert title case characters (such
as the single character "Dz") into their upper case form (such as "DZ").
As a downside, 'capitalize' and 'upcase-initials' produce awkward
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 8129d376a5a..01e35194e0e 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -33,6 +33,10 @@ enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE,
CASE_CAPITALIZE_UP};
/* State for casing individual characters. */
struct casing_context {
+ /* A char-table with title-case character mappings or nil. It being non-nil
+ implies flag being CASE_CAPITALIZE or CASE_CAPITALIZE_UP (but the reverse
+ is not true). */
+ Lisp_Object titlecase_char_table;
/* User-requested action. */
enum case_action flag;
/* If true, function operates on a buffer as opposed to a string or
character.
@@ -54,6 +58,9 @@ prepare_casing_context (struct casing_context *ctx,
ctx->flag = flag;
ctx->inbuffer = inbuffer;
ctx->inword = flag == CASE_DOWN;
+ ctx->titlecase_char_table = (int)flag >= (int)CASE_CAPITALIZE
+ ? uniprop_table (intern_c_string ("titlecase"))
+ : Qnil;
/* If the case table is flagged as modified, rescan it. */
if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
@@ -68,10 +75,16 @@ prepare_casing_context (struct casing_context *ctx,
static int
case_character (struct casing_context *ctx, int ch)
{
+ Lisp_Object prop;
+
if (ctx->inword)
ch = ctx->flag == CASE_CAPITALIZE_UP ? ch : downcase (ch);
+ else if (!NILP (ctx->titlecase_char_table) &&
+ CHARACTERP (prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
+ ch = XFASTINT (prop);
else
ch = upcase(ch);
+
if ((int) ctx->flag >= (int) CASE_CAPITALIZE)
ctx->inword = SYNTAX (ch) == Sword &&
(!ctx->inbuffer || ctx->inword || !syntax_prefix_flag_p (ch));
@@ -199,8 +212,8 @@ The argument object is not altered--the value is a copy.
*/)
DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
doc: /* Convert argument to capitalized form and return that.
-This means that each word's first character is upper case
-and the rest is lower case.
+This means that each word's first character is upper case (more
+precisely, if available, title case) and the rest is lower case.
The argument may be a character or string. The result has the same type.
The argument object is not altered--the value is a copy. */)
(Lisp_Object obj)
@@ -212,7 +225,8 @@ The argument object is not altered--the value is a copy.
*/)
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
doc: /* Convert the initial of each word in the argument to upper case.
-Do not change the other letters of each word.
+(More precisely, if available, the initial of each word is converted to
+title-case). Do not change the other letters of each word.
The argument may be a character or string. The result has the same type.
The argument object is not altered--the value is a copy. */)
(Lisp_Object obj)
@@ -376,8 +390,8 @@ point and the mark is operated on. */)
DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
doc: /* Convert the region to capitalized form.
-Capitalized form means each word's first character is upper case
-and the rest of it is lower case.
+Capitalized form means each word's first character is upper case (more
+precisely, if available, title case) and the rest of it is lower case.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
(Lisp_Object beg, Lisp_Object end)
@@ -391,7 +405,8 @@ character positions to operate on. */)
DEFUN ("upcase-initials-region", Fupcase_initials_region,
Supcase_initials_region, 2, 2, "r",
doc: /* Upcase the initial of each word in the region.
-Subsequent letters of each word are not changed.
+(More precisely, if available, the initial of each word is converted to
+title-case). Subsequent letters of each word are not changed.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
(Lisp_Object beg, Lisp_Object end)
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el
index 152d85de006..e83cb00059b 100644
--- a/test/src/casefiddle-tests.el
+++ b/test/src/casefiddle-tests.el
@@ -63,13 +63,9 @@ casefiddle-tests--characters
(?Ł ?Ł ?ł ?Ł)
(?ł ?Ł ?ł ?Ł)
- ;; FIXME(bug#24603): Commented ones are what we want.
- ;;(?DŽ ?DŽ ?dž ?Dž)
- (?DŽ ?DŽ ?dž ?DŽ)
- ;;(?Dž ?DŽ ?dž ?Dž)
- (?Dž ?DŽ ?dž ?DŽ)
- ;;(?dž ?DŽ ?dž ?Dž)
- (?dž ?DŽ ?dž ?DŽ)
+ (?DŽ ?DŽ ?dž ?Dž)
+ (?Dž ?DŽ ?dž ?Dž)
+ (?dž ?DŽ ?dž ?Dž)
(?Σ ?Σ ?σ ?Σ)
(?σ ?Σ ?σ ?Σ)
@@ -186,19 +182,19 @@ casefiddle-tests--test-casing
;; input upper lower capitalize up-initials
'(("Foo baR" "FOO BAR" "foo bar" "Foo Bar" "Foo BaR")
("Ⅷ ⅷ" "Ⅷ Ⅷ" "ⅷ ⅷ" "Ⅷ Ⅷ" "Ⅷ Ⅷ")
+ ;; "DžUNGLA" is an unfortunate result but it’s really the best we can
+ ;; do while still being consistent. Hopefully, users only ever
+ ;; use upcase-initials on camelCase identifiers not real words.
+ ("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
+ ("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
+ ("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
;; FIXME(bug#24603): Everything below is broken at the moment.
;; Here’s what should happen:
- ;;("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
- ;;("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
- ;;("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
;;("define" "DEFINE" "define" "Define" "Define")
;;("fish" "FIsh" "fish" "Fish" "Fish")
;;("Straße" "STRASSE" "straße" "Straße" "Straße")
;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")
;; And here’s what is actually happening:
- ("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA")
- ("Džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
- ("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
("define" "DEfiNE" "define" "Define" "Define")
("fish" "fiSH" "fish" "fish" "fish")
("Straße" "STRAßE" "straße" "Straße" "Straße")
@@ -243,4 +239,21 @@ casefiddle-tests--test-casing
"\xef\xff\xef Zażółć GĘŚlą \xcf\xcf")))))))
+(ert-deftest casefiddle-tests-char-casing ()
+ ;; input upcase downcase [titlecase]
+ (dolist (test '((?a ?A ?a) (?A ?A ?a)
+ (?ł ?Ł ?ł) (?Ł ?Ł ?ł)
+ (?ß ?ß ?ß) (?ẞ ?ẞ ?ß)
+ (?ⅷ ?Ⅷ ?ⅷ) (?Ⅷ ?Ⅷ ?ⅷ)
+ (?DŽ ?DŽ ?dž ?Dž) (?Dž ?DŽ ?dž ?Dž) (?dž ?DŽ ?dž ?Dž)))
+ (let ((ch (car test))
+ (up (nth 1 test))
+ (lo (nth 2 test))
+ (tc (or (nth 3 test) (nth 1 test))))
+ (should (eq up (upcase ch)))
+ (should (eq lo (downcase ch)))
+ (should (eq tc (capitalize ch)))
+ (should (eq tc (upcase-initials ch))))))
+
+
;;; casefiddle-tests.el ends here
--
2.12.0.246.ga2ecc84866-goog
- bug#24603: [PATCHv5 00/11] Casing improvements, Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 03/11] Add support for title-casing letters (bug#24603),
Michal Nazarewicz <=
- bug#24603: [PATCHv5 06/11] Implement special sigma casing rule (bug#24603), Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 04/11] Split up casify_region function (bug#24603), Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 07/11] Introduce ‘buffer-language’ buffer-local variable, Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 02/11] Introduce case_character function, Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 01/11] Split casify_object into multiple functions, Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 10/11] Implement casing rules for Lithuanian (bug#24603), Michal Nazarewicz, 2017/03/09
- bug#24603: [PATCHv5 08/11] Implement rules for title-casing Dutch ij ‘letter’ (bug#24603), Michal Nazarewicz, 2017/03/09