bug-libunistring
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-libunistring] [PATCH 5/8] Update to Unicode 6.1.0


From: Daiki Ueno
Subject: [bug-libunistring] [PATCH 5/8] Update to Unicode 6.1.0
Date: Fri, 10 Oct 2014 22:59:50 +0900

* lib/unictype.in.h (UC_JOINING_GROUP_ROHINGYA_YEH): New enumeration
value.
* lib/unictype/joininggroup_byname.gperf: Add Rohingya Yeh
joining group name.
* lib/unictype/joininggroup_name.h: Likewise.

* lib/unilbrk/lbrktables.h (LBP_HL): New enumeration value.

* lib/gen-uni-tables.c (UC_JOINING_GROUP_ROHINGYA_YEH): New
enumeration value.
(fill_arabicshaping, joining_group_as_c_identifier): Support
UC_JOINING_GROUP_ROHINGYA_YEH.
(output_joining_type): Say "joining type", not "joining group", in
the comment written to the generated file.
(is_property_default_ignorable_code_point): Reject U+0604.
(LBP_HL): New enumeration value.
(get_lbp, debug_output_lbp, fill_org_lbp, debug_output_org_lbp)
(output_lbp): Support LBP_HL.
(fill_org_lbp): Resolve CJ as NS, for backward compatibility.
---
 lib/gen-uni-tables.c                   | 24 +++++++++++++++++++-----
 lib/unictype.in.h                      |  3 ++-
 lib/unictype/joininggroup_byname.gperf |  2 ++
 lib/unictype/joininggroup_name.h       |  1 +
 lib/unilbrk/lbrktables.h               |  2 ++
 5 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index 3747875..16af39f 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -32,7 +32,7 @@
                       /usr/local/share/Unidata/CompositionExclusions.txt \
                       /usr/local/share/Unidata/SpecialCasing.txt \
                       /usr/local/share/Unidata/CaseFolding.txt \
-                      6.0.0
+                      6.1.0
  */
 
 #include <stdbool.h>
@@ -2868,7 +2868,7 @@ is_property_default_ignorable_code_point (unsigned int ch)
   bool result1 =
     (is_category_Cf (ch)
      && !(ch >= 0xFFF9 && ch <= 0xFFFB) /* Annotations */
-     && !((ch >= 0x0600 && ch <= 0x0603) || ch == 0x06DD || ch == 0x070F)
+     && !((ch >= 0x0600 && ch <= 0x0604) || ch == 0x06DD || ch == 0x070F)
      /* For some reason, the following are not listed as having property
         Default_Ignorable_Code_Point.  */
      && !(ch == 0x110BD))
@@ -3746,7 +3746,8 @@ enum
   UC_JOINING_GROUP_YUDH,                  /* Yudh */
   UC_JOINING_GROUP_YUDH_HE,               /* Yudh_He */
   UC_JOINING_GROUP_ZAIN,                  /* Zain */
-  UC_JOINING_GROUP_ZHAIN                  /* Zhain */
+  UC_JOINING_GROUP_ZHAIN,                 /* Zhain */
+  UC_JOINING_GROUP_ROHINGYA_YEH           /* Rohingya_Yeh */
 };
 
 static uint8_t unicode_joining_group[0x110000];
@@ -3886,6 +3887,7 @@ fill_arabicshaping (const char *arabicshaping_filename)
       TRY(UC_JOINING_GROUP_YUDH_HE,               "YUDH HE")
       TRY(UC_JOINING_GROUP_ZAIN,                  "ZAIN")
       TRY(UC_JOINING_GROUP_ZHAIN,                 "ZHAIN")
+      TRY(UC_JOINING_GROUP_ROHINGYA_YEH,          "ROHINGYA YEH")
 #undef TRY
       else
         {
@@ -3987,7 +3989,7 @@ output_joining_type (const char *filename, const char *version)
     }
 
   fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Arabic joining group of Unicode characters.  */\n");
+  fprintf (stream, "/* Arabic joining type of Unicode characters.  */\n");
  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
            version);
 
@@ -4167,6 +4169,7 @@ joining_group_as_c_identifier (int joining_group)
   TRY(UC_JOINING_GROUP_YUDH_HE)
   TRY(UC_JOINING_GROUP_ZAIN)
   TRY(UC_JOINING_GROUP_ZHAIN)
+  TRY(UC_JOINING_GROUP_ROHINGYA_YEH)
 #undef TRY
   abort ();
 }
@@ -6238,8 +6241,10 @@ enum
   LBP_SY = 17, /* symbols allowing breaks */
   LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */
   LBP_AL = 18, /* ordinary alphabetic and symbol characters */
+/*LBP_CJ,         conditional Japanese starter, resolved to NS */
   LBP_H2 = 19, /* Hangul LV syllable */
   LBP_H3 = 20, /* Hangul LVT syllable */
+  LBP_HL = 33, /* Hebrew letter */
   LBP_ID = 21, /* ideographic */
   LBP_JL = 22, /* Hangul L Jamo */
   LBP_JV = 23, /* Hangul V Jamo */
@@ -6690,6 +6695,10 @@ get_lbp (unsigned int ch)
       if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
         attr |= (int64_t) 1 << LBP_H3;
 
+      if ((ch >= 0x05D0 && ch <= 0x05F2) || ch == 0xFB1D
+          || (ch >= 0xFB1F && ch <= 0xFB28) || (ch >= 0xFB2A && ch <= 0xFB4F))
+        attr |= (int64_t) 1 << LBP_HL;
+
       if ((ch >= 0x1100 && ch <= 0x115F) || (ch >= 0xA960 && ch <= 0xA97C))
         attr |= (int64_t) 1 << LBP_JL;
 
@@ -6851,7 +6860,7 @@ get_lbp (unsigned int ch)
           || ch == 0x2064 /* INVISIBLE PLUS */
           /* Extra characters for compatibility with Unicode LineBreak.txt.  */
           || ch == 0x110BD /* KAITHI NUMBER SIGN */)
-        if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID))))
+        if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_HL) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID))))
           {
             /* ambiguous (alphabetic) ? */
             if ((unicode_width[ch] != NULL
@@ -6971,6 +6980,7 @@ debug_output_lbp (FILE *stream)
           PRINT_BIT(attr,LBP_AL);
           PRINT_BIT(attr,LBP_H2);
           PRINT_BIT(attr,LBP_H3);
+          PRINT_BIT(attr,LBP_HL);
           PRINT_BIT(attr,LBP_ID);
           PRINT_BIT(attr,LBP_JL);
           PRINT_BIT(attr,LBP_JV);
@@ -7085,6 +7095,7 @@ fill_org_lbp (const char *linebreak_filename)
       TRY(LBP_AL)
       TRY(LBP_H2)
       TRY(LBP_H3)
+      TRY(LBP_HL)
       TRY(LBP_ID)
       TRY(LBP_JL)
       TRY(LBP_JV)
@@ -7096,6 +7107,7 @@ fill_org_lbp (const char *linebreak_filename)
       else if (strcmp (field1, "CR") == 0) value = LBP_BK;
       else if (strcmp (field1, "NL") == 0) value = LBP_BK;
       else if (strcmp (field1, "SG") == 0) value = LBP_XX;
+      else if (strcmp (field1, "CJ") == 0) value = LBP_NS;
       else
         {
           fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
@@ -7165,6 +7177,7 @@ debug_output_org_lbp (FILE *stream)
           PRINT_BIT(attr,LBP_AL);
           PRINT_BIT(attr,LBP_H2);
           PRINT_BIT(attr,LBP_H3);
+          PRINT_BIT(attr,LBP_HL);
           PRINT_BIT(attr,LBP_ID);
           PRINT_BIT(attr,LBP_JL);
           PRINT_BIT(attr,LBP_JV);
@@ -7338,6 +7351,7 @@ output_lbp (FILE *stream1, FILE *stream2)
           CASE(LBP_AL);
           CASE(LBP_H2);
           CASE(LBP_H3);
+          CASE(LBP_HL);
           CASE(LBP_ID);
           CASE(LBP_JL);
           CASE(LBP_JV);
diff --git a/lib/unictype.in.h b/lib/unictype.in.h
index 5125e96..30c71aa 100644
--- a/lib/unictype.in.h
+++ b/lib/unictype.in.h
@@ -518,7 +518,8 @@ enum
   UC_JOINING_GROUP_YUDH,                  /* Yudh */
   UC_JOINING_GROUP_YUDH_HE,               /* Yudh_He */
   UC_JOINING_GROUP_ZAIN,                  /* Zain */
-  UC_JOINING_GROUP_ZHAIN                  /* Zhain */
+  UC_JOINING_GROUP_ZHAIN,                 /* Zhain */
+  UC_JOINING_GROUP_ROHINGYA_YEH           /* Rohingya_Yeh */
 };
 
 /* Return the name of a joining group.  */
diff --git a/lib/unictype/joininggroup_byname.gperf b/lib/unictype/joininggroup_byname.gperf
index bc2fbc8..90be16e 100644
--- a/lib/unictype/joininggroup_byname.gperf
+++ b/lib/unictype/joininggroup_byname.gperf
@@ -83,3 +83,5 @@ Yudh He, UC_JOINING_GROUP_YUDH_HE
 YudhHe, UC_JOINING_GROUP_YUDH_HE
 Zain, UC_JOINING_GROUP_ZAIN
 Zhain, UC_JOINING_GROUP_ZHAIN
+Rohingya Yeh, UC_JOINING_GROUP_ROHINGYA_YEH
+RohingyaYeh, UC_JOINING_GROUP_ROHINGYA_YEH
diff --git a/lib/unictype/joininggroup_name.h b/lib/unictype/joininggroup_name.h
index 78d4a10..681f1a5 100644
--- a/lib/unictype/joininggroup_name.h
+++ b/lib/unictype/joininggroup_name.h
@@ -72,3 +72,4 @@ ELEM (YUDH,                  "Yudh")
 ELEM (YUDH_HE,               "Yudh He")
 ELEM (ZAIN,                  "Zain")
 ELEM (ZHAIN,                 "Zhain")
+ELEM (ROHINGYA_YEH,          "Rohingya Yeh")
diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h
index e651d71..9014573 100644
--- a/lib/unilbrk/lbrktables.h
+++ b/lib/unilbrk/lbrktables.h
@@ -51,8 +51,10 @@ enum
   LBP_SY = 17, /* symbols allowing breaks */
   LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */
   LBP_AL = 18, /* ordinary alphabetic and symbol characters */
+/*LBP_CJ,         conditional Japanese starters, resolved to NS */
   LBP_H2 = 19, /* Hangul LV syllable */
   LBP_H3 = 20, /* Hangul LVT syllable */
+  LBP_HL = 33, /* Hebrew letter */
   LBP_ID = 21, /* ideographic */
   LBP_JL = 22, /* Hangul L Jamo */
   LBP_JV = 23, /* Hangul V Jamo */
-- 
2.1.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]