classpath
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Patch for the collator


From: Guilhem Lavaux
Subject: Patch for the collator
Date: Sat, 29 May 2004 19:41:00 +0200
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.6) Gecko/20040115

Hi,

Here is the "missing" patch to make classpath pass all mauve tests concerning RuleBasedCollator/CollationElementIterator. It principally fixes the behaviour of the comparator for ignorable characters. Previously, it only marked them to be discarded during parsing. Now they are correctly returned by CollationElementIterator.next() but ignored by compare().

Cheers,

Guilhem.

ChangeLog entry:
2004-05-29  Guilhem Lavaux <address@hidden>

        * java/text/RuleBasedCollator.java
        (CollationElement.CollationElement): Removed unused constructor.
        New argument for the other one.
        (subParseString): Mark the elements as ignorable but do not erase
        them from the database.
        (compare): Take into account the ignore field for ignorable
        elements.
        (buildCollationVector): Use the ignore field now.
        * java/text/CollationElementIterator.java
        (setText): Keep ignorable characters.

Index: java/text/RuleBasedCollator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/RuleBasedCollator.java,v
retrieving revision 1.24
diff -u -b -B -r1.24 RuleBasedCollator.java
--- java/text/RuleBasedCollator.java    20 May 2004 10:34:33 -0000      1.24
+++ java/text/RuleBasedCollator.java    29 May 2004 17:28:32 -0000
@@ -158,23 +158,17 @@
     String expansion;
 
     CollationElement(String key, int primary, short secondary, short tertiary,
-                    short equality, String expansion)
+                    short equality, String expansion, boolean ignore)
     {
       this.key = key;
       this.primary = primary;
       this.secondary = secondary;
       this.tertiary = tertiary;
       this.equality = equality;
-      this.ignore = false;
+      this.ignore = ignore;
       this.expansion = expansion;
     }
     
-    CollationElement(String key)
-    {
-      this.key = key;
-      this.ignore = true;
-    }
-
     final int getValue()
     {
       return (primary << 16) + (secondary << 8) + tertiary;
@@ -196,13 +190,13 @@
     static final int GREATERT = 2;
     static final int EQUAL = 3;
     static final int RESET = 4;
-    static final int IGNORE = 5;
-    static final int INVERSE_SECONDARY = 6;
+    static final int INVERSE_SECONDARY = 5;
     
     int comparisonType;
     String textElement;
     int hashText;
     int offset;
+    boolean ignore;
 
     String expansionOrdering;
   }
@@ -250,7 +244,7 @@
    */
   static final CollationElement SPECIAL_UNKNOWN_SEQ = 
     new CollationElement("", (short) 32767, (short) 0, (short) 0,
-                        (short) 0, null);
+                        (short) 0, null, false);
   
   /**
    * This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -447,26 +441,16 @@
          throw new ParseException
            ("Modifier '!' is not yet supported by Classpath", i+base_offset);
        case '<':
-         ignoreChars = false;
          type = CollationSorter.GREATERP;
          break;
        case ';':
-         if (!ignoreChars)
            type = CollationSorter.GREATERS;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case ',':
-         if (!ignoreChars)
            type = CollationSorter.GREATERT;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case '=':
-         if (!ignoreChars)
            type = CollationSorter.EQUAL;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case '\'':
          eatingChars = !eatingChars;
@@ -549,10 +533,14 @@
 
        CollationSorter sorter = new CollationSorter();
        
+       if (operator == CollationSorter.GREATERP)
+         ignoreChars = false;
+
        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+rules.length();
+       sorter.ignore = ignoreChars;
        sb.setLength(0);
 
        v.add(sorter);
@@ -568,10 +556,14 @@
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);
 
+       if (operator == CollationSorter.GREATERP)
+         ignoreChars = false;
+
        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+pos;
+       sorter.ignore = ignoreChars;
        v.add(sorter);
       }
 
@@ -674,8 +666,6 @@
              last_tertiary_seq = tertiary_seq;
            equality_seq = 0;
            break;
-         case CollationSorter.IGNORE:
-           ignoreChar = true;
          case CollationSorter.EQUAL:
            equality_seq++;
            break;
@@ -687,18 +677,9 @@
              ("Invalid unknown state '" + elt.comparisonType + "'", 
elt.offset);
          }
 
-       CollationElement e;
-
-       if (!ignoreChar)
-         {
-           e = new CollationElement(elt.textElement, primary_seq,
+       v.add(new CollationElement(elt.textElement, primary_seq,
                                     secondary_seq, tertiary_seq,
-                                    equality_seq, elt.expansionOrdering);
-         }
-       else
-         e = new CollationElement(elt.textElement);
-
-       v.add(e);
+                                  equality_seq, elt.expansionOrdering, 
elt.ignore));
       }
 
     this.inverseAccentComparison = inverseComparisons; 
@@ -741,17 +722,45 @@
   public int compare(String source, String target)
   {
     CollationElementIterator cs, ct;
+    CollationElement ord1block = null;
+    CollationElement ord2block = null;
+    boolean advance_block_1 = true;
+    boolean advance_block_2 = true;
 
     cs = getCollationElementIterator(source);
     ct = getCollationElementIterator(target);
 
     for(;;)
       {
-        CollationElement ord1block = cs.nextBlock(); 
-        CollationElement ord2block = ct.nextBlock(); 
        int ord1;
        int ord2;
 
+       /*
+        * We have to check whether the characters are ignorable.
+        * If it is the case then forget them. 
+        */
+       if (advance_block_1)
+         {
+           ord1block = cs.nextBlock();
+           if (ord1block != null && ord1block.ignore)
+             continue;
+         }
+       
+       if (advance_block_2)
+         {
+           ord2block = ct.nextBlock();
+           if (ord2block != null && ord2block.ignore)
+             {
+               advance_block_1 = false;
+               continue;
+             }
+        }
+       else
+         advance_block_2 = true;
+
+       if (!advance_block_1)
+         advance_block_1 = true;
+
        if (ord1block != null)
          ord1 = ord1block.getValue();
        else
@@ -781,12 +790,12 @@
        
        if (prim1 == 0 && getStrength() < TERTIARY)
          {
-           ct.previousBlock();
+            advance_block_2 = false;
            continue;
          }
        else if (prim2 == 0 && getStrength() < TERTIARY)
          {
-           cs.previousBlock();
+           advance_block_1 = false;
            continue;
          }
 
@@ -861,7 +870,7 @@
     else
       v = (short) c;
     return new CollationElement("" + c, last_primary_value + v,
-                               (short) 0, (short) 0, (short) 0, null);
+                               (short) 0, (short) 0, (short) 0, null, false);
   }
 
   /**
@@ -883,7 +892,7 @@
     else
       v = (short) c;
     return new CollationElement("" + c, (short) 0,
-                               (short) 0, (short) (last_tertiary_value + v), 
(short) 0, null);
+                               (short) 0, (short) (last_tertiary_value + v), 
(short) 0, null, false);
   }
 
   /**
@@ -945,6 +954,13 @@
 
     while (ord != CollationElementIterator.NULLORDER)
       {
+       // If the primary order is null, it means this is an ignorable
+       // character.
+       if (CollationElementIterator.primaryOrder(ord) == 0)
+         {
+            ord = cei.next();
+           continue;
+         }
         switch (getStrength())
           {
             case PRIMARY:
Index: java/text/CollationElementIterator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/CollationElementIterator.java,v
retrieving revision 1.17
diff -u -b -B -r1.17 CollationElementIterator.java
--- java/text/CollationElementIterator.java     20 May 2004 10:34:33 -0000      1.17
+++ java/text/CollationElementIterator.java     29 May 2004 17:28:32 -0000
@@ -357,11 +357,8 @@
            /* Third case: the simplest. We have got the prefix and it
             * has not to be expanded.
             */
-           if (!prefix.ignore)
-             {
                v.add (prefix);
                vi.add (new Integer(idx_idx));
-             }
            idx += prefix.key.length();
            /* If the sequence is in an expansion, we must decrease the
             * counter.

reply via email to

[Prev in Thread] Current Thread [Next in Thread]