classpath
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

java.util.HashSet


From: Eric Blake
Subject: java.util.HashSet
Date: Fri, 07 Sep 2001 23:08:07 -0600

I was just looking at the code for HashSet tonight, and noticed that it
just uses HashMap as its backing store.  This may be the way that Sun
does it, according to their javadoc, but I don't think the
implementation is a necessary detail; just the API and behavior.  And it
seems to me that making a HashMap of (Object, java.lang.Boolean.TRUE)
pairs is a waste of memory and processor time, when the set could be
implemented directly.

Does anyone see why this patch should not be applied?

2001-09-07  Eric Blake  <address@hidden>
        * java/util/HashSet.java: implement the set directly, instead
        of with a backing HashMap

-- 
This signature intentionally left boring.

Eric Blake             address@hidden
  BYU student, free software programmer
Index: java/util/HashSet.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/HashSet.java,v
retrieving revision 1.8
diff -u -r1.8 HashSet.java
--- java/util/HashSet.java      2001/02/18 15:39:58     1.8
+++ java/util/HashSet.java      2001/09/08 05:01:12
@@ -1,5 +1,5 @@
-/* HashSet.java -- a class providing a HashMap-backet Set
-   Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+/* HashSet.java -- a class providing a hash-based Set
+   Copyright (C) 1998, 1999, 2001 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -32,187 +32,480 @@
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 
+// NOTE: This implementation is very similar to that of Hashtable and
+// HashMap. If you fix a bug in here, chances are you should make a
+// similar change in those locations.
+
 /**
- * This class provides a HashMap-backed implementation of the 
- * Set interface.
+ * This class provides a hashing implementation of the Set interface.
+ * While the Sun implentation simply uses a backing HashMap, this
+ * version is more memory efficient; but binary serial compatibility
+ * remains intact.
+ * <p>
  *
- * Each element in the Set is a key in the backing HashMap; each key
- * maps to a static token, denoting that the key does, in fact, exist.
+ * The set is not synchronized; if multiple threads must handle this
+ * set, wrap it with address@hidden Collections#synchronizedSet(Set)}.  
Meanwhile,
+ * the iterator is <em>failfast</em>, so that modifications performed
+ * while an iterator is in operation throw a
+ * address@hidden ConcurrentModificationException}.
+ * <p>
  *
  * Most operations are O(1), assuming no hash collisions.  In the worst
- * case (where all hases collide), operations are O(n).
+ * case (where all hashes collide), operations are O(n). Setting the
+ * initial capacity too low will force many resizing operations, but
+ * setting the initial capacity too high (or loadfactor too low) leads
+ * to wasted memory and slower iteration.
+ * <p>
  *
  * HashSet is a part of the JDK1.2 Collections API.
  *
- * @author      Jon Zeppieri
+ * @author Jon Zeppieri
+ * @author Eric Blake <address@hidden>
+ * @since 1.2
  */
 public class HashSet extends AbstractSet
   implements Set, Cloneable, Serializable
 {
-  /** the HashMap which backs this Set */
-  transient HashMap map;
-  static final long serialVersionUID = -5024744406713321676L;
+  /**
+   * Default number of buckets.
+   */
+  private static final int DEFAULT_CAPACITY = 11;
+
+  /**
+   * Default load factor.
+   */
+  private static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+  /**
+   * compatible with JDK 1.2+
+   */
+  private static final long serialVersionUID = -5024744406713321676L;
+
+  /**
+   * The load factor of the set; if size() exceeds capacity * loadFactor,
+   * the set is rehashed
+   */
+  private transient float loadFactor = DEFAULT_LOAD_FACTOR;
+
+  /**
+   * The backing storage
+   */
+  private transient Entry[] buckets;
+
+  /** 
+   * counts the number of modifications this HashSet has undergone, used 
+   * by Iterators to know when to throw ConcurrentModificationExceptions.
+   */
+  private transient int modCount;
 
   /**
-   * construct a new, empty HashSet whose backing HashMap has the default 
-   * capacity and loadFacor
+   * the size of this HashSet:  denotes the number of elements in the set
    */
+  private transient int size;
+
+  /**
+   * Class to represent an entry in the set.
+   */
+  private static final class Entry
+  {
+    Entry next;
+
+    Object element;
+
+    Entry(Object element)
+    {
+      this.element = element;
+    }
+
+    protected Object clone()
+    {
+      Entry copy = new Entry(element);
+      if (next != null)
+        copy.next = (Entry) next.clone();
+      return copy;
+    }
+  }
+
+  /**
+   * construct a new, empty HashSet with default capacity (11) and
+   * loadFacor (0.75).
+   */
   public HashSet()
   {
-    map = new HashMap();
+    this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR);
   }
 
   /**
-   * construct a new, empty HashSet whose backing HashMap has the supplied
-   * capacity and the default load factor
+   * construct a new, empty HashSet with the supplied capacity and the
+   * default load factor (0.75)
    *
-   * @param          initialCapacity          the initial capacity of the 
backing
-   *                                          HashMap
+   * @param initialCapacity the initial capacity of the set
+   * @throws IllegalArgumentException if the capacity is negative
    */
   public HashSet(int initialCapacity)
   {
-    map = new HashMap(initialCapacity);
+    this(initialCapacity, DEFAULT_LOAD_FACTOR);
   }
 
   /**
-   * construct a new, empty HashSet whose backing HashMap has the supplied
-   * capacity and load factor
+   * Construct a new, empty HashSet with the supplied capacity and load
+   * factor.  The load factor determines when the set will be rehashed.
    *
-   * @param          initialCapacity          the initial capacity of the 
backing
-   *                                          HashMap
-   * @param          loadFactor               the load factor of the backing 
HashMap
+   * @param initialCapacity the initial capacity of the set
+   * @param loadFactor the load factor of the set
+   * @throws IllegalArgumentException if either argument is negative, or
+   *         if loadFactor is POSITIVE_INFINITY or NaN
    */
   public HashSet(int initialCapacity, float loadFactor)
   {
-    map = new HashMap(initialCapacity, loadFactor);
+    if (initialCapacity < 0)
+      throw new IllegalArgumentException("Illegal initial capacity: "
+                                         + initialCapacity);
+    if (loadFactor <= 0 || ! (Float.POSITIVE_INFINITY > loadFactor))
+      throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
+
+    if (initialCapacity == 0)
+      initialCapacity = 1;
+    buckets = new Entry[initialCapacity];
+    this.loadFactor = loadFactor;
   }
 
   /**
    * construct a new HashSet with the same elements as are in the supplied
-   * collection (eliminating any duplicates, of course; the backing HashMap
-   * will have the default capacity and load factor
+   * collection (eliminating any duplicates, of course). The backing storage
+   * has twice the size of the collection, or the default size of 11,
+   * whichever is greater; and the default load factor (0.75).
    *
-   * @param          c          a collection containing the elements with
-   *                            which this set will be initialized
+   * @param c a collection containing the elements with which this set
+   *        will be initialized
+   * @throws NullPointerException if c is null
    */
   public HashSet(Collection c)
   {
-    map = new HashMap();
+    this(Math.max(2 * c.size(), DEFAULT_CAPACITY));
     addAll(c);
   }
 
   /**
-   * adds the given Object to the set if it is not already in the Set,
-   * returns true if teh element was added, false otherwise
+   * Adds the given Object to the set if it is not already in the Set.
+   * For this class, null may be added.
    *
-   * @param       o       the Object to add to this Set
+   * @param o the Object to add to this Set
+   * @return true if the set did not already contain the object
    */
   public boolean add(Object o)
   {
-    return (map.put(o, Boolean.TRUE) == null);
+    modCount++;
+    int idx = hash(o);
+    Entry e = buckets[idx];
+
+    while (e != null)
+      if (o == null ? e.element == null : o.equals(e.element))
+        // already in set
+        return false;
+      else
+        e = e.next;
+
+    // At this point, we know we need to add a new entry.
+    if (++size > (int) (loadFactor * buckets.length))
+      {
+        rehash();
+        // Need a new hash value to suit the bigger table.
+        idx = hash(o);
+      }
+
+    e = new Entry(o);
+    e.next = buckets[idx];
+    buckets[idx] = e;
+    return true;
   }
 
   /**
-   * empties this Set of all elements; this is a fast operation [O(1)]
+   * Empties this Set of all elements; this is a fast operation [O(1)]
    */
   public void clear()
   {
-    map.clear();
+    modCount++;
+    buckets = new Entry[buckets.length];
+    size = 0;
   }
 
   /**
-   * returns a shallow copy of this Set (the Set itself is cloned; its 
-   * elements are not)
+   * Returns a shallow copy of this Set (the Set itself is cloned; its 
+   * elements are not).
+   *
+   * @return a shallow clone of the set
    */
   public Object clone()
   {
-    HashSet copy = null;
     try
       {
-       copy = (HashSet) super.clone();
+        HashSet copy = (HashSet) super.clone();
+        copy.buckets = new Entry[buckets.length];
+        for (int i = 0; i < buckets.length; i++)
+          if (buckets[i] != null)
+            copy.buckets[i] = (Entry) buckets[i].clone();
+        return copy;
       }
     catch (CloneNotSupportedException x)
       {
+        // can't happen
+        return null;
       }
-    copy.map = (HashMap) map.clone();
-    return copy;
   }
 
   /**
-   * returns true if the supplied element is in this Set, false otherwise
+   * Returns true if the supplied element is in this Set.
    *
-   * @param        o         the Object whose presence in this Set we are 
testing for
+   * @param o the Object whose presence in this Set we are testing for
+   * @return true if in the set, false otherwise
    */
   public boolean contains(Object o)
   {
-    return map.containsKey(o);
+    Entry e = buckets[hash(o)];
+    while (e != null)
+      if (o == null ? e.element == null : o.equals(e.element))
+        return true;
+      else
+        e = e.next;
+    return false;
   }
 
   /** 
-   * returns true if this set has no elements in it (size() == 0)
+   * Returns true if this set has no elements in it.
+   *
+   * @return <code>size() == 0</code>.
    */
   public boolean isEmpty()
   {
-    return map.isEmpty();
+    return size == 0;
   }
 
   /**
-   * returns an Iterator over the elements of this Set; the Iterator allows
-   * removal of elements
+   * Returns an Iterator over the elements of this Set, which visits the
+   * elements in no particular order.  For this class, the Iterator allows
+   * removal of elements.
+   *
+   * @return a set iterator
+   * @see ConcurrentModificationException
    */
   public Iterator iterator()
   {
-    return map.keySet().iterator();
+    return new SetIterator();
   }
 
   /**
-   * removes the supplied Object from this Set if it is in the Set; returns
-   * true if an element was removed, false otherwise
+   * Removes the supplied Object from this Set if it is in the Set.
+   *
+   * @param o the object to remove
+   * @return true if an element was removed
    */
   public boolean remove(Object o)
   {
-    return (map.remove(o) != null);
+    modCount++;
+    int idx = hash(o);
+    Entry e = buckets[idx];
+    Entry last = null;
+
+    while (e != null)
+      {
+        if (o == null ? e.element == null : o.equals(e.element))
+          {
+            if (last == null)
+              buckets[idx] = e.next;
+            else
+              last.next = e.next;
+            size--;
+            return true;
+          }
+        last = e;
+        e = e.next;
+      }
+    return false;
   }
 
   /**
-   * returns the number of elements in this Set
+   * Returns the number of elements in this Set (its cardinality).
+   *
+   * @return the size of the set
    */
   public int size()
   {
-    return map.size();
+    return size;
   }
 
-  /** Serialize this Object in a manner which is binary-compatible with the 
-    * JDK */
+  /**
+   * Hashes an object.
+   *
+   * @param o the object to hash
+   * @return the index of the bucket it is in
+   */
+  private int hash(Object o)
+  {
+    return o == null ? 0 : Math.abs(o.hashCode() % buckets.length);
+  }  
+
+  /**
+   * Increases the capacity of the set, and rehashes all keys to new buckets.
+   * This is called when the addition of an element causes size > capacity *
+   * threshole. Existing Entry objects are reused.
+   */
+  private void rehash()
+  {
+    Entry[] oldBuckets = buckets;
+    buckets = new Entry[(oldBuckets.length * 2) + 1];
+
+    for (int i = 0; i < oldBuckets.length; i++)
+      {
+        Entry e = oldBuckets[i];
+        while (e != null)
+          {
+            int idx = hash(e.element);
+            Entry tmp = buckets[idx];
+            if (tmp != null)
+              {
+                while (tmp.next != null)
+                  tmp = tmp.next;
+                tmp.next = e;
+              }
+            else
+              buckets[idx] = e;
+
+            tmp = e.next;
+            e.next = null;
+            e = tmp;
+          }
+      }
+  }
+
+  /**
+   * Serialize this Object in a manner which is binary-compatible with the 
+   * JDK: int capacity, float loadFactor, int size, Object elements...
+   *
+   * @param s the write target
+   * @throws IOException if any underlying operation fails
+   */
   private void writeObject(ObjectOutputStream s) throws IOException
   {
+    s.writeInt(buckets.length);
+    s.writeFloat(loadFactor);
+    s.writeInt(size);
     Iterator it = iterator();
-    s.writeInt(map.buckets.length);
-    s.writeFloat(map.loadFactor);
-    s.writeInt(map.size);
     while (it.hasNext())
       s.writeObject(it.next());
   }
 
-  /** Deserialize this Object in a manner which is binary-compatible with 
-    * the JDK */
-  private void readObject(ObjectInputStream s) throws IOException,
-    ClassNotFoundException
+  /**
+   * Deserialize this Object in a manner which is binary-compatible with 
+   * the JDK. See address@hidden #writeObject(ObjectOutputStream)} for the 
format.
+   *
+   * @param s the read source
+   * @throws IOException if any underlying operation fails
+   * @throws ClassNotFoundException if any underlying operation does
+   */
+  private void readObject(ObjectInputStream s)
+    throws IOException, ClassNotFoundException
   {
-    int i, size, capacity;
-    float loadFactor;
-    Object element;
-
-    capacity = s.readInt();
+    buckets = new Entry[s.readInt()];
     loadFactor = s.readFloat();
-    size = s.readInt();
-
-    map = new HashMap(capacity, loadFactor);
-
-    for (i = 0; i < size; i++)
-      {
-       element = s.readObject();
-       map.put(element, Boolean.TRUE);
-      }
+    final int count = s.readInt();
+    for (int i = 0; i < count; i++)
+      add(s.readObject());
+  }
+
+
+  /**
+   * Iterates over a HashSet's entries.
+   * @author Eric Blake <address@hidden>
+   */
+  private final class SetIterator implements Iterator {
+    /**
+     * The number of modifications we know about.
+     */
+    private int knownMod = modCount;
+
+    /**
+     * The count of elements we have returned by address@hidden #next()}.
+     */
+    private int count;
+
+    /**
+     * The current index in the physical table.
+     */
+    private int idx;
+
+    /**
+     * The last Entry returned by address@hidden #next()}.
+     */
+    private Entry last;
+
+    /**
+     * The next entry to return, if there are multiple entries in this
+     * bucket, or null if we must visit the next bucket
+     */
+    private Entry next;
+
+    /**
+     * Checks if there are more elements to iterate over.
+     *
+     * @return true if there are more elements
+     * @throws ConcurrentModificationException if the set was modified by
+     *         anything but this iterator's address@hidden #delete()} since 
this
+     *         was created
+     */
+    public boolean hasNext()
+    {
+      if (knownMod != modCount)
+        throw new ConcurrentModificationException();
+      return count < size;
+    }
+
+    /**
+     * Returns the next element in the Iterator's sequential view
+     *
+     * @return the next set element
+     * @throws ConcurrentModificationException if the set was modified by
+     *         anything but this iterator's address@hidden #delete()} since 
this
+     *         was created
+     * @throws NoSuchElementException if the set has no more elements
+     */
+    public Object next()
+    {
+      if (knownMod != modCount)
+        throw new ConcurrentModificationException();
+      if (count == size)
+        throw new NoSuchElementException();
+      count++;
+      Entry e = next;
+
+      while (e == null)
+        e = buckets[idx++];
+
+      next = e.next;
+      last = e;
+      return e.element;
+    }
+
+    /**
+     * Removes the last element fetched with address@hidden #next()} from the 
set.
+     *
+     * @throws ConcurrentModificationException if the set was modified by
+     *         anything but this iterator's address@hidden #delete()} since 
this
+     *         was created
+     * @throws IllegalStateException if the was no last element, or the
+     *         last element was already removed
+     */
+    public void remove()
+    {
+      if (knownMod != modCount)
+        throw new ConcurrentModificationException();
+      if (last == null)
+        throw new IllegalStateException();
+      HashSet.this.remove(last.element);
+      knownMod++;
+      count--;
+      last = null;
+    }
   }
+
 }

reply via email to

[Prev in Thread] Current Thread [Next in Thread]