--- classpath-backup/java/nio/charset/CharsetDecoder.java 2003-09-20 11:40:54.000000000 +0100
+++ classpath/java/nio/charset/CharsetDecoder.java 2003-09-21 23:24:47.000000000 +0100
@@ -41,30 +41,198 @@
import java.nio.CharBuffer;
/**
+ * <code>CharsetDecoder</code> provides facilities to decode a sequence of
+ * bytes (one or more {@link ByteBuffer}s) that are in a particular
+ * charset into a sequence of standard Java 16-bit Unicode characters (one or
+ * more {@link CharBuffer}s).
+ *
+ *
The input data are provided in a address@hidden ByteBuffer} or, by invoking + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} + * repeatedly and passing a different address@hidden ByteBuffer} as a parameter each + * time, the data can be provided in a sequence of address@hidden ByteBuffer}s. + * + *
The output data are written to a address@hidden CharBuffer} or, by invoking + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} + * repeatedly and altering the address@hidden CharBuffer} passed as a parameter each + * time, the data can be written to a sequence of address@hidden CharBuffer}s. + * + *
Methods are normally invoked on a CharsetDecoder
in a certain order:
+ *
+ *
1. address@hidden #reset()} - invoke if the CharsetDecoder
has
+ * been used before.
+ *
+ *
2. address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} + * - invoke repeatedly as long as there are address@hidden ByteBuffer}s to be decoded. + * + *
endOfInput
should be false
until the
+ * current invocation of
+ * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} is the
+ * last, i.e., pass true
only when there are no more
+ * data to decode after the current invocation.
+ *
+ *
Each invocation of + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} decodes + * as many bytes as possible, and returns a address@hidden CoderResult} object to + * describe the reason for returning. The invoker can examine this object to + * determine the reason and act accordingly, e.g., by filling the input + * address@hidden ByteBuffer}. + * + *
3. address@hidden #flush(CharBuffer out)} to ensure that the
+ * CharsetDecoder
writes all data to the output
+ * address@hidden CharBuffer}.
+ *
+ *
If the input address@hidden ByteBuffer} does not contain a valid sequence of + * bytes for the specified address@hidden Charset} the input is malformed + * (address@hidden CoderResult#isMalformed()}). + * + *
If the input address@hidden ByteBuffer} is valid, but there is no equivalent + * Unicode character to map part of it to, the input has an unmappable + * character (address@hidden CoderResult#isUnmappable()}). + * + *
The methods address@hidden #onMalformedInput(CodingErrorAction newAction)} and + * address@hidden #onUnmappableCharacter(CodingErrorAction newAction)} allow an + * action (address@hidden CodingErrorAction}) to be specified for the respective + * decoding errors. + * + *
The actions can be one of: + *
To implement a decoder for a specific charset, create a subclass of + * this class and implement the protected abstract method + * address@hidden #decodeLoop(ByteBuffer in, CharBuffer out)}. + * + *
To ensure that any internal state in a subclass is cleared correctly,
+ * also override address@hidden #flush(CharBuffer out)} and address@hidden #reset()}, but make
+ * sure that you invoke super.flush(out)
and
+ * super.reset()
in their respective methods.
+ *
+ * Instances of this class are NOT threadsafe.
+ *
* @author Jesse Rosenstock
+ * @author Ricky Clarkson
CharsetDecoder
has been reset.
+ *
+ * See address@hidden #state}.
+ */
private static final int STATE_RESET = 0;
+
+ /**
+ * Flag meaning that the CharsetDecoder
is currently decoding
+ * data.
+ *
+ *
See address@hidden #state}.
+ */
private static final int STATE_CODING = 1;
+
+ /**
+ * Flag meaning that the CharsetDecoder
has finished decoding
+ * data.
+ *
+ *
See address@hidden #state}.
+ */
private static final int STATE_END = 2;
+
+ /**
+ * Flag meaning that the CharsetDecoder
has been flushed.
+ *
+ *
See address@hidden #state}.
+ */
private static final int STATE_FLUSHED = 3;
+ /**
+ * The default value for {@link #replacement} before {@link #replaceWith}
+ * is invoked.
+ */
private static final String DEFAULT_REPLACEMENT = "\uFFFD";
+ /**
+ * The address@hidden Charset} that instantiated this CharsetDecoder
.
+ */
private final Charset charset;
+
+ /**
+ * The expected average number of characters output per byte input.
+ *
+ *
For example, if 3 bytes correspond to 1 character, this value will be + * equal to 1.0f/3. + */ private final float averageCharsPerByte; + + /** + * The maximum number of characters output per byte. + * + *
For example, if between 2 and 5 bytes correspond to 1 character, this + * value will be 0.5f + */ private final float maxCharsPerByte; + + /** + * The address@hidden String} that will be inserted if a sequence of bytes is + * unmappable (see address@hidden CoderResult#isUnmappable()}). + * Set during construction and also by address@hidden #replaceWith()}. + */ + //Is there any reason that this cannot be set to DEFAULT_REPLACEMENT + //here, and maybe DEFAULT_REPLACEMENT removed? + //Maybe removing DEFAULT_REPLACEMENT would cause Serialization problems. private String replacement; + /** + * The current state of the decoder. One of: + *
CharsetDecoder
for the specified
+ * address@hidden Charset}.
+ *
+ * This is normally only invoked via address@hidden Charset#newDecoder()}. + * + *
The method address@hidden #averageCharsPerByte()} will return the value
+ * passed as the parameter averageCharsPerByte
.
+ *
+ *
The method address@hidden #maxCharsPerByte()} will return the value
+ * passed as the parameter maxCharsPerByte
.
+ *
+ *
The replacement address@hidden String} will be set to
+ * "\uFFFD".
+ * See address@hidden #replacement()}.
+ *
+ * @param cs the address@hidden Charset} that instantiated this
+ * CharsetDecoder
.
+ *
+ * @param averageCharsPerByte a positive value describing the expected
+ * average number of characters of output per byte of input.
+ *
+ * @param maxCharsPerByte a positive value describing the maximum
+ * number of characters of output per byte of input.
+ *
+ * @throws IllegalArgumentException if <code>averageCharsPerByte</code>
+ * or <code>maxCharsPerByte</code> is less than or equal to 0.
+ */
protected CharsetDecoder (Charset cs, float averageCharsPerByte,
float maxCharsPerByte)
{
this (cs, averageCharsPerByte, maxCharsPerByte, DEFAULT_REPLACEMENT);
}
+ /**
+ * Returns the average number of characters output per byte of input.
+ *
+ *
For example, if 3 bytes correspond to 1 character, this value will be
+ * equal to 1.0f/3.
+ *
+ * @return the average number of characters output per byte of input.
+ */
public final float averageCharsPerByte ()
{
return averageCharsPerByte;
}
+ /**
+ * Returns the address@hidden Charset} that instantiated this
+ * CharsetDecoder
.
+ *
+ * @return the address@hidden Charset} that instantiated this
+ * CharsetDecoder
.
+ */
public final Charset charset ()
{
return charset;
}
+ /**
+ * Decodes the input address@hidden ByteBuffer}, allocates a address@hidden CharBuffer}
+ * for output and places the decoded characters in the output
+ * address@hidden CharBuffer}.
+ *
+ * <p>This method probably should reset the <code>CharsetDecoder</code>, but
+ * instead throws an {@link IllegalStateException} if the
+ * <code>CharsetDecoder</code> has not already been reset. Sun's
+ * documentation conflicts with itself here, so it would be useful to run
+ * a test to find out what Sun's actual behavior is.
+ *
+ *
It then allocates a address@hidden CharBuffer} and populates it with decoded + * data, and invokes address@hidden #flush(CharBuffer out)}. + * + *
This method should not be invoked between any invocations of + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} and + * address@hidden #reset()}, and after use address@hidden #reset()} should be invoked on it + * before invoking + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)}. + * + *
The current algorithm for this uses the value that
+ * address@hidden #maxCharsPerByte()} returns to allocate storage, so it is not
+ * optimal in memory usage.
+ *
+ * @param in the input address@hidden ByteBuffer}.
+ * @return the new and populated address@hidden CharBuffer}, which is at position 0
+ * and is limited to the size of the data.
+ *
+ * @throws IllegalStateException if the CharsetDecoder
has not
+ * been address@hidden #reset()}.
+ *
+ * @throws MalformedInputException if the input is not valid for the
+ * address@hidden Charset} and the malformed input action is
+ * address@hidden CodingErrorAction#REPORT}.
+ *
+ * @throws UnmappableCharacterException if the input contains a character
+ * for which there is no known mapping to Unicode and the unmappable
+ * character action is set to address@hidden CodingErrorAction#REPORT}.
+ *
+ * @throws CharacterCodingException because
+ * address@hidden CoderResult#throwException()} is declared to throw this kind of
+ * exception.
+ */
public final CharBuffer decode (ByteBuffer in)
throws CharacterCodingException
{
@@ -133,6 +387,52 @@
return out;
}
+ /**
+ * Decodes the data passed in the input address@hidden ByteBuffer}
+ * and places the decoded characters in the output address@hidden CharBuffer}.
+ *
+ *
Make sure that endOfInput
is true if and only if this
+ * invocation is the last invocation in this sequence of calls to
+ * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)}.
+ *
+ *
Both buffers are used starting at their current positions, i.e.,
+ * rewind()
is NOT invoked on either.
+ *
+ *
This method will modify the current position, but will not affect the + * marks and limits. + * + *
Returns one of the following: + *
CharsetDecoder
+ * is ready for more input, or that the decoding has finished, depending
+ * on whether the invoker has more data.
+ * in
are the last data to be passed to this method.
+ *
+ * @return a address@hidden CoderResult} describing why the method returns.
+ *
+ * @throws IllegalStateException if the most recent method invocation
+ * (other than accessors and mutators) was not one of
+ * {@link #reset()} or
+ * {@link #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} where
+ * <code>endOfInput</code> is <code>false</code>.
+ *
+ * @throws CoderMalfunctionError if address@hidden #decodeLoop} throws a non-checked
+ * exception.
+ */
public final CoderResult decode (ByteBuffer in, CharBuffer out,
boolean endOfInput)
{
@@ -188,13 +488,72 @@
}
}
+ /**
+ * Decodes the bytes in the input address@hidden ByteBuffer} and puts the
+ * resulting characters in the output address@hidden CharBuffer}.
+ *
+ * The buffers are not rewound (rewind()
is not invoked on
+ * either), and the marks and limits are not set. The positions will be
+ * modified.
+ *
+ *
Returns the same values as + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)}. + * + * @param in a buffer holding the data to be decoded. + * @param out a buffer to hold the decoded data (characters). + * @return a address@hidden CoderResult} instance describing the reason for + * returning. + */ protected abstract CoderResult decodeLoop (ByteBuffer in, CharBuffer out); + /** + * If a subclass is capable of detecting the address@hidden Charset} based on input + * data, it should override this. + * + * @return the detected address@hidden Charset}. + * + * @throws UnsupportedOperationException if detecting the address@hidden Charset} + * based on input data is not supported (this is the default). + * + * @throws IllegalStateException if not enough data have been processed yet + * to detect the address@hidden Charset}. + */ public Charset detectedCharset () { throw new UnsupportedOperationException (); } - + + /** + * Empties all buffers. + * + *
Some subclasses may need to be notified when decoding has finished, + * so that they can do some cleanup, such as releasing resources, or + * appending some characters to the end of the output. This method does + * some housekeeping of its own, then invokes implFlush, which a subclass may + * override to do this cleanup. + * + *
The method returns one of two values: + *
Note that this method can be invoked directly after address@hidden #reset} + * without + * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)} + * being invoked, so a subclass should probably check to see whether decode + * has been invoked, to decide what output to write. + * + * @param out the address@hidden CharBuffer} to write to. + * @return a address@hidden CoderResult} instance describing the success or failure + * state of the method. + */ public final CoderResult flush (CharBuffer out) { // It seems weird that you can flush after reset, but Sun's javadoc @@ -214,11 +573,34 @@ return implFlush (out); } + /** + * Does any cleanup that subclasses need to do. + * + *
This method is intended to be subclassed to do any cleanup. + * + * See address@hidden #flush(CharBuffer out)}. + * + * @param out the address@hidden CharBuffer} to write to. + * @return a address@hidden CoderResult} instance describing whether the method + * succeeded or failed. + */ protected CoderResult implFlush (CharBuffer out) { return CoderResult.UNDERFLOW; } + /** + * Sets the action to be taken when invalid input is received. + * + *
For details of the available actions, see address@hidden CodingErrorAction}.
+ *
+ * @param newAction the action to be taken when invalid input is received.
+ *
+ * @return the CharsetDecoder
that this method was invoked on.
+ *
+ * @throws IllegalArgumentException if newAction
is
+ * null
.
+ */
public final CharsetDecoder onMalformedInput (CodingErrorAction newAction)
{
if (newAction == null)
@@ -229,46 +611,140 @@
return this;
}
+ /**
+ * Notifies a subclass when the invoker has changed the action to be taken
+ * when malformed input is received.
+ *
+ *
The default implementation does nothing; it is just intended to be + * overridden if required. + * + *
This method is invoked by address@hidden #onMalformedInput}. + * + * @param newAction the action to be taken when malformed input is received. + */ protected void implOnMalformedInput (CodingErrorAction newAction) { // default implementation does nothing } + /** + * Notifies a subclass when the invoker has changed the action to be taken + * when a sequence of bytes cannot be mapped to a Unicode character. + * + *
The default implementation does nothing; it is just intended to be + * overridden if required. + * + *
This method is invoked by address@hidden #onUnmappableCharacter}. + * + * @param newAction the action to be taken when a sequence of bytes is + * received that cannot be mapped to a Unicode character. + */ protected void implOnUnmappableCharacter (CodingErrorAction newAction) { // default implementation does nothing } + /** + * Notifies a subclass when the invoker has changed the replacement + * address@hidden String} (the sequence of characters to be output when a + * character is encountered that cannot be mapped to Unicode in the current + * address@hidden Charset}). + * + *
The default implementation does nothing; it is just intended to be + * overridden if required. + * + *
This method is invoked by address@hidden #replaceWith}. + * + * @param newReplacement the new replacement String. + */ protected void implReplaceWith (String newReplacement) { // default implementation does nothing } + /** + * Notifies a subclass when the invoker has invoked address@hidden #reset()}. + * + *
The default implementation does nothing; it is just intended to be + * overridden if required. + * + *
This method is invoked by address@hidden #reset()}.
+ */
protected void implReset ()
{
// default implementation does nothing
}
+ /**
+ * Determines whether this CharsetDecoder
can work out how to
+ * decode data based on the actual data, i.e., whether it can auto-detect
+ * the charset.
+ *
+ * <p>The default implementation always returns false; subclasses should
+ * override it if they implement auto-detection.
+ *
+ * @return true
if this CharsetDecoder
is
+ * auto-detecting, false
otherwise.
+ */
public boolean isAutoDetecting ()
{
return false;
}
+ /**
+ * Determines whether or not an auto-detecting CharsetDecoder
+ * has yet detected the address@hidden Charset} of the data.
+ *
+ * @return true
if the auto-detecting
+ * CharsetDecoder
has detected the address@hidden Charset} of the
+ * data, false
otherwise. Note that a false
value
+ * does not indicate that bytes have not been decoded, just that the
+ * CharsetDecoder
has not detected the address@hidden Charset} of
+ * the data.
+ *
+ * @throws UnsupportedOperationException if the CharsetDecoder
+ * does not support auto-detection (this is the default).
+ */
public boolean isCharsetDetected ()
{
throw new UnsupportedOperationException ();
}
+ /**
+ * Returns the action to be taken when invalid data is received.
+ *
+ * @return the action to be taken when invalid data is received.
+ */
public CodingErrorAction malformedInputAction ()
{
return malformedInputAction;
}
+ /**
+ * Returns the maximum number of characters output per byte of input.
+ *
+ *
For example, if a character of output needs between 2 and 5 bytes,
+ * then this value will be 0.5f.
+ *
+ * @return the maximum number of characters output per byte of input.
+ */
public final float maxCharsPerByte ()
{
return maxCharsPerByte;
}
+ /**
+ * Changes the action to be taken when a character is received that does not
+ * map into Unicode in this address@hidden Charset}.
+ *
+ * @param newAction the action to be taken when a character is received that
+ * does not map into Unicode in this address@hidden Charset}.
+ *
+ * @return the CharsetDecoder
that this method was invoked on,
+ * for convenience.
+ *
+ * @throws IllegalArgumentException if newAction
is null.
+ */
public final CharsetDecoder onUnmappableCharacter
(CodingErrorAction newAction)
{
@@ -280,11 +756,33 @@
return this;
}
+ /**
+ * Returns the sequence of characters used to replace a character that
+ * cannot be mapped to Unicode.
+ *
+ * <p>The {@link String} returned always has some content, i.e., it is
+ * never <code>null</code> and it is never empty.
+ *
+ * @return the sequence of characters used to replace a character that
+ * cannot be mapped to Unicode.
+ */
public final String replacement ()
{
return replacement;
}
+ /**
+ * Changes the sequence of characters used to replace a character that
+ * cannot be mapped to Unicode.
+ *
+ * @param newReplacement the new sequence of characters to be used to
+ * replace characters that cannot be mapped to Unicode.
+ *
+ * @return the CharsetDecoder
that this method was invoked on.
+ *
+ * @throws IllegalArgumentException if <code>newReplacement</code> is
+ * <code>null</code> or empty (a length of 0).
+ */
public final CharsetDecoder replaceWith (String newReplacement)
{
if (newReplacement == null)
@@ -298,6 +796,15 @@
return this;
}
+ /**
+ * Resets this CharsetDecoder
, ready for
+ * address@hidden #decode(ByteBuffer in, CharBuffer out, boolean endOfInput)}.
+ *
+ *
Invokes address@hidden #implReset} to ensure that any reset actions in the
+ * subclass are performed.
+ *
+ * @return the CharsetDecoder
that this method was invoked on.
+ */
public final CharsetDecoder reset ()
{
state = STATE_RESET;
@@ -305,6 +812,13 @@
return this;
}
+ /**
+ * Returns the action to be taken when a character is received that cannot
+ * be represented in Unicode.
+ *
+ * @return the action to be taken when a character is received that cannot
+ * be represented in Unicode.
+ */
public CodingErrorAction unmappableCharacterAction ()
{
return unmappableCharacterAction;