Index: gnu/java/nio/charset/EncodingHelper.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/nio/charset/EncodingHelper.java,v
retrieving revision 1.1
diff -u -r1.1 EncodingHelper.java
--- gnu/java/nio/charset/EncodingHelper.java 8 Apr 2005 21:46:05 -0000 1.1
+++ gnu/java/nio/charset/EncodingHelper.java 9 Apr 2005 03:38:38 -0000
@@ -116,6 +116,27 @@
      return (oldCanonical != null)?oldCanonical : newCanonical;
    }
 
+  /**
+   * Tells whether a charset name is one of the known aliases of ISO Latin-1.
+   * This is only used by the java.io classes, which may be asked to use
+   * Latin-1 during bootstrap before NIO is initialized, and which do not
+   * agree on a single alias for it.  The comparison ignores case because
+   * charset names are not case-sensitive.
+   *
+   * @param s the charset name to test
+   * @return true if s is a recognized ISO Latin-1 alias, false otherwise
+   * @throws NullPointerException if s is null
+   */
+  public static boolean isISOLatin1(String s)
+  {
+    return s.equalsIgnoreCase("ISO-8859-1")
+      || s.equalsIgnoreCase("8859_1")
+      || s.equalsIgnoreCase("ISO_8859-1")
+      || s.equalsIgnoreCase("latin1")
+      || s.equalsIgnoreCase("ISO8859_1")
+      || s.equalsIgnoreCase("ISO_8859_1");
+  }
+
   /**
    * Gets a charset, throwing the java.io exception and not
    * the java.nio exception if an error occurs.
Index: java/io/InputStreamReader.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/io/InputStreamReader.java,v retrieving revision 1.21 diff -u -r1.21 InputStreamReader.java --- java/io/InputStreamReader.java 10 Mar 2005 19:35:51 -0000 1.21 +++ java/io/InputStreamReader.java 9 Apr 2005 03:38:38 -0000 @@ -38,12 +38,16 @@ package java.io; -import java.nio.channels.Channels; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; - -import gnu.java.io.EncodingManager; -import gnu.java.io.decode.Decoder; +import java.nio.CharBuffer; +import java.nio.ByteBuffer; +import gnu.java.nio.charset.EncodingHelper; /** * This class reads characters from a byte input stream. The characters @@ -86,20 +90,55 @@ * @see BufferedReader * @see InputStream * + * @author Robert Schuster * @author Aaron M. Renn (address@hidden) * @author Per Bothner (address@hidden) * @date April 22, 1998. */ public class InputStreamReader extends Reader { - /* - * This is the byte-character decoder class that does the reading and - * translation of bytes from the underlying stream. + /** + * The input stream. + */ + private InputStream in; + + /** + * The charset decoder. + */ + private CharsetDecoder decoder; + + /** + * End of stream reached. + */ + private boolean isDone = false; + + /** + * Need this. */ - private Reader in; + private float maxBytesPerChar; + /** + * Default buffer size + */ + private static final int DEFAULT_BUFFER_CAP = 8192; + + /** + * Buffer holding surplus loaded bytes (if any) + */ + private ByteBuffer byteBuffer; + + /** + * java.io canonical name of the encoding. 
+ */ private String encoding; - + + /** + * We might decode to a 2-char UTF-16 surrogate, which won't fit in the + * output buffer. In this case we need to save the surrogate char. + */ + private char savedSurrogate; + private boolean hasSavedSurrogate = false; + /** * This method initializes a new instance of InputStreamReader * to read from the specified stream using the default encoding. @@ -110,11 +149,38 @@ { if (in == null) throw new NullPointerException(); - - Decoder decoder = EncodingManager.getDecoder(in); - encoding = decoder.getSchemeName(); - - this.in = decoder; + this.in = in; + try + { + encoding = System.getProperty("file.encoding"); + // Don't use NIO if avoidable + if(EncodingHelper.isISOLatin1(encoding)) + { + encoding = "ISO8859_1"; + maxBytesPerChar = 1f; + decoder = null; + return; + } + Charset cs = EncodingHelper.getCharset(encoding); + decoder = cs.newDecoder(); + encoding = EncodingHelper.getOldCanonical(cs.name()); + try { + maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); + } catch(UnsupportedOperationException _){ + maxBytesPerChar = 1f; + } + decoder.onMalformedInput(CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + decoder.reset(); + } catch(RuntimeException e) { + encoding = "ISO8859_1"; + maxBytesPerChar = 1f; + decoder = null; + } catch(UnsupportedEncodingException e) { + encoding = "ISO8859_1"; + maxBytesPerChar = 1f; + decoder = null; + } } /** @@ -136,41 +202,72 @@ || encoding_name == null) throw new NullPointerException(); - Decoder decoder = EncodingManager.getDecoder(in, encoding_name); - encoding = decoder.getSchemeName(); - - this.in = decoder; - + this.in = in; + // Don't use NIO if avoidable + if(EncodingHelper.isISOLatin1(encoding_name)) + { + encoding = "ISO8859_1"; + maxBytesPerChar = 1f; + decoder = null; + return; + } + try { + Charset cs = EncodingHelper.getCharset(encoding_name); + try { + maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); + } 
catch(UnsupportedOperationException _){ + maxBytesPerChar = 1f; + } + + decoder = cs.newDecoder(); + decoder.onMalformedInput(CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + decoder.reset(); + + // The encoding should be the old name, if such exists. + encoding = EncodingHelper.getOldCanonical(cs.name()); + } catch(RuntimeException e) { + encoding = "ISO8859_1"; + maxBytesPerChar = 1f; + decoder = null; + } } /** * Creates an InputStreamReader that uses a decoder of the given * charset to decode the bytes in the InputStream into * characters. - * @since 1.4 */ - public InputStreamReader(InputStream in, Charset charset) - { - /* FIXME: InputStream is wrapped in Channel which is read by a - * Reader-implementation for channels. However to fix this we - * need to completely move to NIO-style character - * encoding/decoding. - */ - this.in = Channels.newReader(Channels.newChannel(in), charset.newDecoder(), - -1); - encoding = charset.name(); + public InputStreamReader(InputStream in, Charset charset) { + this.in = in; + decoder = charset.newDecoder(); + + // JDK reports errors, so we do the same. + decoder.onMalformedInput(CodingErrorAction.REPORT); + decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + decoder.reset(); + encoding = EncodingHelper.getOldCanonical(charset.name()); } /** * Creates an InputStreamReader that uses the given charset decoder * to decode the bytes in the InputStream into characters. 
- * @since 1.4 */ - public InputStreamReader(InputStream in, CharsetDecoder decoder) - { - // FIXME: see address@hidden InputStreamReader(InputStream, Charset) - this.in = Channels.newReader(Channels.newChannel(in), decoder, -1); - encoding = decoder.charset().name(); + public InputStreamReader(InputStream in, CharsetDecoder decoder) { + this.in = in; + this.decoder = decoder; + + try { + maxBytesPerChar = decoder.charset().newEncoder().maxBytesPerChar(); + } catch(UnsupportedOperationException _){ + maxBytesPerChar = 1f; + } + + // JDK reports errors, so we do the same. + decoder.onMalformedInput(CodingErrorAction.REPORT); + decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + decoder.reset(); + encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); } /** @@ -183,9 +280,14 @@ { synchronized (lock) { + // Makes sure all intermediate data is released by the decoder. + if (decoder != null) + decoder.reset(); if (in != null) - in.close(); + in.close(); in = null; + isDone = true; + decoder = null; } } @@ -202,7 +304,7 @@ } /** - * This method checks to see if the stream is read to be read. It + * This method checks to see if the stream is ready to be read. It * will return true if is, or false if it is not. * If the stream is not ready to be read, it could (although is not required * to) block on the next read attempt. 
@@ -217,7 +319,7 @@ if (in == null) throw new IOException("Reader has been closed"); - return in.ready(); + return in.available() != 0; } /** @@ -233,45 +335,108 @@ * * @exception IOException If an error occurs */ - public int read (char[] buf, int offset, int length) throws IOException + public int read(char[] buf, int offset, int length) throws IOException { if (in == null) throw new IOException("Reader has been closed"); - - return in.read(buf, offset, length); + if (isDone) + return -1; + + if(decoder != null){ + int totalBytes = (int)((double)length * maxBytesPerChar); + byte[] bytes = new byte[totalBytes]; + + int remaining = 0; + if(byteBuffer != null) + { + remaining = byteBuffer.remaining(); + byteBuffer.get(bytes, 0, remaining); + } + int read; + if(totalBytes - remaining > 0) + { + read = in.read(bytes, remaining, totalBytes - remaining); + if(read == -1){ + read = remaining; + isDone = true; + } else + read += remaining; + } else + read = remaining; + byteBuffer = ByteBuffer.wrap(bytes, 0, read); + CharBuffer cb = CharBuffer.wrap(buf, offset, length); + + if(hasSavedSurrogate){ + hasSavedSurrogate = false; + cb.put(savedSurrogate); + read++; + } + + CoderResult cr = decoder.decode(byteBuffer, cb, isDone); + decoder.reset(); + + // 1 char remains which is the first half of a surrogate pair. + if(cr.isOverflow() && cb.hasRemaining()){ + CharBuffer overflowbuf = CharBuffer.allocate(2); + cr = decoder.decode(byteBuffer, overflowbuf, isDone); + overflowbuf.flip(); + cb.put(overflowbuf.get()); + savedSurrogate = overflowbuf.get(); + hasSavedSurrogate = true; + isDone = false; + } + + if(byteBuffer.hasRemaining()) { + byteBuffer.compact(); + byteBuffer.flip(); + isDone = false; + } else + byteBuffer = null; + + return (read == 0)?-1:cb.position(); + } else { + byte[] bytes = new byte[length]; + int read = in.read(bytes); + for(int i=0;i + * This method will block until the char can be read. * - * @return The char read, as an int, or -1 if end of stream. 
+ * @return The char read or -1 if end of stream * * @exception IOException If an error occurs */ public int read() throws IOException { - if (in == null) - throw new IOException("Reader has been closed"); - - return in.read(); + char[] buf = new char[1]; + int count = read(buf, 0, 1); + return count > 0 ? buf[0] : -1; } - /** - * Skips the specified number of chars in the stream. It - * returns the actual number of chars skipped, which may be less than the - * requested amount. - * - * @param count The requested number of chars to skip - * - * @return The actual number of chars skipped. - * - * @exception IOException If an error occurs - */ + /** + * Skips the specified number of chars in the stream. It + * returns the actual number of chars skipped, which may be less than the + * requested amount. + * + * @param count The requested number of chars to skip + * + * @return The actual number of chars skipped. + * + * @exception IOException If an error occurs + */ public long skip(long count) throws IOException { if (in == null) throw new IOException("Reader has been closed"); return super.skip(count); - } + } } Index: java/io/OutputStreamWriter.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/io/OutputStreamWriter.java,v retrieving revision 1.14 diff -u -r1.14 OutputStreamWriter.java --- java/io/OutputStreamWriter.java 16 Feb 2005 11:18:37 -0000 1.14 +++ java/io/OutputStreamWriter.java 9 Apr 2005 03:38:38 -0000 @@ -38,8 +38,17 @@ package java.io; -import gnu.java.io.EncodingManager; -import gnu.java.io.encode.Encoder; +import gnu.java.nio.charset.EncodingHelper; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.MalformedInputException; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.CoderResult; +import 
java.nio.charset.CodingErrorAction; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; /** * This class writes characters to an output stream that is byte oriented @@ -76,13 +85,20 @@ */ public class OutputStreamWriter extends Writer { + /** + * The output stream. + */ + private OutputStream out; /** - * This is the byte-character encoder class that does the writing and - * translation of characters to bytes before writing to the underlying - * class. + * The charset encoder. */ - private Encoder out; + private CharsetEncoder encoder; + + /** + * java.io canonical name of the encoding. + */ + private String encodingName; /** * This method initializes a new instance of OutputStreamWriter @@ -100,7 +116,31 @@ public OutputStreamWriter (OutputStream out, String encoding_scheme) throws UnsupportedEncodingException { - this.out = EncodingManager.getEncoder (out, encoding_scheme); + this.out = out; + try + { + // Don't use NIO if avoidable + if(EncodingHelper.isISOLatin1(encoding_scheme)) + { + encodingName = "ISO8859_1"; + encoder = null; + return; + } + + Charset cs = EncodingHelper.getCharset(encoding_scheme); + if(cs == null) + throw new UnsupportedEncodingException("Encoding "+encoding_scheme+ + " unknown"); + encoder = cs.newEncoder(); + encodingName = EncodingHelper.getOldCanonical(cs.name()); + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } catch(RuntimeException e) { + // Default to ISO Latin-1, will happen if this is called, for instance, + // before the NIO provider is loadable. 
+ encoder = null; + encodingName = "ISO8859_1"; + } } /** @@ -111,7 +151,22 @@ */ public OutputStreamWriter (OutputStream out) { - this.out = EncodingManager.getEncoder (out); + this.out = out; + try + { + String encoding = System.getProperty("file.encoding"); + Charset cs = Charset.forName(encoding); + encoder = cs.newEncoder(); + encodingName = EncodingHelper.getOldCanonical(cs.name()); + } catch(RuntimeException e) { + encoder = null; + encodingName = "ISO8859_1"; + } + if(encoder != null) + { + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } } /** @@ -122,7 +177,10 @@ */ public void close () throws IOException { + if(out == null) + throw new IOException("Stream is closed."); out.close (); + out = null; } /** @@ -134,7 +192,7 @@ */ public String getEncoding () { - return out != null ? out.getSchemeName () : null; + return out != null ? encodingName : null; } /** @@ -144,7 +202,8 @@ */ public void flush () throws IOException { - out.flush (); + if(out != null) + out.flush (); } /** @@ -160,7 +219,37 @@ */ public void write (char[] buf, int offset, int count) throws IOException { - out.write (buf, offset, count); + if(out == null) + throw new IOException("Stream is closed."); + if(buf == null) + throw new IOException("Buffer is null."); + + if(encoder == null) + { + byte[] b = new byte[count]; + for(int i=0;i data.length) throw new StringIndexOutOfBoundsException(); - // XXX Consider using java.nio here. 
- value = EncodingManager.getDecoder(encoding) - .convertToChars(data, offset, count); - this.offset = 0; - this.count = value.length; + try + { + CharsetDecoder csd = Charset.forName(encoding).newDecoder(); + csd.onMalformedInput(CodingErrorAction.REPLACE); + csd.onUnmappableCharacter(CodingErrorAction.REPLACE); + CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); + if(cbuf.hasArray()) + { + value = cbuf.array(); + this.offset = cbuf.position(); + this.count = cbuf.remaining(); + } else { + // Doubt this will happen. But just in case. + value = new char[cbuf.remaining()]; + cbuf.get(value); + this.offset = 0; + this.count = value.length; + } + } catch(CharacterCodingException e){ + throw new UnsupportedEncodingException("Encoding: "+encoding+ + " not found."); + } catch(IllegalCharsetNameException e){ + throw new UnsupportedEncodingException("Encoding: "+encoding+ + " not found."); + } catch(UnsupportedCharsetException e){ + throw new UnsupportedEncodingException("Encoding: "+encoding+ + " not found."); + } } /** @@ -359,7 +389,7 @@ * * @param data byte array to copy * @param offset the offset to start at - * @param count the number of characters in the array to use + * @param count the number of bytes in the array to use * @throws NullPointerException if data is null * @throws IndexOutOfBoundsException if offset or count is incorrect * @throws Error if the decoding fails @@ -370,11 +400,41 @@ { if (offset < 0 || count < 0 || offset + count > data.length) throw new StringIndexOutOfBoundsException(); - // XXX Consider using java.nio here. 
- value = EncodingManager.getDecoder() - .convertToChars(data, offset, count); - this.offset = 0; - this.count = value.length; + int o, c; + char[] v; + String encoding; + try + { + encoding = System.getProperty("file.encoding"); + CharsetDecoder csd = Charset.forName(encoding).newDecoder(); + csd.onMalformedInput(CodingErrorAction.REPLACE); + csd.onUnmappableCharacter(CodingErrorAction.REPLACE); + CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); + if(cbuf.hasArray()) + { + v = cbuf.array(); + o = cbuf.position(); + c = cbuf.remaining(); + } else { + // Doubt this will happen. But just in case. + v = new char[cbuf.remaining()]; + cbuf.get(v); + o = 0; + c = v.length; + } + } catch(Exception ex){ + // If anything goes wrong (System property not set, + // NIO provider not available, etc) + // Default to the 'safe' encoding ISO8859_1 + v = new char[count]; + o = 0; + c = count; + for (int i=0;i