1 /* 2 * Copyright (c) 1995, 2005, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.misc; 27 28 import java.io.InputStream; 29 import java.io.ByteArrayInputStream; 30 import java.io.OutputStream; 31 import java.io.ByteArrayOutputStream; 32 import java.io.PrintStream; 33 import java.io.IOException; 34 import java.nio.ByteBuffer; 35 36 37 /** 38 * This class defines the encoding half of character encoders. 39 * A character encoder is an algorithim for transforming 8 bit binary 40 * data into text (generally 7 bit ASCII or 8 bit ISO-Latin-1 text) 41 * for transmition over text channels such as e-mail and network news. 42 * 43 * The character encoders have been structured around a central theme 44 * that, in general, the encoded text has the form: 45 * 46 * <pre> 47 * [Buffer Prefix] 48 * [Line Prefix][encoded data atoms][Line Suffix] 49 * [Buffer Suffix] 50 * </pre> 51 * 52 * In the CharacterEncoder and CharacterDecoder classes, one complete 53 * chunk of data is referred to as a <i>buffer</i>. Encoded buffers 54 * are all text, and decoded buffers (sometimes just referred to as 55 * buffers) are binary octets. 56 * 57 * To create a custom encoder, you must, at a minimum, overide three 58 * abstract methods in this class. 59 * <DL> 60 * <DD>bytesPerAtom which tells the encoder how many bytes to 61 * send to encodeAtom 62 * <DD>encodeAtom which encodes the bytes sent to it as text. 63 * <DD>bytesPerLine which tells the encoder the maximum number of 64 * bytes per line. 65 * </DL> 66 * 67 * Several useful encoders have already been written and are 68 * referenced in the See Also list below. 69 * 70 * @author Chuck McManis 71 * @see CharacterDecoder; 72 * @see UCEncoder 73 * @see UUEncoder 74 * @see BASE64Encoder 75 */ 76 public abstract class CharacterEncoder { 77 78 /** Stream that understands "printing" */ 79 protected PrintStream pStream; 80 81 /** Return the number of bytes per atom of encoding */ bytesPerAtom()82 abstract protected int bytesPerAtom(); 83 84 /** Return the number of bytes that can be encoded per line */ bytesPerLine()85 abstract protected int bytesPerLine(); 86 87 /** 88 * Encode the prefix for the entire buffer. By default is simply 89 * opens the PrintStream for use by the other functions. 90 */ encodeBufferPrefix(OutputStream aStream)91 protected void encodeBufferPrefix(OutputStream aStream) throws IOException { 92 pStream = new PrintStream(aStream); 93 } 94 95 /** 96 * Encode the suffix for the entire buffer. 97 */ encodeBufferSuffix(OutputStream aStream)98 protected void encodeBufferSuffix(OutputStream aStream) throws IOException { 99 } 100 101 /** 102 * Encode the prefix that starts every output line. 103 */ encodeLinePrefix(OutputStream aStream, int aLength)104 protected void encodeLinePrefix(OutputStream aStream, int aLength) 105 throws IOException { 106 } 107 108 /** 109 * Encode the suffix that ends every output line. By default 110 * this method just prints a <newline> into the output stream. 111 */ encodeLineSuffix(OutputStream aStream)112 protected void encodeLineSuffix(OutputStream aStream) throws IOException { 113 pStream.println(); 114 } 115 116 /** Encode one "atom" of information into characters. */ encodeAtom(OutputStream aStream, byte someBytes[], int anOffset, int aLength)117 abstract protected void encodeAtom(OutputStream aStream, byte someBytes[], 118 int anOffset, int aLength) throws IOException; 119 120 /** 121 * This method works around the bizarre semantics of BufferedInputStream's 122 * read method. 123 */ readFully(InputStream in, byte buffer[])124 protected int readFully(InputStream in, byte buffer[]) 125 throws java.io.IOException { 126 for (int i = 0; i < buffer.length; i++) { 127 int q = in.read(); 128 if (q == -1) 129 return i; 130 buffer[i] = (byte)q; 131 } 132 return buffer.length; 133 } 134 135 /** 136 * Encode bytes from the input stream, and write them as text characters 137 * to the output stream. This method will run until it exhausts the 138 * input stream, but does not print the line suffix for a final 139 * line that is shorter than bytesPerLine(). 140 */ encode(InputStream inStream, OutputStream outStream)141 public void encode(InputStream inStream, OutputStream outStream) 142 throws IOException { 143 int j; 144 int numBytes; 145 byte tmpbuffer[] = new byte[bytesPerLine()]; 146 147 encodeBufferPrefix(outStream); 148 149 while (true) { 150 numBytes = readFully(inStream, tmpbuffer); 151 if (numBytes == 0) { 152 break; 153 } 154 encodeLinePrefix(outStream, numBytes); 155 for (j = 0; j < numBytes; j += bytesPerAtom()) { 156 157 if ((j + bytesPerAtom()) <= numBytes) { 158 encodeAtom(outStream, tmpbuffer, j, bytesPerAtom()); 159 } else { 160 encodeAtom(outStream, tmpbuffer, j, (numBytes)- j); 161 } 162 } 163 if (numBytes < bytesPerLine()) { 164 break; 165 } else { 166 encodeLineSuffix(outStream); 167 } 168 } 169 encodeBufferSuffix(outStream); 170 } 171 172 /** 173 * Encode the buffer in <i>aBuffer</i> and write the encoded 174 * result to the OutputStream <i>aStream</i>. 175 */ encode(byte aBuffer[], OutputStream aStream)176 public void encode(byte aBuffer[], OutputStream aStream) 177 throws IOException { 178 ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer); 179 encode(inStream, aStream); 180 } 181 182 /** 183 * A 'streamless' version of encode that simply takes a buffer of 184 * bytes and returns a string containing the encoded buffer. 185 */ encode(byte aBuffer[])186 public String encode(byte aBuffer[]) { 187 ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 188 ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer); 189 String retVal = null; 190 try { 191 encode(inStream, outStream); 192 // explicit ascii->unicode conversion 193 retVal = outStream.toString("8859_1"); 194 } catch (Exception IOException) { 195 // This should never happen. 196 throw new Error("CharacterEncoder.encode internal error"); 197 } 198 return (retVal); 199 } 200 201 /** 202 * Return a byte array from the remaining bytes in this ByteBuffer. 203 * <P> 204 * The ByteBuffer's position will be advanced to ByteBuffer's limit. 205 * <P> 206 * To avoid an extra copy, the implementation will attempt to return the 207 * byte array backing the ByteBuffer. If this is not possible, a 208 * new byte array will be created. 209 */ getBytes(ByteBuffer bb)210 private byte [] getBytes(ByteBuffer bb) { 211 /* 212 * This should never return a BufferOverflowException, as we're 213 * careful to allocate just the right amount. 214 */ 215 byte [] buf = null; 216 217 /* 218 * If it has a usable backing byte buffer, use it. Use only 219 * if the array exactly represents the current ByteBuffer. 220 */ 221 if (bb.hasArray()) { 222 byte [] tmp = bb.array(); 223 if ((tmp.length == bb.capacity()) && 224 (tmp.length == bb.remaining())) { 225 buf = tmp; 226 bb.position(bb.limit()); 227 } 228 } 229 230 if (buf == null) { 231 /* 232 * This class doesn't have a concept of encode(buf, len, off), 233 * so if we have a partial buffer, we must reallocate 234 * space. 235 */ 236 buf = new byte[bb.remaining()]; 237 238 /* 239 * position() automatically updated 240 */ 241 bb.get(buf); 242 } 243 244 return buf; 245 } 246 247 /** 248 * Encode the <i>aBuffer</i> ByteBuffer and write the encoded 249 * result to the OutputStream <i>aStream</i>. 250 * <P> 251 * The ByteBuffer's position will be advanced to ByteBuffer's limit. 252 */ encode(ByteBuffer aBuffer, OutputStream aStream)253 public void encode(ByteBuffer aBuffer, OutputStream aStream) 254 throws IOException { 255 byte [] buf = getBytes(aBuffer); 256 encode(buf, aStream); 257 } 258 259 /** 260 * A 'streamless' version of encode that simply takes a ByteBuffer 261 * and returns a string containing the encoded buffer. 262 * <P> 263 * The ByteBuffer's position will be advanced to ByteBuffer's limit. 264 */ encode(ByteBuffer aBuffer)265 public String encode(ByteBuffer aBuffer) { 266 byte [] buf = getBytes(aBuffer); 267 return encode(buf); 268 } 269 270 /** 271 * Encode bytes from the input stream, and write them as text characters 272 * to the output stream. This method will run until it exhausts the 273 * input stream. It differs from encode in that it will add the 274 * line at the end of a final line that is shorter than bytesPerLine(). 275 */ encodeBuffer(InputStream inStream, OutputStream outStream)276 public void encodeBuffer(InputStream inStream, OutputStream outStream) 277 throws IOException { 278 int j; 279 int numBytes; 280 byte tmpbuffer[] = new byte[bytesPerLine()]; 281 282 encodeBufferPrefix(outStream); 283 284 while (true) { 285 numBytes = readFully(inStream, tmpbuffer); 286 if (numBytes == 0) { 287 break; 288 } 289 encodeLinePrefix(outStream, numBytes); 290 for (j = 0; j < numBytes; j += bytesPerAtom()) { 291 if ((j + bytesPerAtom()) <= numBytes) { 292 encodeAtom(outStream, tmpbuffer, j, bytesPerAtom()); 293 } else { 294 encodeAtom(outStream, tmpbuffer, j, (numBytes)- j); 295 } 296 } 297 encodeLineSuffix(outStream); 298 if (numBytes < bytesPerLine()) { 299 break; 300 } 301 } 302 encodeBufferSuffix(outStream); 303 } 304 305 /** 306 * Encode the buffer in <i>aBuffer</i> and write the encoded 307 * result to the OutputStream <i>aStream</i>. 308 */ encodeBuffer(byte aBuffer[], OutputStream aStream)309 public void encodeBuffer(byte aBuffer[], OutputStream aStream) 310 throws IOException { 311 ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer); 312 encodeBuffer(inStream, aStream); 313 } 314 315 /** 316 * A 'streamless' version of encode that simply takes a buffer of 317 * bytes and returns a string containing the encoded buffer. 318 */ encodeBuffer(byte aBuffer[])319 public String encodeBuffer(byte aBuffer[]) { 320 ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 321 ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer); 322 try { 323 encodeBuffer(inStream, outStream); 324 } catch (Exception IOException) { 325 // This should never happen. 326 throw new Error("CharacterEncoder.encodeBuffer internal error"); 327 } 328 return (outStream.toString()); 329 } 330 331 /** 332 * Encode the <i>aBuffer</i> ByteBuffer and write the encoded 333 * result to the OutputStream <i>aStream</i>. 334 * <P> 335 * The ByteBuffer's position will be advanced to ByteBuffer's limit. 336 */ encodeBuffer(ByteBuffer aBuffer, OutputStream aStream)337 public void encodeBuffer(ByteBuffer aBuffer, OutputStream aStream) 338 throws IOException { 339 byte [] buf = getBytes(aBuffer); 340 encodeBuffer(buf, aStream); 341 } 342 343 /** 344 * A 'streamless' version of encode that simply takes a ByteBuffer 345 * and returns a string containing the encoded buffer. 346 * <P> 347 * The ByteBuffer's position will be advanced to ByteBuffer's limit. 348 */ encodeBuffer(ByteBuffer aBuffer)349 public String encodeBuffer(ByteBuffer aBuffer) { 350 byte [] buf = getBytes(aBuffer); 351 return encodeBuffer(buf); 352 } 353 354 } 355