1 /*
2  * Copyright (c) 1995, 2004, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.misc;
27 
28 import java.io.OutputStream;
29 import java.io.ByteArrayOutputStream;
30 import java.io.InputStream;
31 import java.io.PushbackInputStream;
32 import java.io.ByteArrayInputStream;
33 import java.io.IOException;
34 import java.nio.ByteBuffer;
35 
36 /**
37  * This class defines the decoding half of character encoders.
38  * A character decoder is an algorithm for transforming 8 bit
39  * binary data that has been encoded into text by a character
40  * encoder, back into original binary form.
41  *
42  * The character encoders, in general, have been structured
43  * around a central theme that binary data can be encoded into
44  * text that has the form:
45  *
46  * <pre>
47  *      [Buffer Prefix]
48  *      [Line Prefix][encoded data atoms][Line Suffix]
49  *      [Buffer Suffix]
50  * </pre>
51  *
52  * Of course, in the simplest encoding schemes the buffer has no
53  * distinct prefix or suffix; however, all have some fixed relationship
54  * between the text in an 'atom' and the binary data itself.
55  *
56  * In the CharacterEncoder and CharacterDecoder classes, one complete
57  * chunk of data is referred to as a <i>buffer</i>. Encoded buffers
58  * are all text, and decoded buffers (sometimes just referred to as
59  * buffers) are binary octets.
60  *
61  * To create a custom decoder, you must, at a minimum, override three
62  * methods in this class.
63  * <DL>
64  * <DD>bytesPerAtom which tells the decoder how many bytes to
65  * expect from decodeAtom
66  * <DD>decodeAtom which decodes the bytes sent to it as text.
67  * <DD>bytesPerLine which tells the decoder the maximum number of
68  * bytes per line.
69  * </DL>
70  *
71  * In general, the character decoders return error in the form of a
72  * CEFormatException. The syntax of the detail string is
73  * <pre>
74  *      DecoderClassName: Error message.
75  * </pre>
76  *
77  * Several useful decoders have already been written and are
78  * referenced in the See Also list below.
79  *
80  * @author      Chuck McManis
81  * @see         CEFormatException
82  * @see         CharacterEncoder
83  * @see         UCDecoder
84  * @see         UUDecoder
85  * @see         BASE64Decoder
86  */
87 
88 public abstract class CharacterDecoder {
89 
90     /** Return the number of bytes per atom of decoding */
bytesPerAtom()91     abstract protected int bytesPerAtom();
92 
93     /** Return the maximum number of bytes that can be encoded per line */
bytesPerLine()94     abstract protected int bytesPerLine();
95 
96     /** decode the beginning of the buffer, by default this is a NOP. */
decodeBufferPrefix(PushbackInputStream aStream, OutputStream bStream)97     protected void decodeBufferPrefix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
98 
99     /** decode the buffer suffix, again by default it is a NOP. */
decodeBufferSuffix(PushbackInputStream aStream, OutputStream bStream)100     protected void decodeBufferSuffix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
101 
102     /**
103      * This method should return, if it knows, the number of bytes
104      * that will be decoded. Many formats such as uuencoding provide
105      * this information. By default we return the maximum bytes that
106      * could have been encoded on the line.
107      */
decodeLinePrefix(PushbackInputStream aStream, OutputStream bStream)108     protected int decodeLinePrefix(PushbackInputStream aStream, OutputStream bStream) throws IOException {
109         return (bytesPerLine());
110     }
111 
112     /**
113      * This method post processes the line, if there are error detection
114      * or correction codes in a line, they are generally processed by
115      * this method. The simplest version of this method looks for the
116      * (newline) character.
117      */
decodeLineSuffix(PushbackInputStream aStream, OutputStream bStream)118     protected void decodeLineSuffix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
119 
120     /**
121      * This method does an actual decode. It takes the decoded bytes and
122      * writes them to the OutputStream. The integer <i>l</i> tells the
123      * method how many bytes are required. This is always <= bytesPerAtom().
124      */
decodeAtom(PushbackInputStream aStream, OutputStream bStream, int l)125     protected void decodeAtom(PushbackInputStream aStream, OutputStream bStream, int l) throws IOException {
126         throw new CEStreamExhausted();
127     }
128 
129     /**
130      * This method works around the bizarre semantics of BufferedInputStream's
131      * read method.
132      */
readFully(InputStream in, byte buffer[], int offset, int len)133     protected int readFully(InputStream in, byte buffer[], int offset, int len)
134         throws java.io.IOException {
135         for (int i = 0; i < len; i++) {
136             int q = in.read();
137             if (q == -1)
138                 return ((i == 0) ? -1 : i);
139             buffer[i+offset] = (byte)q;
140         }
141         return len;
142     }
143 
144     /**
145      * Decode the text from the InputStream and write the decoded
146      * octets to the OutputStream. This method runs until the stream
147      * is exhausted.
148      * @exception CEFormatException An error has occured while decoding
149      * @exception CEStreamExhausted The input stream is unexpectedly out of data
150      */
decodeBuffer(InputStream aStream, OutputStream bStream)151     public void decodeBuffer(InputStream aStream, OutputStream bStream) throws IOException {
152         int     i;
153         int     totalBytes = 0;
154 
155         PushbackInputStream ps = new PushbackInputStream (aStream);
156         decodeBufferPrefix(ps, bStream);
157         while (true) {
158             int length;
159 
160             try {
161                 length = decodeLinePrefix(ps, bStream);
162                 for (i = 0; (i+bytesPerAtom()) < length; i += bytesPerAtom()) {
163                     decodeAtom(ps, bStream, bytesPerAtom());
164                     totalBytes += bytesPerAtom();
165                 }
166                 if ((i + bytesPerAtom()) == length) {
167                     decodeAtom(ps, bStream, bytesPerAtom());
168                     totalBytes += bytesPerAtom();
169                 } else {
170                     decodeAtom(ps, bStream, length - i);
171                     totalBytes += (length - i);
172                 }
173                 decodeLineSuffix(ps, bStream);
174             } catch (CEStreamExhausted e) {
175                 break;
176             }
177         }
178         decodeBufferSuffix(ps, bStream);
179     }
180 
181     /**
182      * Alternate decode interface that takes a String containing the encoded
183      * buffer and returns a byte array containing the data.
184      * @exception CEFormatException An error has occured while decoding
185      */
186     public byte decodeBuffer(String inputString)[] throws IOException {
187         byte    inputBuffer[] = new byte[inputString.length()];
188         ByteArrayInputStream inStream;
189         ByteArrayOutputStream outStream;
190 
191         inputString.getBytes(0, inputString.length(), inputBuffer, 0);
192         inStream = new ByteArrayInputStream(inputBuffer);
193         outStream = new ByteArrayOutputStream();
decodeBuffer(inStream, outStream)194         decodeBuffer(inStream, outStream);
195         return (outStream.toByteArray());
196     }
197 
198     /**
199      * Decode the contents of the inputstream into a buffer.
200      */
201     public byte decodeBuffer(InputStream in)[] throws IOException {
202         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
decodeBuffer(in, outStream)203         decodeBuffer(in, outStream);
204         return (outStream.toByteArray());
205     }
206 
207     /**
208      * Decode the contents of the String into a ByteBuffer.
209      */
decodeBufferToByteBuffer(String inputString)210     public ByteBuffer decodeBufferToByteBuffer(String inputString)
211         throws IOException {
212         return ByteBuffer.wrap(decodeBuffer(inputString));
213     }
214 
215     /**
216      * Decode the contents of the inputStream into a ByteBuffer.
217      */
decodeBufferToByteBuffer(InputStream in)218     public ByteBuffer decodeBufferToByteBuffer(InputStream in)
219         throws IOException {
220         return ByteBuffer.wrap(decodeBuffer(in));
221     }
222 }
223