1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 
17 package libcore.io;
18 
19 import java.io.ByteArrayOutputStream;
20 import java.nio.charset.StandardCharsets;
21 
22 /**
23  * Perform encoding and decoding of Base64 byte arrays as described in
24  * http://www.ietf.org/rfc/rfc2045.txt
25  */
26 public final class Base64 {
27     private static final byte[] BASE_64_ALPHABET = initializeBase64Alphabet();
28 
initializeBase64Alphabet()29     private static byte[] initializeBase64Alphabet() {
30         return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
31                     .getBytes(StandardCharsets.US_ASCII);
32     }
33 
34     // Bit masks for the 4 output 6-bit values from 3 input bytes.
35     private static final int FIRST_OUTPUT_BYTE_MASK = 0x3f << 18;
36     private static final int SECOND_OUTPUT_BYTE_MASK = 0x3f << 12;
37     private static final int THIRD_OUTPUT_BYTE_MASK = 0x3f << 6;
38     private static final int FOURTH_OUTPUT_BYTE_MASK = 0x3f;
39 
Base64()40     private Base64() {}
41 
encode(byte[] in)42     public static String encode(byte[] in) {
43         int len = in.length;
44         int outputLen = computeEncodingOutputLen(len);
45         byte[] output = new byte[outputLen];
46 
47         int outputIndex = 0;
48         for (int i = 0; i < len; i += 3) {
49             // Only a "triplet" if there are there are at least three remaining bytes
50             // in the input...
51             // Mask with 0xff to avoid signed extension.
52             int byteTripletAsInt = in[i] & 0xff;
53             if (i + 1 < len) {
54                 // Add second byte to the triplet.
55                 byteTripletAsInt <<= 8;
56                 byteTripletAsInt |= in[i + 1] & 0xff;
57                 if (i + 2 < len) {
58                     byteTripletAsInt <<= 8;
59                     byteTripletAsInt |= in[i + 2] & 0xff;
60                 } else {
61                     // Insert 2 zero bits as to make output 18 bits long.
62                     byteTripletAsInt <<= 2;
63                 }
64             } else {
65                 // Insert 4 zero bits as to make output 12 bits long.
66                 byteTripletAsInt <<= 4;
67             }
68 
69             if (i + 2 < len) {
70                 // The int may have up to 24 non-zero bits.
71                 output[outputIndex++] = BASE_64_ALPHABET[
72                         (byteTripletAsInt & FIRST_OUTPUT_BYTE_MASK) >>> 18];
73             }
74             if (i + 1 < len) {
75                 // The int may have up to 18 non-zero bits.
76                 output[outputIndex++] = BASE_64_ALPHABET[
77                         (byteTripletAsInt & SECOND_OUTPUT_BYTE_MASK) >>> 12];
78             }
79             output[outputIndex++] = BASE_64_ALPHABET[
80                     (byteTripletAsInt & THIRD_OUTPUT_BYTE_MASK) >>> 6];
81             output[outputIndex++] = BASE_64_ALPHABET[
82                     byteTripletAsInt & FOURTH_OUTPUT_BYTE_MASK];
83         }
84 
85         int inLengthMod3 = len % 3;
86         // Add padding as per the spec.
87         if (inLengthMod3 > 0) {
88             output[outputIndex++] = '=';
89             if (inLengthMod3 == 1) {
90                 output[outputIndex++] = '=';
91             }
92         }
93 
94         return new String(output, StandardCharsets.US_ASCII);
95     }
96 
computeEncodingOutputLen(int inLength)97     private static int computeEncodingOutputLen(int inLength) {
98         int inLengthMod3 = inLength % 3;
99         int outputLen = (inLength / 3) * 4;
100         if (inLengthMod3 == 2) {
101             // Need 3 6-bit characters as to express the last 16 bits, plus 1 padding.
102             outputLen += 4;
103         } else if (inLengthMod3 == 1) {
104             // Need 2 6-bit characters as to express the last 8 bits, plus 2 padding.
105             outputLen += 4;
106         }
107         return outputLen;
108     }
109 
decode(byte[] in)110     public static byte[] decode(byte[] in) {
111         return decode(in, in.length);
112     }
113 
114     /** Decodes the input from position 0 (inclusive) to len (exclusive). */
decode(byte[] in, int len)115     public static byte[] decode(byte[] in, int len) {
116         final int inLength = Math.min(in.length, len);
117         // Overestimating 3 bytes per each 4 blocks of input (plus a possibly incomplete one).
118         ByteArrayOutputStream output = new ByteArrayOutputStream((inLength / 4) * 3 + 3);
119         // Position in the input. Use an array so we can pass it to {@code getNextByte}.
120         int[] pos = new int[1];
121 
122         try {
123             while (pos[0] < inLength) {
124                 int byteTripletAsInt = 0;
125 
126                 // j is the index in a 4-tuple of 6-bit characters where are trying to read from the
127                 // input.
128                 for (int j = 0; j < 4; j++) {
129                     byte c = getNextByte(in, pos, inLength);
130                     if (c == END_OF_INPUT || c == PAD_AS_BYTE) {
131                         // Padding or end of file...
132                         switch (j) {
133                             case 0:
134                             case 1:
135                                 return (c == END_OF_INPUT) ? output.toByteArray() : null;
136                             case 2:
137                                 // The input is over with two 6-bit characters: a single byte padded
138                                 // with 4 extra 0's.
139 
140                                 if (c == END_OF_INPUT) {
141                                     // Do not consider the block, since padding is not present.
142                                     return checkNoTrailingAndReturn(output, in, pos[0], inLength);
143                                 }
144                                 // We are at a pad character, consume and look for the second one.
145                                 pos[0]++;
146                                 c = getNextByte(in, pos, inLength);
147                                 if (c == END_OF_INPUT) {
148                                     // Do not consider the block, since padding is not present.
149                                     return checkNoTrailingAndReturn(output, in, pos[0], inLength);
150                                 }
151                                 if (c == PAD_AS_BYTE) {
152                                     byteTripletAsInt >>= 4;
153                                     output.write(byteTripletAsInt);
154                                     return checkNoTrailingAndReturn(output, in, pos[0], inLength);
155                                 }
156                                 // Something other than pad and non-alphabet characters, illegal.
157                                 return null;
158 
159 
160                             case 3:
161                                 // The input is over with three 6-bit characters: two bytes padded
162                                 // with 2 extra 0's.
163                                 if (c == PAD_AS_BYTE) {
164                                     // Consider the block only if padding is present.
165                                     byteTripletAsInt >>= 2;
166                                     output.write(byteTripletAsInt >> 8);
167                                     output.write(byteTripletAsInt & 0xff);
168                                 }
169                                 return checkNoTrailingAndReturn(output, in, pos[0], inLength);
170                         }
171                     } else {
172                         byteTripletAsInt <<= 6;
173                         byteTripletAsInt += (c & 0xff);
174                         pos[0]++;
175                     }
176                 }
177                 // We have four 6-bit characters: output the corresponding 3 bytes
178                 output.write(byteTripletAsInt >> 16);
179                 output.write((byteTripletAsInt >> 8) & 0xff);
180                 output.write(byteTripletAsInt & 0xff);
181             }
182             return checkNoTrailingAndReturn(output, in, pos[0], inLength);
183         } catch (InvalidBase64ByteException e) {
184             return null;
185         }
186     }
187 
188     /**
189      * On decoding, an illegal character always return null.
190      *
191      * Using this exception to avoid "if" checks every time.
192      */
193 
194     private static class InvalidBase64ByteException extends Exception { }
195 
196     /**
197      * Obtain the numeric value corresponding to the next relevant byte in the input.
198      *
199      * Calculates the numeric value (6-bit, 0 <= x <= 63) of the next Base64 encoded byte in
200      * {@code in} at or after {@code pos[0]} and before {@code inLength}. Returns
201      * {@link #WHITESPACE_AS_BYTE}, {@link #PAD_AS_BYTE}, {@link #END_OF_INPUT} or the 6-bit value.
202      * {@code pos[0]} is updated as a side effect of this method.
203      */
getNextByte(byte[] in, int[] pos, int inLength)204     private static byte getNextByte(byte[] in, int[] pos, int inLength)
205             throws InvalidBase64ByteException {
206         // Ignore all whitespace.
207         while (pos[0] < inLength) {
208             byte c = base64AlphabetToNumericalValue(in[pos[0]]);
209             if (c != WHITESPACE_AS_BYTE) {
210                 return c;
211             }
212             pos[0]++;
213         }
214         return END_OF_INPUT;
215     }
216 
217     /**
218      * Check that there are no invalid trailing characters (ie, other then whitespace and padding)
219      *
220      * Returns {@code output} as a byte array in case of success, {@code null} in case of invalid
221      * characters.
222      */
checkNoTrailingAndReturn( ByteArrayOutputStream output, byte[] in, int i, int inLength)223     private static byte[] checkNoTrailingAndReturn(
224             ByteArrayOutputStream output, byte[] in, int i, int inLength)
225                     throws InvalidBase64ByteException{
226         while (i < inLength) {
227             byte c = base64AlphabetToNumericalValue(in[i]);
228             if (c != WHITESPACE_AS_BYTE && c != PAD_AS_BYTE) {
229                 return null;
230             }
231             i++;
232         }
233         return output.toByteArray();
234     }
235 
236     private static final byte PAD_AS_BYTE = -1;
237     private static final byte WHITESPACE_AS_BYTE = -2;
238     private static final byte END_OF_INPUT = -3;
base64AlphabetToNumericalValue(byte c)239     private static byte base64AlphabetToNumericalValue(byte c) throws InvalidBase64ByteException {
240         if ('A' <= c && c <= 'Z') {
241             return (byte) (c - 'A');
242         }
243         if ('a' <= c && c <= 'z') {
244             return (byte) (c - 'a' + 26);
245         }
246         if ('0' <= c && c <= '9') {
247             return (byte) (c - '0' + 52);
248         }
249         if (c == '+') {
250             return (byte) 62;
251         }
252         if (c == '/') {
253             return (byte) 63;
254         }
255         if (c == '=') {
256             return PAD_AS_BYTE;
257         }
258         if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
259             return WHITESPACE_AS_BYTE;
260         }
261         throw new InvalidBase64ByteException();
262     }
263 }
264