1 /* Jackson JSON-processor.
2  *
3  * Copyright (c) 2007- Tatu Saloranta, tatu.saloranta@iki.fi
4  */
5 package com.fasterxml.jackson.core;
6 
7 import java.util.Arrays;
8 
9 import com.fasterxml.jackson.core.util.ByteArrayBuilder;
10 
11 /**
12  * Class used to define specific details of which
13  * variant of Base64 encoding/decoding is to be used. Although there is
14  * somewhat standard basic version (so-called "MIME Base64"), other variants
15  * exists, see <a href="http://en.wikipedia.org/wiki/Base64">Base64 Wikipedia entry</a> for details.
16  *
17  * @author Tatu Saloranta
18  */
19 public final class Base64Variant
20     implements java.io.Serializable
21 {
22     private final static int INT_SPACE = 0x20;
23 
24     // We'll only serialize name
25     private static final long serialVersionUID = 1L;
26 
27     /**
28      * Placeholder used by "no padding" variant, to be used when a character
29      * value is needed.
30      */
31     final static char PADDING_CHAR_NONE = '\0';
32 
33     /**
34      * Marker used to denote ascii characters that do not correspond
35      * to a 6-bit value (in this variant), and is not used as a padding
36      * character.
37      */
38     public final static int BASE64_VALUE_INVALID = -1;
39 
40     /**
41      * Marker used to denote ascii character (in decoding table) that
42      * is the padding character using this variant (if any).
43      */
44     public final static int BASE64_VALUE_PADDING = -2;
45 
46     /*
47     /**********************************************************
48     /* Encoding/decoding tables
49     /**********************************************************
50      */
51 
52     /**
53      * Decoding table used for base 64 decoding.
54      */
55     private final transient int[] _asciiToBase64 = new int[128];
56 
57     /**
58      * Encoding table used for base 64 decoding when output is done
59      * as characters.
60      */
61     private final transient char[] _base64ToAsciiC = new char[64];
62 
63     /**
64      * Alternative encoding table used for base 64 decoding when output is done
65      * as ascii bytes.
66      */
67     private final transient byte[] _base64ToAsciiB = new byte[64];
68 
69     /*
70     /**********************************************************
71     /* Other configuration
72     /**********************************************************
73      */
74 
75     /**
76      * Symbolic name of variant; used for diagnostics/debugging.
77      *<p>
78      * Note that this is the only non-transient field; used when reading
79      * back from serialized state.
80      *<p>
81      * Also: must not be private, accessed from `BaseVariants`
82      */
83     final String _name;
84 
85     /**
86      * Whether this variant uses padding or not.
87      */
88     private final transient boolean _usesPadding;
89 
90     /**
91      * Character used for padding, if any ({@link #PADDING_CHAR_NONE} if not).
92      */
93     private final transient char _paddingChar;
94 
95     /**
96      * Maximum number of encoded base64 characters to output during encoding
97      * before adding a linefeed, if line length is to be limited
98      * ({@link java.lang.Integer#MAX_VALUE} if not limited).
99      *<p>
100      * Note: for some output modes (when writing attributes) linefeeds may
101      * need to be avoided, and this value ignored.
102      */
103     private final transient int _maxLineLength;
104 
105     /*
106     /**********************************************************
107     /* Life-cycle
108     /**********************************************************
109      */
110 
Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength)111     public Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength)
112     {
113         _name = name;
114         _usesPadding = usesPadding;
115         _paddingChar = paddingChar;
116         _maxLineLength = maxLineLength;
117 
118         // Ok and then we need to create codec tables.
119 
120         // First the main encoding table:
121         int alphaLen = base64Alphabet.length();
122         if (alphaLen != 64) {
123             throw new IllegalArgumentException("Base64Alphabet length must be exactly 64 (was "+alphaLen+")");
124         }
125 
126         // And then secondary encoding table and decoding table:
127         base64Alphabet.getChars(0, alphaLen, _base64ToAsciiC, 0);
128         Arrays.fill(_asciiToBase64, BASE64_VALUE_INVALID);
129         for (int i = 0; i < alphaLen; ++i) {
130             char alpha = _base64ToAsciiC[i];
131             _base64ToAsciiB[i] = (byte) alpha;
132             _asciiToBase64[alpha] = i;
133         }
134 
135         // Plus if we use padding, add that in too
136         if (usesPadding) {
137             _asciiToBase64[(int) paddingChar] = BASE64_VALUE_PADDING;
138         }
139     }
140 
141     /**
142      * "Copy constructor" that can be used when the base alphabet is identical
143      * to one used by another variant except for the maximum line length
144      * (and obviously, name).
145      */
Base64Variant(Base64Variant base, String name, int maxLineLength)146     public Base64Variant(Base64Variant base, String name, int maxLineLength)
147     {
148         this(base, name, base._usesPadding, base._paddingChar, maxLineLength);
149     }
150 
151     /**
152      * "Copy constructor" that can be used when the base alphabet is identical
153      * to one used by another variant, but other details (padding, maximum
154      * line length) differ
155      */
Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength)156     public Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength)
157     {
158         _name = name;
159         byte[] srcB = base._base64ToAsciiB;
160         System.arraycopy(srcB, 0, this._base64ToAsciiB, 0, srcB.length);
161         char[] srcC = base._base64ToAsciiC;
162         System.arraycopy(srcC, 0, this._base64ToAsciiC, 0, srcC.length);
163         int[] srcV = base._asciiToBase64;
164         System.arraycopy(srcV, 0, this._asciiToBase64, 0, srcV.length);
165 
166         _usesPadding = usesPadding;
167         _paddingChar = paddingChar;
168         _maxLineLength = maxLineLength;
169     }
170 
171     /*
172     /**********************************************************
173     /* Serializable overrides
174     /**********************************************************
175      */
176 
177     /**
178      * Method used to "demote" deserialized instances back to
179      * canonical ones
180      */
readResolve()181     protected Object readResolve() {
182         return Base64Variants.valueOf(_name);
183     }
184 
185     /*
186     /**********************************************************
187     /* Public accessors
188     /**********************************************************
189      */
190 
getName()191     public String getName() { return _name; }
192 
usesPadding()193     public boolean usesPadding() { return _usesPadding; }
usesPaddingChar(char c)194     public boolean usesPaddingChar(char c) { return c == _paddingChar; }
usesPaddingChar(int ch)195     public boolean usesPaddingChar(int ch) { return ch == (int) _paddingChar; }
getPaddingChar()196     public char getPaddingChar() { return _paddingChar; }
getPaddingByte()197     public byte getPaddingByte() { return (byte)_paddingChar; }
198 
getMaxLineLength()199     public int getMaxLineLength() { return _maxLineLength; }
200 
201     /*
202     /**********************************************************
203     /* Decoding support
204     /**********************************************************
205      */
206 
207     /**
208      * @return 6-bit decoded value, if valid character;
209      */
decodeBase64Char(char c)210     public int decodeBase64Char(char c)
211     {
212         int ch = (int) c;
213         return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
214     }
215 
decodeBase64Char(int ch)216     public int decodeBase64Char(int ch)
217     {
218         return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
219     }
220 
decodeBase64Byte(byte b)221     public int decodeBase64Byte(byte b)
222     {
223         int ch = (int) b;
224         // note: cast retains sign, so it's from -128 to +127
225         if (ch < 0) {
226             return BASE64_VALUE_INVALID;
227         }
228         return _asciiToBase64[ch];
229     }
230 
231     /*
232     /**********************************************************
233     /* Encoding support
234     /**********************************************************
235      */
236 
encodeBase64BitsAsChar(int value)237     public char encodeBase64BitsAsChar(int value)
238     {
239         /* Let's assume caller has done necessary checks; this
240          * method must be fast and inlinable
241          */
242         return _base64ToAsciiC[value];
243     }
244 
245     /**
246      * Method that encodes given right-aligned (LSB) 24-bit value
247      * into 4 base64 characters, stored in given result buffer.
248      */
encodeBase64Chunk(int b24, char[] buffer, int ptr)249     public int encodeBase64Chunk(int b24, char[] buffer, int ptr)
250     {
251         buffer[ptr++] = _base64ToAsciiC[(b24 >> 18) & 0x3F];
252         buffer[ptr++] = _base64ToAsciiC[(b24 >> 12) & 0x3F];
253         buffer[ptr++] = _base64ToAsciiC[(b24 >> 6) & 0x3F];
254         buffer[ptr++] = _base64ToAsciiC[b24 & 0x3F];
255         return ptr;
256     }
257 
encodeBase64Chunk(StringBuilder sb, int b24)258     public void encodeBase64Chunk(StringBuilder sb, int b24)
259     {
260         sb.append(_base64ToAsciiC[(b24 >> 18) & 0x3F]);
261         sb.append(_base64ToAsciiC[(b24 >> 12) & 0x3F]);
262         sb.append(_base64ToAsciiC[(b24 >> 6) & 0x3F]);
263         sb.append(_base64ToAsciiC[b24 & 0x3F]);
264     }
265 
266     /**
267      * Method that outputs partial chunk (which only encodes one
268      * or two bytes of data). Data given is still aligned same as if
269      * it as full data; that is, missing data is at the "right end"
270      * (LSB) of int.
271      *
272      * @param outputBytes Number of encoded bytes included (either 1 or 2)
273      */
encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr)274     public int encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr)
275     {
276         buffer[outPtr++] = _base64ToAsciiC[(bits >> 18) & 0x3F];
277         buffer[outPtr++] = _base64ToAsciiC[(bits >> 12) & 0x3F];
278         if (_usesPadding) {
279             buffer[outPtr++] = (outputBytes == 2) ?
280                 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar;
281             buffer[outPtr++] = _paddingChar;
282         } else {
283             if (outputBytes == 2) {
284                 buffer[outPtr++] = _base64ToAsciiC[(bits >> 6) & 0x3F];
285             }
286         }
287         return outPtr;
288     }
289 
encodeBase64Partial(StringBuilder sb, int bits, int outputBytes)290     public void encodeBase64Partial(StringBuilder sb, int bits, int outputBytes)
291     {
292         sb.append(_base64ToAsciiC[(bits >> 18) & 0x3F]);
293         sb.append(_base64ToAsciiC[(bits >> 12) & 0x3F]);
294         if (_usesPadding) {
295             sb.append((outputBytes == 2) ?
296                       _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar);
297             sb.append(_paddingChar);
298         } else {
299             if (outputBytes == 2) {
300                 sb.append(_base64ToAsciiC[(bits >> 6) & 0x3F]);
301             }
302         }
303     }
304 
encodeBase64BitsAsByte(int value)305     public byte encodeBase64BitsAsByte(int value)
306     {
307         // As with above, assuming it is 6-bit value
308         return _base64ToAsciiB[value];
309     }
310 
311     /**
312      * Method that encodes given right-aligned (LSB) 24-bit value
313      * into 4 base64 bytes (ascii), stored in given result buffer.
314      */
encodeBase64Chunk(int b24, byte[] buffer, int ptr)315     public int encodeBase64Chunk(int b24, byte[] buffer, int ptr)
316     {
317         buffer[ptr++] = _base64ToAsciiB[(b24 >> 18) & 0x3F];
318         buffer[ptr++] = _base64ToAsciiB[(b24 >> 12) & 0x3F];
319         buffer[ptr++] = _base64ToAsciiB[(b24 >> 6) & 0x3F];
320         buffer[ptr++] = _base64ToAsciiB[b24 & 0x3F];
321         return ptr;
322     }
323 
324     /**
325      * Method that outputs partial chunk (which only encodes one
326      * or two bytes of data). Data given is still aligned same as if
327      * it as full data; that is, missing data is at the "right end"
328      * (LSB) of int.
329      *
330      * @param outputBytes Number of encoded bytes included (either 1 or 2)
331      */
encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr)332     public int encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr)
333     {
334         buffer[outPtr++] = _base64ToAsciiB[(bits >> 18) & 0x3F];
335         buffer[outPtr++] = _base64ToAsciiB[(bits >> 12) & 0x3F];
336         if (_usesPadding) {
337             byte pb = (byte) _paddingChar;
338             buffer[outPtr++] = (outputBytes == 2) ?
339                 _base64ToAsciiB[(bits >> 6) & 0x3F] : pb;
340             buffer[outPtr++] = pb;
341         } else {
342             if (outputBytes == 2) {
343                 buffer[outPtr++] = _base64ToAsciiB[(bits >> 6) & 0x3F];
344             }
345         }
346         return outPtr;
347     }
348 
349     /*
350     /**********************************************************
351     /* Convenience conversion methods for String to/from bytes
352     /* use case.
353     /**********************************************************
354      */
355 
356     /**
357      * Convenience method for converting given byte array as base64 encoded
358      * String using this variant's settings.
359      * Resulting value is "raw", that is, not enclosed in double-quotes.
360      *
361      * @param input Byte array to encode
362      */
encode(byte[] input)363     public String encode(byte[] input)
364     {
365         return encode(input, false);
366     }
367 
368     /**
369      * Convenience method for converting given byte array as base64 encoded String
370      * using this variant's settings, optionally enclosed in double-quotes.
371      * Linefeeds added, if needed, are expressed as 2-character JSON (and Java source)
372      * escape sequence of backslash + `n`.
373      *
374      * @param input Byte array to encode
375      * @param addQuotes Whether to surround resulting value in double quotes or not
376      */
encode(byte[] input, boolean addQuotes)377     public String encode(byte[] input, boolean addQuotes)
378     {
379         final int inputEnd = input.length;
380         final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3));
381         if (addQuotes) {
382             sb.append('"');
383         }
384 
385         int chunksBeforeLF = getMaxLineLength() >> 2;
386 
387         // Ok, first we loop through all full triplets of data:
388         int inputPtr = 0;
389         int safeInputEnd = inputEnd-3; // to get only full triplets
390 
391         while (inputPtr <= safeInputEnd) {
392             // First, mash 3 bytes into lsb of 32-bit int
393             int b24 = ((int) input[inputPtr++]) << 8;
394             b24 |= ((int) input[inputPtr++]) & 0xFF;
395             b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF);
396             encodeBase64Chunk(sb, b24);
397             if (--chunksBeforeLF <= 0) {
398                 // note: must quote in JSON value, so not really useful...
399                 sb.append('\\');
400                 sb.append('n');
401                 chunksBeforeLF = getMaxLineLength() >> 2;
402             }
403         }
404 
405         // And then we may have 1 or 2 leftover bytes to encode
406         int inputLeft = inputEnd - inputPtr; // 0, 1 or 2
407         if (inputLeft > 0) { // yes, but do we have room for output?
408             int b24 = ((int) input[inputPtr++]) << 16;
409             if (inputLeft == 2) {
410                 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8;
411             }
412             encodeBase64Partial(sb, b24, inputLeft);
413         }
414 
415         if (addQuotes) {
416             sb.append('"');
417         }
418         return sb.toString();
419     }
420 
421     /**
422      * Convenience method for converting given byte array as base64 encoded String
423      * using this variant's settings, optionally enclosed in double-quotes.
424      * Linefeed character to use is passed explicitly.
425      *
426      * @param input Byte array to encode
427      * @param addQuotes Whether to surround resulting value in double quotes or not
428      *
429      * @since 2.10
430      */
encode(byte[] input, boolean addQuotes, String linefeed)431     public String encode(byte[] input, boolean addQuotes, String linefeed)
432     {
433         final int inputEnd = input.length;
434         final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3));
435         if (addQuotes) {
436             sb.append('"');
437         }
438 
439         int chunksBeforeLF = getMaxLineLength() >> 2;
440 
441         int inputPtr = 0;
442         int safeInputEnd = inputEnd-3;
443 
444         while (inputPtr <= safeInputEnd) {
445             int b24 = ((int) input[inputPtr++]) << 8;
446             b24 |= ((int) input[inputPtr++]) & 0xFF;
447             b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF);
448             encodeBase64Chunk(sb, b24);
449             if (--chunksBeforeLF <= 0) {
450                 sb.append(linefeed);
451                 chunksBeforeLF = getMaxLineLength() >> 2;
452             }
453         }
454         int inputLeft = inputEnd - inputPtr;
455         if (inputLeft > 0) {
456             int b24 = ((int) input[inputPtr++]) << 16;
457             if (inputLeft == 2) {
458                 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8;
459             }
460             encodeBase64Partial(sb, b24, inputLeft);
461         }
462 
463         if (addQuotes) {
464             sb.append('"');
465         }
466         return sb.toString();
467     }
468 
469     /**
470      * Convenience method for decoding contents of a Base64-encoded String,
471      * using this variant's settings.
472      *
473      * @param input
474      *
475      * @since 2.3
476      *
477      * @throws IllegalArgumentException if input is not valid base64 encoded data
478      */
479     @SuppressWarnings("resource")
decode(String input)480     public byte[] decode(String input) throws IllegalArgumentException
481     {
482         ByteArrayBuilder b = new ByteArrayBuilder();
483         decode(input, b);
484         return b.toByteArray();
485     }
486 
487     /**
488      * Convenience method for decoding contents of a Base64-encoded String,
489      * using this variant's settings
490      * and appending decoded binary data using provided {@link ByteArrayBuilder}.
491      *<p>
492      * NOTE: builder will NOT be reset before decoding (nor cleared afterwards);
493      * assumption is that caller will ensure it is given in proper state, and
494      * used as appropriate afterwards.
495      *
496      * @since 2.3
497      *
498      * @throws IllegalArgumentException if input is not valid base64 encoded data
499      */
decode(String str, ByteArrayBuilder builder)500     public void decode(String str, ByteArrayBuilder builder) throws IllegalArgumentException
501     {
502         int ptr = 0;
503         int len = str.length();
504 
505     main_loop:
506         while (true) {
507             // first, we'll skip preceding white space, if any
508             char ch;
509             do {
510                 if (ptr >= len) {
511                     break main_loop;
512                 }
513                 ch = str.charAt(ptr++);
514             } while (ch <= INT_SPACE);
515             int bits = decodeBase64Char(ch);
516             if (bits < 0) {
517                 _reportInvalidBase64(ch, 0, null);
518             }
519             int decodedData = bits;
520             // then second base64 char; can't get padding yet, nor ws
521             if (ptr >= len) {
522                 _reportBase64EOF();
523             }
524             ch = str.charAt(ptr++);
525             bits = decodeBase64Char(ch);
526             if (bits < 0) {
527                 _reportInvalidBase64(ch, 1, null);
528             }
529             decodedData = (decodedData << 6) | bits;
530             // third base64 char; can be padding, but not ws
531             if (ptr >= len) {
532                 // but as per [JACKSON-631] can be end-of-input, iff not using padding
533                 if (!usesPadding()) {
534                     decodedData >>= 4;
535                     builder.append(decodedData);
536                     break;
537                 }
538                 _reportBase64EOF();
539             }
540             ch = str.charAt(ptr++);
541             bits = decodeBase64Char(ch);
542 
543             // First branch: can get padding (-> 1 byte)
544             if (bits < 0) {
545                 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
546                     _reportInvalidBase64(ch, 2, null);
547                 }
548                 // Ok, must get padding
549                 if (ptr >= len) {
550                     _reportBase64EOF();
551                 }
552                 ch = str.charAt(ptr++);
553                 if (!usesPaddingChar(ch)) {
554                     _reportInvalidBase64(ch, 3, "expected padding character '"+getPaddingChar()+"'");
555                 }
556                 // Got 12 bits, only need 8, need to shift
557                 decodedData >>= 4;
558                 builder.append(decodedData);
559                 continue;
560             }
561             // Nope, 2 or 3 bytes
562             decodedData = (decodedData << 6) | bits;
563             // fourth and last base64 char; can be padding, but not ws
564             if (ptr >= len) {
565                 // but as per [JACKSON-631] can be end-of-input, iff not using padding
566                 if (!usesPadding()) {
567                     decodedData >>= 2;
568                     builder.appendTwoBytes(decodedData);
569                     break;
570                 }
571                 _reportBase64EOF();
572             }
573             ch = str.charAt(ptr++);
574             bits = decodeBase64Char(ch);
575             if (bits < 0) {
576                 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
577                     _reportInvalidBase64(ch, 3, null);
578                 }
579                 decodedData >>= 2;
580                 builder.appendTwoBytes(decodedData);
581             } else {
582                 // otherwise, our triple is now complete
583                 decodedData = (decodedData << 6) | bits;
584                 builder.appendThreeBytes(decodedData);
585             }
586         }
587     }
588 
589     /*
590     /**********************************************************
591     /* Overridden standard methods
592     /**********************************************************
593      */
594 
595     @Override
toString()596     public String toString() { return _name; }
597 
598     @Override
equals(Object o)599     public boolean equals(Object o) {
600         // identity comparison should be dine
601         return (o == this);
602     }
603 
604     @Override
hashCode()605     public int hashCode() {
606         return _name.hashCode();
607     }
608 
609     /*
610     /**********************************************************
611     /* Internal helper methods
612     /**********************************************************
613      */
614 
615     /**
616      * @param bindex Relative index within base64 character unit; between 0
617      *   and 3 (as unit has exactly 4 characters)
618      */
_reportInvalidBase64(char ch, int bindex, String msg)619     protected void _reportInvalidBase64(char ch, int bindex, String msg)
620         throws IllegalArgumentException
621     {
622         String base;
623         if (ch <= INT_SPACE) {
624             base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units";
625         } else if (usesPaddingChar(ch)) {
626             base = "Unexpected padding character ('"+getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character";
627         } else if (!Character.isDefined(ch) || Character.isISOControl(ch)) {
628             // Not sure if we can really get here... ? (most illegal xml chars are caught at lower level)
629             base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content";
630         } else {
631             base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content";
632         }
633         if (msg != null) {
634             base = base + ": " + msg;
635         }
636         throw new IllegalArgumentException(base);
637     }
638 
_reportBase64EOF()639     protected void _reportBase64EOF() throws IllegalArgumentException {
640         throw new IllegalArgumentException(missingPaddingMessage());
641     }
642 
643     /**
644      * Helper method that will construct a message to use in exceptions for cases where input ends
645      * prematurely in place where padding would be expected.
646      *
647      * @since 2.10
648      */
missingPaddingMessage()649     public String missingPaddingMessage() {
650         return String.format("Unexpected end of base64-encoded String: base64 variant '%s' expects padding (one or more '%c' characters) at the end",
651                 getName(), getPaddingChar());
652     }
653 
654 }
655 
656