1 /*
2  * Copyright (C) 2012 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.io;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 import static com.google.common.base.Preconditions.checkNotNull;
19 import static com.google.common.base.Preconditions.checkPositionIndexes;
20 import static com.google.common.base.Preconditions.checkState;
21 import static com.google.common.math.IntMath.divide;
22 import static com.google.common.math.IntMath.log2;
23 import static java.math.RoundingMode.CEILING;
24 import static java.math.RoundingMode.FLOOR;
25 import static java.math.RoundingMode.UNNECESSARY;
26 
27 import com.google.common.annotations.GwtCompatible;
28 import com.google.common.annotations.GwtIncompatible;
29 import com.google.common.base.Ascii;
30 import com.google.common.base.Objects;
31 import com.google.errorprone.annotations.concurrent.LazyInit;
32 import java.io.IOException;
33 import java.io.InputStream;
34 import java.io.OutputStream;
35 import java.io.Reader;
36 import java.io.Writer;
37 import java.util.Arrays;
38 import org.checkerframework.checker.nullness.qual.Nullable;
39 
40 /**
41  * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
42  * strings. This class includes several constants for encoding schemes specified by <a
43  * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
44  *
45  * <pre>{@code
46  * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
47  * }</pre>
48  *
49  * <p>returns the string {@code "MZXW6==="}, and
50  *
51  * <pre>{@code
52  * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
53  * }</pre>
54  *
55  * <p>...returns the ASCII bytes of the string {@code "foo"}.
56  *
57  * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
58  * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
59  * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
60  * behavior:
61  *
62  * <pre>{@code
63  * BaseEncoding.base16().lowerCase().decode("deadbeef");
64  * }</pre>
65  *
66  * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
67  * on the receiving instance; you must store and use the new encoding instance it returns, instead.
68  *
69  * <pre>{@code
70  * // Do NOT do this
71  * BaseEncoding hex = BaseEncoding.base16();
72  * hex.lowerCase(); // does nothing!
73  * return hex.decode("deadbeef"); // throws an IllegalArgumentException
74  * }</pre>
75  *
76  * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
77  * x}, but the reverse does not necessarily hold.
78  *
79  * <table>
80  * <caption>Encodings</caption>
81  * <tr>
82  * <th>Encoding
83  * <th>Alphabet
84  * <th>{@code char:byte} ratio
85  * <th>Default padding
86  * <th>Comments
87  * <tr>
88  * <td>{@link #base16()}
89  * <td>0-9 A-F
90  * <td>2.00
91  * <td>N/A
92  * <td>Traditional hexadecimal. Defaults to upper case.
93  * <tr>
94  * <td>{@link #base32()}
95  * <td>A-Z 2-7
96  * <td>1.60
97  * <td>=
98  * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
99  * <tr>
100  * <td>{@link #base32Hex()}
101  * <td>0-9 A-V
102  * <td>1.60
103  * <td>=
104  * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
105  * <tr>
106  * <td>{@link #base64()}
107  * <td>A-Z a-z 0-9 + /
108  * <td>1.33
109  * <td>=
110  * <td>
111  * <tr>
112  * <td>{@link #base64Url()}
113  * <td>A-Z a-z 0-9 - _
114  * <td>1.33
115  * <td>=
116  * <td>Safe to use as filenames, or to pass in URLs without escaping
117  * </table>
118  *
119  * <p>All instances of this class are immutable, so they may be stored safely as static constants.
120  *
121  * @author Louis Wasserman
122  * @since 14.0
123  */
124 @GwtCompatible(emulated = true)
125 public abstract class BaseEncoding {
126   // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
127 
  // Declared without an access modifier, so only classes in this package can subclass BaseEncoding.
  BaseEncoding() {}
129 
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked exception (it extends {@link IOException}); {@link
   * BaseEncoding#decode(CharSequence)} rethrows it wrapped in an {@link IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose detail message describes the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the underlying {@code cause}. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
145 
146   /** Encodes the specified byte array, and returns the encoded {@code String}. */
encode(byte[] bytes)147   public String encode(byte[] bytes) {
148     return encode(bytes, 0, bytes.length);
149   }
150 
151   /**
152    * Encodes the specified range of the specified byte array, and returns the encoded {@code
153    * String}.
154    */
encode(byte[] bytes, int off, int len)155   public final String encode(byte[] bytes, int off, int len) {
156     checkPositionIndexes(off, off + len, bytes.length);
157     StringBuilder result = new StringBuilder(maxEncodedSize(len));
158     try {
159       encodeTo(result, bytes, off, len);
160     } catch (IOException impossible) {
161       throw new AssertionError(impossible);
162     }
163     return result.toString();
164   }
165 
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
   * Writer}.
   *
   * @param writer the destination that receives the encoded characters
   */
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
173 
174   /**
175    * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
176    */
177   @GwtIncompatible // ByteSink,CharSink
encodingSink(final CharSink encodedSink)178   public final ByteSink encodingSink(final CharSink encodedSink) {
179     checkNotNull(encodedSink);
180     return new ByteSink() {
181       @Override
182       public OutputStream openStream() throws IOException {
183         return encodingStream(encodedSink.openStream());
184       }
185     };
186   }
187 
188   // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
189 
190   private static byte[] extract(byte[] result, int length) {
191     if (length == result.length) {
192       return result;
193     } else {
194       byte[] trunc = new byte[length];
195       System.arraycopy(result, 0, trunc, 0, length);
196       return trunc;
197     }
198   }
199 
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the candidate encoded text
   * @return {@code true} if {@code chars} is a valid encoded string for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
207 
208   /**
209    * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
210    * inverse operation to {@link #encode(byte[])}.
211    *
212    * @throws IllegalArgumentException if the input is not a valid encoded string according to this
213    *     encoding.
214    */
215   public final byte[] decode(CharSequence chars) {
216     try {
217       return decodeChecked(chars);
218     } catch (DecodingException badInput) {
219       throw new IllegalArgumentException(badInput);
220     }
221   }
222 
223   /**
224    * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
225    * inverse operation to {@link #encode(byte[])}.
226    *
227    * @throws DecodingException if the input is not a valid encoded string according to this
228    *     encoding.
229    */ final byte[] decodeChecked(CharSequence chars)
230       throws DecodingException {
231     chars = trimTrailingPadding(chars);
232     byte[] tmp = new byte[maxDecodedSize(chars.length())];
233     int len = decodeTo(tmp, chars);
234     return extract(tmp, len);
235   }
236 
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
   *
   * @param reader the source of encoded characters
   */
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
243 
244   /**
245    * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
246    * CharSource}.
247    */
248   @GwtIncompatible // ByteSource,CharSource
249   public final ByteSource decodingSource(final CharSource encodedSource) {
250     checkNotNull(encodedSource);
251     return new ByteSource() {
252       @Override
253       public InputStream openStream() throws IOException {
254         return decodingStream(encodedSource.openStream());
255       }
256     };
257   }
258 
259   // Implementations for encoding/decoding
260 
  /** Returns the capacity used to presize the output builder when encoding {@code bytes} bytes. */
  abstract int maxEncodedSize(int bytes);

  /**
   * Appends the encoding of the {@code len} bytes of {@code bytes} starting at {@code off} to
   * {@code target}.
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /** Returns the size of the scratch array allocated when decoding {@code chars} characters. */
  abstract int maxDecodedSize(int chars);

  /** Decodes {@code chars} into {@code target} and returns the number of bytes written. */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
268 
269   CharSequence trimTrailingPadding(CharSequence chars) {
270     return checkNotNull(chars);
271   }
272 
273   // Modified encoding generators
274 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to use for padding
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string inserted into the encoded output
   * @param n the number of encoded characters after which the separator is added
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
319 
  // Shared instance returned by base64(): 64-character alphabet ending in "+/", padded with '='.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same shared instance.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
340 
  // Shared instance returned by base64Url(): same as BASE64 but with "-_" in place of "+/".
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same shared instance.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
362 
  // Shared instance returned by base32(): 32-character alphabet A-Z, 2-7, padded with '='.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same shared instance.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
381 
  // Shared instance returned by base32Hex(): 32-character alphabet 0-9, A-V, padded with '='.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same shared instance.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
400 
  // Shared instance returned by base16(): upper-case hexadecimal digits, constructed without a
  // padding character.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same shared instance.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
419 
420   private static final class Alphabet {
421     private final String name;
422     // this is meant to be immutable -- don't modify it!
423     private final char[] chars;
424     final int mask;
425     final int bitsPerChar;
426     final int charsPerChunk;
427     final int bytesPerChunk;
428     private final byte[] decodabet;
429     private final boolean[] validPadding;
430 
431     Alphabet(String name, char[] chars) {
432       this.name = checkNotNull(name);
433       this.chars = checkNotNull(chars);
434       try {
435         this.bitsPerChar = log2(chars.length, UNNECESSARY);
436       } catch (ArithmeticException e) {
437         throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
438       }
439 
440       /*
441        * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
442        * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
443        */
444       int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
445       try {
446         this.charsPerChunk = 8 / gcd;
447         this.bytesPerChunk = bitsPerChar / gcd;
448       } catch (ArithmeticException e) {
449         throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
450       }
451 
452       this.mask = chars.length - 1;
453 
454       byte[] decodabet = new byte[Ascii.MAX + 1];
455       Arrays.fill(decodabet, (byte) -1);
456       for (int i = 0; i < chars.length; i++) {
457         char c = chars[i];
458         checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
459         checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
460         decodabet[c] = (byte) i;
461       }
462       this.decodabet = decodabet;
463 
464       boolean[] validPadding = new boolean[charsPerChunk];
465       for (int i = 0; i < bytesPerChunk; i++) {
466         validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
467       }
468       this.validPadding = validPadding;
469     }
470 
471     char encode(int bits) {
472       return chars[bits];
473     }
474 
475     boolean isValidPaddingStartPosition(int index) {
476       return validPadding[index % charsPerChunk];
477     }
478 
479     boolean canDecode(char ch) {
480       return ch <= Ascii.MAX && decodabet[ch] != -1;
481     }
482 
483     int decode(char ch) throws DecodingException {
484       if (ch > Ascii.MAX) {
485         throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
486       }
487       int result = decodabet[ch];
488       if (result == -1) {
489         if (ch <= 0x20 || ch == Ascii.MAX) {
490           throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
491         } else {
492           throw new DecodingException("Unrecognized character: " + ch);
493         }
494       }
495       return result;
496     }
497 
498     private boolean hasLowerCase() {
499       for (char c : chars) {
500         if (Ascii.isLowerCase(c)) {
501           return true;
502         }
503       }
504       return false;
505     }
506 
507     private boolean hasUpperCase() {
508       for (char c : chars) {
509         if (Ascii.isUpperCase(c)) {
510           return true;
511         }
512       }
513       return false;
514     }
515 
516     Alphabet upperCase() {
517       if (!hasLowerCase()) {
518         return this;
519       } else {
520         checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
521         char[] upperCased = new char[chars.length];
522         for (int i = 0; i < chars.length; i++) {
523           upperCased[i] = Ascii.toUpperCase(chars[i]);
524         }
525         return new Alphabet(name + ".upperCase()", upperCased);
526       }
527     }
528 
529     Alphabet lowerCase() {
530       if (!hasUpperCase()) {
531         return this;
532       } else {
533         checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
534         char[] lowerCased = new char[chars.length];
535         for (int i = 0; i < chars.length; i++) {
536           lowerCased[i] = Ascii.toLowerCase(chars[i]);
537         }
538         return new Alphabet(name + ".lowerCase()", lowerCased);
539       }
540     }
541 
542     public boolean matches(char c) {
543       return c < decodabet.length && decodabet[c] != -1;
544     }
545 
546     @Override
547     public String toString() {
548       return name;
549     }
550 
551     @Override
552     public boolean equals(@Nullable Object other) {
553       if (other instanceof Alphabet) {
554         Alphabet that = (Alphabet) other;
555         return Arrays.equals(this.chars, that.chars);
556       }
557       return false;
558     }
559 
560     @Override
561     public int hashCode() {
562       return Arrays.hashCode(chars);
563     }
564   }
565 
566   static class StandardBaseEncoding extends BaseEncoding {
567     // TODO(lowasser): provide a useful toString
    // The character set and chunk geometry used for encoding and decoding.
    final Alphabet alphabet;

    // The padding character, or null when this encoding neither writes nor trims padding.
    final @Nullable Character paddingChar;
571 
    /**
     * Creates an encoding from the characters of {@code alphabetChars}; {@code name} becomes the
     * name of the resulting {@code Alphabet}.
     */
    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }
575 
576     StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
577       this.alphabet = checkNotNull(alphabet);
578       checkArgument(
579           paddingChar == null || !alphabet.matches(paddingChar),
580           "Padding character %s was already in alphabet",
581           paddingChar);
582       this.paddingChar = paddingChar;
583     }
584 
585     @Override
586     int maxEncodedSize(int bytes) {
587       return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
588     }
589 
590     @GwtIncompatible // Writer,OutputStream
591     @Override
592     public OutputStream encodingStream(final Writer out) {
593       checkNotNull(out);
594       return new OutputStream() {
595         int bitBuffer = 0;
596         int bitBufferLength = 0;
597         int writtenChars = 0;
598 
599         @Override
600         public void write(int b) throws IOException {
601           bitBuffer <<= 8;
602           bitBuffer |= b & 0xFF;
603           bitBufferLength += 8;
604           while (bitBufferLength >= alphabet.bitsPerChar) {
605             int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
606             out.write(alphabet.encode(charIndex));
607             writtenChars++;
608             bitBufferLength -= alphabet.bitsPerChar;
609           }
610         }
611 
612         @Override
613         public void flush() throws IOException {
614           out.flush();
615         }
616 
617         @Override
618         public void close() throws IOException {
619           if (bitBufferLength > 0) {
620             int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
621             out.write(alphabet.encode(charIndex));
622             writtenChars++;
623             if (paddingChar != null) {
624               while (writtenChars % alphabet.charsPerChunk != 0) {
625                 out.write(paddingChar.charValue());
626                 writtenChars++;
627               }
628             }
629           }
630           out.close();
631         }
632       };
633     }
634 
635     @Override
636     void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
637       checkNotNull(target);
638       checkPositionIndexes(off, off + len, bytes.length);
639       for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
640         encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
641       }
642     }
643 
644     void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
645       checkNotNull(target);
646       checkPositionIndexes(off, off + len, bytes.length);
647       checkArgument(len <= alphabet.bytesPerChunk);
648       long bitBuffer = 0;
649       for (int i = 0; i < len; ++i) {
650         bitBuffer |= bytes[off + i] & 0xFF;
651         bitBuffer <<= 8; // Add additional zero byte in the end.
652       }
653       // Position of first character is length of bitBuffer minus bitsPerChar.
654       final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
655       int bitsProcessed = 0;
656       while (bitsProcessed < len * 8) {
657         int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
658         target.append(alphabet.encode(charIndex));
659         bitsProcessed += alphabet.bitsPerChar;
660       }
661       if (paddingChar != null) {
662         while (bitsProcessed < alphabet.bytesPerChunk * 8) {
663           target.append(paddingChar.charValue());
664           bitsProcessed += alphabet.bitsPerChar;
665         }
666       }
667     }
668 
669     @Override
670     int maxDecodedSize(int chars) {
671       return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
672     }
673 
674     @Override
675     CharSequence trimTrailingPadding(CharSequence chars) {
676       checkNotNull(chars);
677       if (paddingChar == null) {
678         return chars;
679       }
680       char padChar = paddingChar.charValue();
681       int l;
682       for (l = chars.length() - 1; l >= 0; l--) {
683         if (chars.charAt(l) != padChar) {
684           break;
685         }
686       }
687       return chars.subSequence(0, l + 1);
688     }
689 
690     @Override
691     public boolean canDecode(CharSequence chars) {
692       checkNotNull(chars);
693       chars = trimTrailingPadding(chars);
694       if (!alphabet.isValidPaddingStartPosition(chars.length())) {
695         return false;
696       }
697       for (int i = 0; i < chars.length(); i++) {
698         if (!alphabet.canDecode(chars.charAt(i))) {
699           return false;
700         }
701       }
702       return true;
703     }
704 
    /**
     * Decodes {@code chars} into {@code target}, one chunk of {@code alphabet.charsPerChunk}
     * characters at a time, and returns the number of bytes written.
     *
     * @throws DecodingException if the length after trimming padding is not a position at which
     *     padding may start, or if a character is not in the alphabet
     */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      chars = trimTrailingPadding(chars);
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
        long chunk = 0;
        // Number of characters actually present in this chunk (fewer in a short final chunk).
        int charsProcessed = 0;
        for (int i = 0; i < alphabet.charsPerChunk; i++) {
          // Always shift, so positions past the end of the input contribute zero bits.
          chunk <<= alphabet.bitsPerChar;
          if (charIdx + i < chars.length()) {
            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
          }
        }
        // Low bits of the chunk that were zero-filled rather than decoded from input.
        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
        // Emit bytes from the most significant down, stopping before the zero-filled region.
        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
        }
      }
      return bytesWritten;
    }
729 
730     @Override
731     @GwtIncompatible // Reader,InputStream
732     public InputStream decodingStream(final Reader reader) {
733       checkNotNull(reader);
734       return new InputStream() {
735         int bitBuffer = 0;
736         int bitBufferLength = 0;
737         int readChars = 0;
738         boolean hitPadding = false;
739 
740         @Override
741         public int read() throws IOException {
742           while (true) {
743             int readChar = reader.read();
744             if (readChar == -1) {
745               if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
746                 throw new DecodingException("Invalid input length " + readChars);
747               }
748               return -1;
749             }
750             readChars++;
751             char ch = (char) readChar;
752             if (paddingChar != null && paddingChar.charValue() == ch) {
753               if (!hitPadding
754                   && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
755                 throw new DecodingException("Padding cannot start at index " + readChars);
756               }
757               hitPadding = true;
758             } else if (hitPadding) {
759               throw new DecodingException(
760                   "Expected padding character but found '" + ch + "' at index " + readChars);
761             } else {
762               bitBuffer <<= alphabet.bitsPerChar;
763               bitBuffer |= alphabet.decode(ch);
764               bitBufferLength += alphabet.bitsPerChar;
765 
766               if (bitBufferLength >= 8) {
767                 bitBufferLength -= 8;
768                 return (bitBuffer >> bitBufferLength) & 0xFF;
769               }
770             }
771           }
772         }
773 
774         @Override
775         public int read(byte[] buf, int off, int len) throws IOException {
776           // Overriding this to work around the fact that InputStream's default implementation of
777           // this method will silently swallow exceptions thrown by the single-byte read() method
778           // (other than on the first call to it), which in this case can cause invalid encoded
779           // strings to not throw an exception.
780           // See https://github.com/google/guava/issues/3542
781           checkPositionIndexes(off, off + len, buf.length);
782 
783           int i = off;
784           for (; i < off + len; i++) {
785             int b = read();
786             if (b == -1) {
787               int read = i - off;
788               return read == 0 ? -1 : read;
789             }
790             buf[i] = (byte) b;
791           }
792           return i - off;
793         }
794 
795         @Override
796         public void close() throws IOException {
797           reader.close();
798         }
799       };
800     }
801 
802     @Override
803     public BaseEncoding omitPadding() {
804       return (paddingChar == null) ? this : newInstance(alphabet, null);
805     }
806 
807     @Override
808     public BaseEncoding withPadChar(char padChar) {
809       if (8 % alphabet.bitsPerChar == 0
810           || (paddingChar != null && paddingChar.charValue() == padChar)) {
811         return this;
812       } else {
813         return newInstance(alphabet, padChar);
814       }
815     }
816 
817     @Override
818     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
819       for (int i = 0; i < separator.length(); i++) {
820         checkArgument(
821             !alphabet.matches(separator.charAt(i)),
822             "Separator (%s) cannot contain alphabet characters",
823             separator);
824       }
825       if (paddingChar != null) {
826         checkArgument(
827             separator.indexOf(paddingChar.charValue()) < 0,
828             "Separator (%s) cannot contain padding character",
829             separator);
830       }
831       return new SeparatedBaseEncoding(this, separator, afterEveryChars);
832     }
833 
834     @LazyInit private transient @Nullable BaseEncoding upperCase;
835     @LazyInit private transient @Nullable BaseEncoding lowerCase;
836 
837     @Override
838     public BaseEncoding upperCase() {
839       BaseEncoding result = upperCase;
840       if (result == null) {
841         Alphabet upper = alphabet.upperCase();
842         result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
843       }
844       return result;
845     }
846 
847     @Override
848     public BaseEncoding lowerCase() {
849       BaseEncoding result = lowerCase;
850       if (result == null) {
851         Alphabet lower = alphabet.lowerCase();
852         result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
853       }
854       return result;
855     }
856 
    // Factory hook used by omitPadding(), withPadChar(), upperCase() and lowerCase() to build a
    // variant of this encoding. Base16Encoding and Base64Encoding override it so that variants
    // keep their specialized type.
    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
      return new StandardBaseEncoding(alphabet, paddingChar);
    }
860 
861     @Override
862     public String toString() {
863       StringBuilder builder = new StringBuilder("BaseEncoding.");
864       builder.append(alphabet.toString());
865       if (8 % alphabet.bitsPerChar != 0) {
866         if (paddingChar == null) {
867           builder.append(".omitPadding()");
868         } else {
869           builder.append(".withPadChar('").append(paddingChar).append("')");
870         }
871       }
872       return builder.toString();
873     }
874 
875     @Override
876     public boolean equals(@Nullable Object other) {
877       if (other instanceof StandardBaseEncoding) {
878         StandardBaseEncoding that = (StandardBaseEncoding) other;
879         return this.alphabet.equals(that.alphabet)
880             && Objects.equal(this.paddingChar, that.paddingChar);
881       }
882       return false;
883     }
884 
885     @Override
886     public int hashCode() {
887       return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
888     }
889   }
890 
891   static final class Base16Encoding extends StandardBaseEncoding {
892     final char[] encoding = new char[512];
893 
894     Base16Encoding(String name, String alphabetChars) {
895       this(new Alphabet(name, alphabetChars.toCharArray()));
896     }
897 
898     private Base16Encoding(Alphabet alphabet) {
899       super(alphabet, null);
900       checkArgument(alphabet.chars.length == 16);
901       for (int i = 0; i < 256; ++i) {
902         encoding[i] = alphabet.encode(i >>> 4);
903         encoding[i | 0x100] = alphabet.encode(i & 0xF);
904       }
905     }
906 
907     @Override
908     void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
909       checkNotNull(target);
910       checkPositionIndexes(off, off + len, bytes.length);
911       for (int i = 0; i < len; ++i) {
912         int b = bytes[off + i] & 0xFF;
913         target.append(encoding[b]);
914         target.append(encoding[b | 0x100]);
915       }
916     }
917 
918     @Override
919     int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
920       checkNotNull(target);
921       if (chars.length() % 2 == 1) {
922         throw new DecodingException("Invalid input length " + chars.length());
923       }
924       int bytesWritten = 0;
925       for (int i = 0; i < chars.length(); i += 2) {
926         int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
927         target[bytesWritten++] = (byte) decoded;
928       }
929       return bytesWritten;
930     }
931 
932     @Override
933     BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
934       return new Base16Encoding(alphabet);
935     }
936   }
937 
938   static final class Base64Encoding extends StandardBaseEncoding {
939     Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
940       this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
941     }
942 
943     private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
944       super(alphabet, paddingChar);
945       checkArgument(alphabet.chars.length == 64);
946     }
947 
948     @Override
949     void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
950       checkNotNull(target);
951       checkPositionIndexes(off, off + len, bytes.length);
952       int i = off;
953       for (int remaining = len; remaining >= 3; remaining -= 3) {
954         int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
955         target.append(alphabet.encode(chunk >>> 18));
956         target.append(alphabet.encode((chunk >>> 12) & 0x3F));
957         target.append(alphabet.encode((chunk >>> 6) & 0x3F));
958         target.append(alphabet.encode(chunk & 0x3F));
959       }
960       if (i < off + len) {
961         encodeChunkTo(target, bytes, i, off + len - i);
962       }
963     }
964 
965     @Override
966     int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
967       checkNotNull(target);
968       chars = trimTrailingPadding(chars);
969       if (!alphabet.isValidPaddingStartPosition(chars.length())) {
970         throw new DecodingException("Invalid input length " + chars.length());
971       }
972       int bytesWritten = 0;
973       for (int i = 0; i < chars.length(); ) {
974         int chunk = alphabet.decode(chars.charAt(i++)) << 18;
975         chunk |= alphabet.decode(chars.charAt(i++)) << 12;
976         target[bytesWritten++] = (byte) (chunk >>> 16);
977         if (i < chars.length()) {
978           chunk |= alphabet.decode(chars.charAt(i++)) << 6;
979           target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
980           if (i < chars.length()) {
981             chunk |= alphabet.decode(chars.charAt(i++));
982             target[bytesWritten++] = (byte) (chunk & 0xFF);
983           }
984         }
985       }
986       return bytesWritten;
987     }
988 
989     @Override
990     BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
991       return new Base64Encoding(alphabet, paddingChar);
992     }
993   }
994 
995   @GwtIncompatible
996   static Reader ignoringReader(final Reader delegate, final String toIgnore) {
997     checkNotNull(delegate);
998     checkNotNull(toIgnore);
999     return new Reader() {
1000       @Override
1001       public int read() throws IOException {
1002         int readChar;
1003         do {
1004           readChar = delegate.read();
1005         } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1006         return readChar;
1007       }
1008 
1009       @Override
1010       public int read(char[] cbuf, int off, int len) throws IOException {
1011         throw new UnsupportedOperationException();
1012       }
1013 
1014       @Override
1015       public void close() throws IOException {
1016         delegate.close();
1017       }
1018     };
1019   }
1020 
1021   static Appendable separatingAppendable(
1022       final Appendable delegate, final String separator, final int afterEveryChars) {
1023     checkNotNull(delegate);
1024     checkNotNull(separator);
1025     checkArgument(afterEveryChars > 0);
1026     return new Appendable() {
1027       int charsUntilSeparator = afterEveryChars;
1028 
1029       @Override
1030       public Appendable append(char c) throws IOException {
1031         if (charsUntilSeparator == 0) {
1032           delegate.append(separator);
1033           charsUntilSeparator = afterEveryChars;
1034         }
1035         delegate.append(c);
1036         charsUntilSeparator--;
1037         return this;
1038       }
1039 
1040       @Override
1041       public Appendable append(@Nullable CharSequence chars, int off, int len) throws IOException {
1042         throw new UnsupportedOperationException();
1043       }
1044 
1045       @Override
1046       public Appendable append(@Nullable CharSequence chars) throws IOException {
1047         throw new UnsupportedOperationException();
1048       }
1049     };
1050   }
1051 
1052   @GwtIncompatible // Writer
1053   static Writer separatingWriter(
1054       final Writer delegate, final String separator, final int afterEveryChars) {
1055     final Appendable separatingAppendable =
1056         separatingAppendable(delegate, separator, afterEveryChars);
1057     return new Writer() {
1058       @Override
1059       public void write(int c) throws IOException {
1060         separatingAppendable.append((char) c);
1061       }
1062 
1063       @Override
1064       public void write(char[] chars, int off, int len) throws IOException {
1065         throw new UnsupportedOperationException();
1066       }
1067 
1068       @Override
1069       public void flush() throws IOException {
1070         delegate.flush();
1071       }
1072 
1073       @Override
1074       public void close() throws IOException {
1075         delegate.close();
1076       }
1077     };
1078   }
1079 
1080   static final class SeparatedBaseEncoding extends BaseEncoding {
1081     private final BaseEncoding delegate;
1082     private final String separator;
1083     private final int afterEveryChars;
1084 
1085     SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1086       this.delegate = checkNotNull(delegate);
1087       this.separator = checkNotNull(separator);
1088       this.afterEveryChars = afterEveryChars;
1089       checkArgument(
1090           afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1091     }
1092 
1093     @Override
1094     CharSequence trimTrailingPadding(CharSequence chars) {
1095       return delegate.trimTrailingPadding(chars);
1096     }
1097 
1098     @Override
1099     int maxEncodedSize(int bytes) {
1100       int unseparatedSize = delegate.maxEncodedSize(bytes);
1101       return unseparatedSize
1102           + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1103     }
1104 
1105     @GwtIncompatible // Writer,OutputStream
1106     @Override
1107     public OutputStream encodingStream(final Writer output) {
1108       return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1109     }
1110 
1111     @Override
1112     void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1113       delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1114     }
1115 
1116     @Override
1117     int maxDecodedSize(int chars) {
1118       return delegate.maxDecodedSize(chars);
1119     }
1120 
1121     @Override
1122     public boolean canDecode(CharSequence chars) {
1123       StringBuilder builder = new StringBuilder();
1124       for (int i = 0; i < chars.length(); i++) {
1125         char c = chars.charAt(i);
1126         if (separator.indexOf(c) < 0) {
1127           builder.append(c);
1128         }
1129       }
1130       return delegate.canDecode(builder);
1131     }
1132 
1133     @Override
1134     int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1135       StringBuilder stripped = new StringBuilder(chars.length());
1136       for (int i = 0; i < chars.length(); i++) {
1137         char c = chars.charAt(i);
1138         if (separator.indexOf(c) < 0) {
1139           stripped.append(c);
1140         }
1141       }
1142       return delegate.decodeTo(target, stripped);
1143     }
1144 
1145     @Override
1146     @GwtIncompatible // Reader,InputStream
1147     public InputStream decodingStream(final Reader reader) {
1148       return delegate.decodingStream(ignoringReader(reader, separator));
1149     }
1150 
1151     @Override
1152     public BaseEncoding omitPadding() {
1153       return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1154     }
1155 
1156     @Override
1157     public BaseEncoding withPadChar(char padChar) {
1158       return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1159     }
1160 
1161     @Override
1162     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1163       throw new UnsupportedOperationException("Already have a separator");
1164     }
1165 
1166     @Override
1167     public BaseEncoding upperCase() {
1168       return delegate.upperCase().withSeparator(separator, afterEveryChars);
1169     }
1170 
1171     @Override
1172     public BaseEncoding lowerCase() {
1173       return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1174     }
1175 
1176     @Override
1177     public String toString() {
1178       return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1179     }
1180   }
1181 }
1182