1 /* ====================================================================
2  * Copyright (c) 2006 J.T. Beetstra
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  * ====================================================================
23  */
24 
25 package com.beetstra.jutf7;
26 
27 import java.util.Arrays;
28 
29 /**
30  * <p>
31  * Represent a base 64 mapping. The 64 characters used in the encoding can be
32  * specified, since modified-UTF-7 uses other characters than UTF-7 (',' instead
33  * of '/').
34  * </p>
35  * <p>
36  * The exact type of the arguments and result values is adapted to the needs of
37  * the encoder and decoder, as opposed to following a strict interpretation of
38  * base 64.
39  * </p>
40  * <p>
41  * Base 64, as specified in RFC 2045, is an encoding used to encode bytes as
42  * characters. In (modified-)UTF-7 however, it is used to encode characters as
43  * bytes, using some intermediate steps:
44  * </p>
45  * <ol>
46  * <li>Encode all characters as a 16-bit (UTF-16) integer value</li>
47  * <li>Write this as stream of bytes (most-significant first)</li>
48  * <li>Encode these bytes using (modified) base 64 encoding</li>
49  * <li>Write the thus formed stream of characters as a stream of bytes, using
50  * ASCII encoding</li>
51  * </ol>
52  *
53  * @author Jaap Beetstra
54  */
class Base64Util {
    /** Number of characters a base 64 alphabet must contain. */
    private static final int ALPHABET_LENGTH = 64;

    /** Number of 7-bit ASCII code points; also the size of the reverse lookup table. */
    private static final int ASCII_LIMIT = 128;

    /** Maps a sextet value (0..63) to its character. */
    private final char[] alphabet;

    /** Maps an ASCII code (0..127) to its sextet value, or -1 if not in the alphabet. */
    private final int[] inverseAlphabet;

    /**
     * Initializes the class with the specified encoding/decoding alphabet.
     *
     * @param alphabet The 64 characters of the alphabet; the character at index
     *            <code>i</code> encodes the sextet value <code>i</code>
     * @throws IllegalArgumentException if alphabet is not 64 characters long or
     *             contains characters which are not 7-bit ASCII
     */
    Base64Util(final String alphabet) {
        if (alphabet.length() != ALPHABET_LENGTH) {
            throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not "
                    + alphabet.length() + ")");
        }
        this.alphabet = alphabet.toCharArray();
        inverseAlphabet = new int[ASCII_LIMIT];
        // -1 marks "not part of the alphabet"
        Arrays.fill(inverseAlphabet, -1);
        for (int i = 0; i < this.alphabet.length; i++) {
            final char ch = this.alphabet[i];
            if (ch >= ASCII_LIMIT) {
                throw new IllegalArgumentException("invalid character in alphabet: " + ch);
            }
            inverseAlphabet[ch] = i;
        }
    }

    /**
     * Returns the integer value of the six bits represented by the specified
     * character.
     *
     * @param ch The character, as an ASCII encoded byte
     * @return The six bits, as an integer value, or -1 if the byte is not in
     *         the alphabet (this includes every byte with the high bit set)
     */
    int getSextet(final byte ch) {
        // A Java byte is signed (-128..127): values outside 7-bit ASCII show up
        // as negative numbers, never as values >= 128, and must not be used as
        // an array index.
        if (ch < 0) {
            return -1;
        }
        return inverseAlphabet[ch];
    }

    /**
     * Tells whether the alphabet contains the specified character.
     *
     * @param ch The character
     * @return true if the alphabet contains <code>ch</code>, false otherwise
     */
    boolean contains(final char ch) {
        // char is unsigned, so only the upper bound needs checking here
        if (ch >= ASCII_LIMIT) {
            return false;
        }
        return inverseAlphabet[ch] >= 0;
    }

    /**
     * Encodes the six bit group as a character.
     *
     * @param sextet The six bit group to be encoded, in the range 0..63
     * @return The ASCII value of the character
     * @throws ArrayIndexOutOfBoundsException if <code>sextet</code> is outside
     *             the range 0..63
     */
    byte getChar(final int sextet) {
        // The cast is lossless: the constructor only accepts 7-bit ASCII characters
        return (byte) alphabet[sextet];
    }
}
118