1 /* ==================================================================== 2 * Copyright (c) 2006 J.T. Beetstra 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * ==================================================================== 23 */ 24 25 package com.beetstra.jutf7; 26 27 import java.util.Arrays; 28 29 /** 30 * <p> 31 * Represent a base 64 mapping. The 64 characters used in the encoding can be 32 * specified, since modified-UTF-7 uses other characters than UTF-7 (',' instead 33 * of '/'). 34 * </p> 35 * <p> 36 * The exact type of the arguments and result values is adapted to the needs of 37 * the encoder and decoder, as opposed to following a strict interpretation of 38 * base 64. 39 * </p> 40 * <p> 41 * Base 64, as specified in RFC 2045, is an encoding used to encode bytes as 42 * characters. In (modified-)UTF-7 however, it is used to encode characters as 43 * bytes, using some intermediate steps: 44 * </p> 45 * <ol> 46 * <li>Encode all characters as a 16-bit (UTF-16) integer value</li> 47 * <li>Write this as stream of bytes (most-significant first)</li> 48 * <li>Encode these bytes using (modified) base 64 encoding</li> 49 * <li>Write the thus formed stream of characters as a stream of bytes, using 50 * ASCII encoding</li> 51 * </ol> 52 * 53 * @author Jaap Beetstra 54 */ 55 class Base64Util { 56 private static final int ALPHABET_LENGTH = 64; 57 private final char[] alphabet; 58 private final int[] inverseAlphabet; 59 60 /** 61 * Initializes the class with the specified encoding/decoding alphabet. 62 * 63 * @param alphabet 64 * @throws IllegalArgumentException if alphabet is not 64 characters long or 65 * contains characters which are not 7-bit ASCII 66 */ Base64Util(final String alphabet)67 Base64Util(final String alphabet) { 68 this.alphabet = alphabet.toCharArray(); 69 if (alphabet.length() != ALPHABET_LENGTH) 70 throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not " 71 + alphabet.length() + ")"); 72 inverseAlphabet = new int[128]; 73 Arrays.fill(inverseAlphabet, -1); 74 for (int i = 0; i < this.alphabet.length; i++) { 75 final char ch = this.alphabet[i]; 76 if (ch >= 128) 77 throw new IllegalArgumentException("invalid character in alphabet: " + ch); 78 inverseAlphabet[ch] = i; 79 } 80 } 81 82 /** 83 * Returns the integer value of the six bits represented by the specified 84 * character. 85 * 86 * @param ch The character, as a ASCII encoded byte 87 * @return The six bits, as an integer value, or -1 if the byte is not in 88 * the alphabet 89 */ getSextet(final byte ch)90 int getSextet(final byte ch) { 91 if (ch >= 128) 92 return -1; 93 return inverseAlphabet[ch]; 94 } 95 96 /** 97 * Tells whether the alphabet contains the specified character. 98 * 99 * @param ch The character 100 * @return true if the alphabet contains <code>ch</code>, false otherwise 101 */ contains(final char ch)102 boolean contains(final char ch) { 103 if (ch >= 128) 104 return false; 105 return inverseAlphabet[ch] >= 0; 106 } 107 108 /** 109 * Encodes the six bit group as a character. 110 * 111 * @param sextet The six bit group to be encoded 112 * @return The ASCII value of the character 113 */ getChar(final int sextet)114 byte getChar(final int sextet) { 115 return (byte)alphabet[sextet]; 116 } 117 } 118