1 package SQLite; 2 3 /** 4 * String encoder/decoder for SQLite. 5 * 6 * This module was kindly donated by Eric van der Maarel of Nedap N.V. 7 * 8 * This encoder was implemented based on an original idea from an anonymous 9 * author in the source code of the SQLite distribution. 10 * I feel obliged to provide a quote from the original C-source code: 11 * 12 * "The author disclaims copyright to this source code. In place of 13 * a legal notice, here is a blessing: 14 * 15 * May you do good and not evil. 16 * May you find forgiveness for yourself and forgive others. 17 * May you share freely, never taking more than you give." 18 * 19 */ 20 21 public class StringEncoder { 22 23 /** 24 * Encodes the given byte array into a string that can be used by 25 * the SQLite database. The database cannot handle null (0x00) and 26 * the character '\'' (0x27). The encoding consists of escaping 27 * these characters with a reserved character (0x01). The escaping 28 * is applied after determining and applying a shift that minimizes 29 * the number of escapes required. 30 * With this encoding the data of original size n is increased to a 31 * maximum of 1+(n*257)/254. 32 * For sufficiently large n the overhead is thus less than 1.2%. 33 * @param a the byte array to be encoded. A null reference is handled as 34 * an empty array. 35 * @return the encoded bytes as a string. When an empty array is 36 * provided a string of length 1 is returned, the value of 37 * which is bogus. 38 * When decoded with this class' <code>decode</code> method 39 * a string of size 1 will return an empty byte array. 40 */ 41 encode(byte[] a)42 public static String encode(byte[] a) { 43 // check input 44 if (a == null || a.length == 0) { 45 // bogus shift, no data 46 return "x"; 47 } 48 // determine count 49 int[] cnt = new int[256]; 50 for (int i = 0 ; i < a.length; i++) { 51 cnt[a[i] & 0xff]++; 52 } 53 // determine shift for minimum number of escapes 54 int shift = 1; 55 int nEscapes = a.length; 56 for (int i = 1; i < 256; i++) { 57 if (i == '\'') { 58 continue; 59 } 60 int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff]; 61 if (sum < nEscapes) { 62 nEscapes = sum; 63 shift = i; 64 if (nEscapes == 0) { 65 // cannot become smaller 66 break; 67 } 68 } 69 } 70 // construct encoded output 71 int outLen = a.length + nEscapes + 1; 72 StringBuffer out = new StringBuffer(outLen); 73 out.append((char)shift); 74 for (int i = 0; i < a.length; i++) { 75 // apply shift 76 char c = (char)((a[i] - shift)&0xff); 77 // insert escapes 78 if (c == 0) { // forbidden 79 out.append((char)1); 80 out.append((char)1); 81 } else if (c == 1) { // escape character 82 out.append((char)1); 83 out.append((char)2); 84 } else if (c == '\'') { // forbidden 85 out.append((char)1); 86 out.append((char)3); 87 } else { 88 out.append(c); 89 } 90 } 91 return out.toString(); 92 } 93 94 /** 95 * Decodes the given string that is assumed to be a valid encoding 96 * of a byte array. Typically the given string is generated by 97 * this class' <code>encode</code> method. 98 * @param s the given string encoding. 99 * @return the byte array obtained from the decoding. 100 * @throws IllegalArgumentException when the string given is not 101 * a valid encoded string for this encoder. 102 */ 103 decode(String s)104 public static byte[] decode(String s) { 105 char[] a = s.toCharArray(); 106 if (a.length > 2 && a[0] == 'X' && 107 a[1] == '\'' && a[a.length-1] == '\'') { 108 // SQLite3 BLOB syntax 109 byte[] result = new byte[(a.length-3)/2]; 110 for (int i = 2, k = 0; i < a.length - 1; i += 2, k++) { 111 byte tmp; 112 switch (a[i]) { 113 case '0': tmp = 0; break; 114 case '1': tmp = 1; break; 115 case '2': tmp = 2; break; 116 case '3': tmp = 3; break; 117 case '4': tmp = 4; break; 118 case '5': tmp = 5; break; 119 case '6': tmp = 6; break; 120 case '7': tmp = 7; break; 121 case '8': tmp = 8; break; 122 case '9': tmp = 9; break; 123 case 'A': 124 case 'a': tmp = 10; break; 125 case 'B': 126 case 'b': tmp = 11; break; 127 case 'C': 128 case 'c': tmp = 12; break; 129 case 'D': 130 case 'd': tmp = 13; break; 131 case 'E': 132 case 'e': tmp = 14; break; 133 case 'F': 134 case 'f': tmp = 15; break; 135 default: tmp = 0; break; 136 } 137 result[k] = (byte) (tmp << 4); 138 switch (a[i+1]) { 139 case '0': tmp = 0; break; 140 case '1': tmp = 1; break; 141 case '2': tmp = 2; break; 142 case '3': tmp = 3; break; 143 case '4': tmp = 4; break; 144 case '5': tmp = 5; break; 145 case '6': tmp = 6; break; 146 case '7': tmp = 7; break; 147 case '8': tmp = 8; break; 148 case '9': tmp = 9; break; 149 case 'A': 150 case 'a': tmp = 10; break; 151 case 'B': 152 case 'b': tmp = 11; break; 153 case 'C': 154 case 'c': tmp = 12; break; 155 case 'D': 156 case 'd': tmp = 13; break; 157 case 'E': 158 case 'e': tmp = 14; break; 159 case 'F': 160 case 'f': tmp = 15; break; 161 default: tmp = 0; break; 162 } 163 result[k] |= tmp; 164 } 165 return result; 166 } 167 // first element is the shift 168 byte[] result = new byte[a.length-1]; 169 int i = 0; 170 int shift = s.charAt(i++); 171 int j = 0; 172 while (i < s.length()) { 173 int c; 174 if ((c = s.charAt(i++)) == 1) { // escape character found 175 if ((c = s.charAt(i++)) == 1) { 176 c = 0; 177 } else if (c == 2) { 178 c = 1; 179 } else if (c == 3) { 180 c = '\''; 181 } else { 182 throw new IllegalArgumentException( 183 "invalid string passed to decoder: " + j); 184 } 185 } 186 // do shift 187 result[j++] = (byte)((c + shift) & 0xff); 188 } 189 int outLen = j; 190 // provide array of correct length 191 if (result.length != outLen) { 192 result = byteCopy(result, 0, outLen, new byte[outLen]); 193 } 194 return result; 195 } 196 197 /** 198 * Copies count elements from source, starting at element with 199 * index offset, to the given target. 200 * @param source the source. 201 * @param offset the offset. 202 * @param count the number of elements to be copied. 203 * @param target the target to be returned. 204 * @return the target being copied to. 205 */ 206 byteCopy(byte[] source, int offset, int count, byte[] target)207 private static byte[] byteCopy(byte[] source, int offset, 208 int count, byte[] target) { 209 for (int i = offset, j = 0; i < offset + count; i++, j++) { 210 target[j] = source[i]; 211 } 212 return target; 213 } 214 215 216 static final char[] xdigits = { 217 '0', '1', '2', '3', '4', '5', '6', '7', 218 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 219 }; 220 221 /** 222 * Encodes the given byte array into SQLite3 blob notation, ie X'..' 223 * @param a the byte array to be encoded. A null reference is handled as 224 * an empty array. 225 * @return the encoded bytes as a string. 226 */ 227 encodeX(byte[] a)228 public static String encodeX(byte[] a) { 229 // check input 230 if (a == null || a.length == 0) { 231 return "X''"; 232 } 233 int outLen = a.length * 2 + 3; 234 StringBuffer out = new StringBuffer(outLen); 235 out.append('X'); 236 out.append('\''); 237 for (int i = 0; i < a.length; i++) { 238 out.append(xdigits[(a[i] >> 4) & 0x0F]); 239 out.append(xdigits[a[i] & 0x0F]); 240 } 241 out.append('\''); 242 return out.toString(); 243 } 244 } 245