1 package SQLite;
2 
3 /**
4  * String encoder/decoder for SQLite.
5  *
6  * This module was kindly donated by Eric van der Maarel of Nedap N.V.
7  *
8  * This encoder was implemented based on an original idea from an anonymous
9  * author in the source code of the SQLite distribution.
10  * I feel obliged to provide a quote from the original C-source code:
11  *
12  * "The author disclaims copyright to this source code.  In place of
13  *  a legal notice, here is a blessing:
14  *
15  *     May you do good and not evil.
16  *     May you find forgiveness for yourself and forgive others.
17  *     May you share freely, never taking more than you give."
18  *
19  */
20 
public class StringEncoder {

    /** Character used to introduce a two-character escape sequence. */
    private static final char ESCAPE = 1;

    /**
     * Encodes the given byte array into a string that can be used by
     * the SQLite database. The database cannot handle null (0x00) and
     * the character '\'' (0x27). The encoding consists of escaping
     * these characters with a reserved character (0x01). The escaping
     * is applied after determining and applying a shift that minimizes
     * the number of escapes required. The shift itself is stored as the
     * first character of the result and is never 0x00 or 0x27.
     * With this encoding the data of original size n is increased to a
     * maximum of 1+(n*257)/254.
     * For sufficiently large n the overhead is thus less than 1.2%.
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string. When an empty array is
     *     provided a string of length 1 is returned, the value of
     *     which is bogus.
     *     When decoded with this class' <code>decode</code> method
     *     a string of size 1 will return an empty byte array.
     */
    public static String encode(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            // bogus shift, no data
            return "x";
        }
        // histogram of the unsigned byte values
        int[] cnt = new int[256];
        for (int i = 0; i < a.length; i++) {
            cnt[a[i] & 0xff]++;
        }
        // determine shift for minimum number of escapes; the shift is
        // emitted as a character itself, so 0 and '\'' are not candidates
        int shift = 1;
        int nEscapes = a.length;
        for (int i = 1; i < 256; i++) {
            if (i == '\'') {
                continue;
            }
            // bytes equal to i, i+1 and i+'\'' become 0, 1 and '\'' after
            // the shift is subtracted, i.e. exactly the values needing escape
            int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff];
            if (sum < nEscapes) {
                nEscapes = sum;
                shift = i;
                if (nEscapes == 0) {
                    // cannot become smaller
                    break;
                }
            }
        }
        // construct encoded output: shift character, then shifted data
        StringBuilder out = new StringBuilder(a.length + nEscapes + 1);
        out.append((char) shift);
        for (int i = 0; i < a.length; i++) {
            // apply shift
            char c = (char) ((a[i] - shift) & 0xff);
            // insert escapes
            if (c == 0) { // forbidden
                out.append(ESCAPE);
                out.append((char) 1);
            } else if (c == ESCAPE) { // escape character
                out.append(ESCAPE);
                out.append((char) 2);
            } else if (c == '\'') { // forbidden
                out.append(ESCAPE);
                out.append((char) 3);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Decodes the given string that is assumed to be a valid encoding
     * of a byte array. Typically the given string is generated by
     * this class' <code>encode</code> or <code>encodeX</code> method.
     * A string longer than two characters that starts with
     * <code>X'</code> and ends with <code>'</code> is treated as SQLite3
     * BLOB notation; anything else is treated as the shift/escape
     * encoding produced by <code>encode</code>.
     * @param s the given string encoding.
     * @return the byte array obtained from the decoding.
     * @throws IllegalArgumentException when the string given is null,
     *    empty, ends in the middle of an escape sequence, contains an
     *    unknown escape sequence, or is in BLOB notation with an odd
     *    number of hex digits.
     */
    public static byte[] decode(String s) {
        if (s == null || s.length() == 0) {
            // even an empty array is encoded with a leading shift character
            throw new IllegalArgumentException(
                "invalid string passed to decoder: empty");
        }
        int len = s.length();
        if (len > 2 && s.charAt(0) == 'X'
            && s.charAt(1) == '\'' && s.charAt(len - 1) == '\'') {
            // SQLite3 BLOB syntax
            return decodeX(s);
        }
        // first element is the shift
        int shift = s.charAt(0);
        // upper bound on the size; escapes make the real result shorter
        byte[] result = new byte[len - 1];
        int i = 1;
        int j = 0;
        while (i < len) {
            int c = s.charAt(i++);
            if (c == ESCAPE) { // escape character found
                if (i >= len) {
                    // escape character without its second half
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
                c = s.charAt(i++);
                if (c == 1) {
                    c = 0;
                } else if (c == 2) {
                    c = 1;
                } else if (c == 3) {
                    c = '\'';
                } else {
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
            }
            // undo shift
            result[j++] = (byte) ((c + shift) & 0xff);
        }
        // provide array of correct length
        if (result.length != j) {
            result = byteCopy(result, 0, j, new byte[j]);
        }
        return result;
    }

    /**
     * Decodes a string in SQLite3 BLOB notation. The caller has already
     * verified the leading <code>X'</code> and the trailing quote.
     * Characters that are not hex digits are decoded as the value 0.
     * @param s the string in the form X'..'.
     * @return the decoded bytes.
     * @throws IllegalArgumentException when the number of characters
     *    between the quotes is odd.
     */
    private static byte[] decodeX(String s) {
        int nDigits = s.length() - 3;
        if ((nDigits & 1) != 0) {
            throw new IllegalArgumentException(
                "invalid string passed to decoder: odd number of hex digits");
        }
        byte[] result = new byte[nDigits / 2];
        for (int i = 2, k = 0; k < result.length; i += 2, k++) {
            int hi = hexValue(s.charAt(i));
            int lo = hexValue(s.charAt(i + 1));
            result[k] = (byte) ((hi << 4) | lo);
        }
        return result;
    }

    /**
     * Returns the numeric value of a hex digit in either case.
     * @param c the character to convert.
     * @return the value 0..15, or 0 when c is not a hex digit.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        // non-hex characters are tolerated and read as zero
        return 0;
    }

    /**
     * Copies count elements from source, starting at element with
     * index offset, to the given target.
     * @param source the source.
     * @param offset the offset.
     * @param count the number of elements to be copied.
     * @param target the target to be returned.
     * @return the target being copied to.
     */
    private static byte[] byteCopy(byte[] source, int offset,
                                   int count, byte[] target) {
        System.arraycopy(source, offset, target, 0, count);
        return target;
    }

    /** Upper-case hex digits, indexed by nibble value. */
    static final char[] xdigits = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Encodes the given byte array into SQLite3 blob notation, ie X'..'
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string.
     */
    public static String encodeX(byte[] a) {
        // check input
        if (a == null || a.length == 0) {
            return "X''";
        }
        // two hex digits per byte plus the X'..' wrapper
        StringBuilder out = new StringBuilder(a.length * 2 + 3);
        out.append('X');
        out.append('\'');
        for (int i = 0; i < a.length; i++) {
            out.append(xdigits[(a[i] >> 4) & 0x0F]);
            out.append(xdigits[a[i] & 0x0F]);
        }
        out.append('\'');
        return out.toString();
    }
}
245