1 /* ================================================================ */
2 /*
3 File:   ConvertUTF7.c
4 Author: David B. Goldsmith
5 Copyright (C) 1994, 1996 IBM Corporation All rights reserved.
6 Revisions: Header update only July, 2001.
7 
8 This code is copyrighted. Under the copyright laws, this code may not
9 be copied, in whole or part, without prior written consent of IBM Corporation.
10 
11 IBM Corporation grants the right to use this code as long as this ENTIRE
12 copyright notice is reproduced in the code.  The code is provided
13 AS-IS, AND IBM CORPORATION DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
14 IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
15 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT
16 WILL IBM CORPORATION BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
17 WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
18 INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
19 LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
20 IF IBM CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
21 BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
22 LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
23 LIMITATION MAY NOT APPLY TO YOU.
24 
25 RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
26 government is subject to restrictions as set forth in subparagraph
27 (c)(l)(ii) of the Rights in Technical Data and Computer Software
28 clause at DFARS 252.227-7013 and FAR 52.227-19.
29 
30 This code may be protected by one or more U.S. and International
31 Patents.
32 
33 */
34 
35 #include "CVTUTF7.H"
36 
/* The 64 base64 digits, indexed by their 6-bit value. */
static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Inverse of base64[]: ASCII code -> 6-bit value, or -1 when the code is
   not a base64 digit.  Filled in by tabinit(). */
static short invbase64[128];

/* Characters that may always be written as themselves. */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
/* Characters written as themselves only when the caller allows it. */
static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}";
static const char spaces[] = " \011\015\012";		/* space, tab, return, line feed */
/* Per-ASCII-code flags, non-zero when the character has to be base64
   encoded.  "safe" treats the optional[] set as needing a shift, "opt"
   lets it through directly.  Filled in by tabinit(). */
static char mustshiftsafe[128];
static char mustshiftopt[128];

/* Non-zero until tabinit() has built the tables above. */
static int needtables = 1;

#define SHIFT_IN '+'
#define SHIFT_OUT '-'

/*
 * Build invbase64[], mustshiftsafe[] and mustshiftopt[] from the string
 * constants above and clear needtables.
 *
 * Every entry starts out as "must shift" / "not a base64 digit"; the
 * entries for the listed characters are then overwritten.  The strings
 * are walked up to their terminating NUL so no library call is needed,
 * and each character is converted to unsigned char before being used as
 * a subscript so the index never depends on the signedness of plain char.
 */
static void
tabinit(void)
{
	const char *p;
	int i;

	for (i = 0; i < 128; ++i)
	{
		mustshiftopt[i] = mustshiftsafe[i] = 1;
		invbase64[i] = -1;
	}

	/* Direct characters and white space never need a shift. */
	for (p = direct; *p != '\0'; ++p)
		mustshiftopt[(unsigned char)*p] = mustshiftsafe[(unsigned char)*p] = 0;
	for (p = spaces; *p != '\0'; ++p)
		mustshiftopt[(unsigned char)*p] = mustshiftsafe[(unsigned char)*p] = 0;

	/* The optional set is only exempt in the "opt" table. */
	for (p = optional; *p != '\0'; ++p)
		mustshiftopt[(unsigned char)*p] = 0;

	/* Position in base64[] is the digit's 6-bit value. */
	for (i = 0; base64[i] != '\0'; ++i)
		invbase64[(unsigned char)base64[i]] = (short)i;

	needtables = 0;
}
77 
/*
 * Bit buffer shared by both converters.
 *
 * Pending bits are kept left-aligned inside the low 32 bits of BITbuffer;
 * bufferbits counts how many of them are valid.  unsigned long may be
 * wider than 32 bits, so after every left shift the value is masked back
 * to 32 bits -- otherwise bits that were already consumed would survive
 * above bit 31 and come back in the next READ_N_BITS result.
 *
 * WRITE_N_BITS(x, n) appends the low n bits of x.
 * READ_N_BITS(n)     removes and yields the oldest n bits.
 * Both accept n == 0 (nothing written, 0 read) without shifting by 32,
 * and the masks are built from unsigned values so no negative number is
 * ever shifted.  The arguments are evaluated more than once.
 */
#define DECLARE_BIT_BUFFER unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0
#define BITS_IN_BUFFER bufferbits
#define BIT_BUFFER_MASK 0xFFFFFFFFUL
#define WRITE_N_BITS(x, n) \
	((BITbuffer |= ((n) > 0 \
		? (((unsigned long)(x) & ~(~0UL << (n))) << (32 - (n) - bufferbits)) \
		: 0UL)), \
	 bufferbits += (n))
#define READ_N_BITS(n) \
	((buffertemp = ((n) > 0 ? (BITbuffer >> (32 - (n))) : 0UL)), \
	 (BITbuffer = (BITbuffer << (n)) & BIT_BUFFER_MASK), \
	 (bufferbits -= (n)), \
	 buffertemp)
/* Leaves the enclosing loop with result = targetExhausted when the output
   is full.  This has to stay a plain brace block: wrapping it in
   do { } while (0) would make the break leave that wrapper instead of the
   caller's conversion loop. */
#define TARGETCHECK  {if (target >= targetEnd) {result = targetExhausted; break;}}
83 
ConvertUCS2toUTF7(UCS2 ** sourceStart,UCS2 * sourceEnd,char ** targetStart,char * targetEnd,int optional,int verbose)84 ConversionResult ConvertUCS2toUTF7(
85                 UCS2** sourceStart, UCS2* sourceEnd,
86                 char** targetStart, char* targetEnd,
87                 int optional, int verbose)
88 {
89 	ConversionResult result = ok;
90 	DECLARE_BIT_BUFFER;
91 	int shifted = 0, needshift = 0, done = 0;
92 	register UCS2 *source = *sourceStart;
93 	register char *target = *targetStart;
94 	char *mustshift;
95 
96 	if (needtables)
97 		tabinit();
98 
99 	if (optional)
100 		mustshift = mustshiftopt;
101 	else
102 		mustshift = mustshiftsafe;
103 
104 	do
105 	{
106 		register UCS2 r;
107 
108 		if (!(done = (source >= sourceEnd)))
109 			r = *source++;
110 		needshift = (!done && ((r > 0x7f) || mustshift[r]));
111 
112 		if (needshift && !shifted)
113 		{
114 			TARGETCHECK;
115 			*target++ = SHIFT_IN;
116 			/* Special case handling of the SHIFT_IN character */
117 			if (r == (UCS2)SHIFT_IN) {
118 				TARGETCHECK;
119 				*target++ = SHIFT_OUT;
120 			}
121 			else
122 				shifted = 1;
123 		}
124 
125 		if (shifted)
126 		{
127 			/* Either write the character to the bit buffer, or pad
128 			   the bit buffer out to a full base64 character.
129 			 */
130 			if (needshift)
131 				WRITE_N_BITS(r, 16);
132 			else
133 				WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6);
134 
135 			/* Flush out as many full base64 characters as possible
136 			   from the bit buffer.
137 			 */
138 			while ((target < targetEnd) && BITS_IN_BUFFER >= 6)
139 			{
140 				*target++ = base64[READ_N_BITS(6)];
141 			}
142 
143 			if (BITS_IN_BUFFER >= 6)
144 				TARGETCHECK;
145 
146 			if (!needshift)
147 			{
148 				/* Write the explicit shift out character if
149 				   1) The caller has requested we always do it, or
150 				   2) The directly encoded character is in the
151 				   base64 set, or
152 				   3) The directly encoded character is SHIFT_OUT.
153 				 */
154 				if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT)))
155 				{
156 					TARGETCHECK;
157 					*target++ = SHIFT_OUT;
158 				}
159 				shifted = 0;
160 			}
161 		}
162 
163 		/* The character can be directly encoded as ASCII. */
164 		if (!needshift && !done)
165 		{
166 			TARGETCHECK;
167 			*target++ = (char) r;
168 		}
169 
170 	}
171 	while (!done);
172 
173     *sourceStart = source;
174     *targetStart = target;
175     return result;
176 }
177 
ConvertUTF7toUCS2(char ** sourceStart,char * sourceEnd,UCS2 ** targetStart,UCS2 * targetEnd)178 ConversionResult ConvertUTF7toUCS2(
179                 char** sourceStart, char* sourceEnd,
180                 UCS2** targetStart, UCS2* targetEnd)
181 {
182 	ConversionResult result = ok;
183 	DECLARE_BIT_BUFFER;
184 	int shifted = 0, first = 0, wroteone = 0, base64EOF, base64value, done;
185 	unsigned int c, prevc;
186 	unsigned long junk;
187 	register char *source = *sourceStart;
188 	register UCS2 *target = *targetStart;
189 
190 	if (needtables)
191 		tabinit();
192 
193 	do
194 	{
195 		/* read an ASCII character c */
196 		if (!(done = (source >= sourceEnd)))
197 			c = *source++;
198 		if (shifted)
199 		{
200 			/* We're done with a base64 string if we hit EOF, it's not a valid
201 			   ASCII character, or it's not in the base64 set.
202 			 */
203 			base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0;
204 			if (base64EOF)
205 			{
206 				shifted = 0;
207 				/* If the character causing us to drop out was SHIFT_IN or
208 				   SHIFT_OUT, it may be a special escape for SHIFT_IN. The
209 				   test for SHIFT_IN is not necessary, but allows an alternate
210 				   form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This
211 				   only works for some values of SHIFT_IN.
212 				 */
213 				if (!done && (c == SHIFT_IN || c == SHIFT_OUT))
214 				{
215 					/* get another character c */
216 					prevc = c;
217 					if (!(done = (source >= sourceEnd)))
218 						c = *source++;
219 					/* If no base64 characters were encountered, and the
220 					   character terminating the shift sequence was
221 					   SHIFT_OUT, then it's a special escape for SHIFT_IN.
222 					 */
223 					if (first && prevc == SHIFT_OUT)
224 					{
225 						/* write SHIFT_IN unicode */
226 						TARGETCHECK;
227 						*target++ = (UCS2)SHIFT_IN;
228 					}
229 					else if (!wroteone)
230 					{
231 						result = sourceCorrupt;
232 						/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
233 					}
234 				}
235 				else if (!wroteone)
236 				{
237 					result = sourceCorrupt;
238 					/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
239 				}
240 			}
241 			else
242 			{
243 				/* Add another 6 bits of base64 to the bit buffer. */
244 				WRITE_N_BITS(base64value, 6);
245 				first = 0;
246 			}
247 
248 			/* Extract as many full 16 bit characters as possible from the
249 			   bit buffer.
250 			 */
251 			while (BITS_IN_BUFFER >= 16 && (target < targetEnd))
252 			{
253 				/* write a unicode */
254 				*target++ = READ_N_BITS(16);
255 				wroteone = 1;
256 			}
257 
258 			if (BITS_IN_BUFFER >= 16)
259 				TARGETCHECK;
260 
261 			if (base64EOF)
262 			{
263 				junk = READ_N_BITS(BITS_IN_BUFFER);
264 				if (junk)
265 				{
266 					result = sourceCorrupt;
267 					/* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */;
268 				}
269 			}
270 		}
271 
272 		if (!shifted && !done)
273 		{
274 			if (c == SHIFT_IN)
275 			{
276 				shifted = 1;
277 				first = 1;
278 				wroteone = 0;
279 			}
280 			else
281 			{
282 				/* It must be a directly encoded character. */
283 				if (c > 0x7f)
284 				{
285 					result = sourceCorrupt;
286 					/* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */;
287 				}
288 				/* write a unicode */
289 				TARGETCHECK;
290 				*target++ = c;
291 			}
292 		}
293 	}
294 	while (!done);
295 
296     *sourceStart = source;
297     *targetStart = target;
298     return result;
299 }
300