1 /*
2  * Internationalization test for CUPS.
3  *
4  * Copyright 2007-2014 by Apple Inc.
5  * Copyright 1997-2006 by Easy Software Products.
6  *
7  * These coded instructions, statements, and computer programs are the
8  * property of Apple Inc. and are protected by Federal copyright
9  * law.  Distribution and use rights are outlined in the file "LICENSE.txt"
10  * which should have been included with this file.  If this file is
11  * missing or damaged, see the license at "http://www.cups.org/".
12  *
13  * This file is subject to the Apple OS-Developed Software exception.
14  */
15 
16 /*
17  * Include necessary headers...
18  */
19 
20 #include "string-private.h"
21 #include "language-private.h"
22 #include <stdlib.h>
23 #include <time.h>
24 #include <unistd.h>
25 
26 
27 /*
28  * Local globals...
29  */
30 
/*
 * Character set names.  The position of a name in this array is the numeric
 * value that the charset lookup in this file casts to cups_encoding_t, so
 * entries must never be reordered.  "unknown" fills positions that have no
 * name.  The leading comment on each row is the index of its first entry.
 */
static const char * const lang_encodings[] =
{
  /*   0 */ "us-ascii",
  /*   1 */ "iso-8859-1",
  /*   2 */ "iso-8859-2",
  /*   3 */ "iso-8859-3",
  /*   4 */ "iso-8859-4",
  /*   5 */ "iso-8859-5",
  /*   6 */ "iso-8859-6",
  /*   7 */ "iso-8859-7",
  /*   8 */ "iso-8859-8",
  /*   9 */ "iso-8859-9",
  /*  10 */ "iso-8859-10",
  /*  11 */ "utf-8",
  /*  12 */ "iso-8859-13",
  /*  13 */ "iso-8859-14",
  /*  14 */ "iso-8859-15",
  /*  15 */ "windows-874",
  /*  16 */ "windows-1250",
  /*  17 */ "windows-1251",
  /*  18 */ "windows-1252",
  /*  19 */ "windows-1253",
  /*  20 */ "windows-1254",
  /*  21 */ "windows-1255",
  /*  22 */ "windows-1256",
  /*  23 */ "windows-1257",
  /*  24 */ "windows-1258",
  /*  25 */ "koi8-r",
  /*  26 */ "koi8-u",
  /*  27 */ "iso-8859-11",
  /*  28 */ "iso-8859-16",
  /*  29 */ "mac-roman",

  /*  30 */ "unknown", "unknown", "unknown", "unknown",
  /*  34 */ "unknown", "unknown", "unknown", "unknown",
  /*  38 */ "unknown", "unknown", "unknown", "unknown",
  /*  42 */ "unknown", "unknown", "unknown", "unknown",
  /*  46 */ "unknown", "unknown", "unknown", "unknown",
  /*  50 */ "unknown", "unknown", "unknown", "unknown",
  /*  54 */ "unknown", "unknown", "unknown", "unknown",
  /*  58 */ "unknown", "unknown", "unknown", "unknown",
  /*  62 */ "unknown", "unknown",

  /*  64 */ "windows-932",
  /*  65 */ "windows-936",
  /*  66 */ "windows-949",
  /*  67 */ "windows-950",
  /*  68 */ "windows-1361",

  /*  69 */ "unknown", "unknown", "unknown", "unknown",
  /*  73 */ "unknown", "unknown", "unknown", "unknown",
  /*  77 */ "unknown", "unknown", "unknown", "unknown",
  /*  81 */ "unknown", "unknown", "unknown", "unknown",
  /*  85 */ "unknown", "unknown", "unknown", "unknown",
  /*  89 */ "unknown", "unknown", "unknown", "unknown",
  /*  93 */ "unknown", "unknown", "unknown", "unknown",
  /*  97 */ "unknown", "unknown", "unknown", "unknown",
  /* 101 */ "unknown", "unknown", "unknown", "unknown",
  /* 105 */ "unknown", "unknown", "unknown", "unknown",
  /* 109 */ "unknown", "unknown", "unknown", "unknown",
  /* 113 */ "unknown", "unknown", "unknown", "unknown",
  /* 117 */ "unknown", "unknown", "unknown", "unknown",
  /* 121 */ "unknown", "unknown", "unknown", "unknown",
  /* 125 */ "unknown", "unknown", "unknown",

  /* 128 */ "euc-cn",
  /* 129 */ "euc-jp",
  /* 130 */ "euc-kr",
  /* 131 */ "euc-tw",
  /* 132 */ "jis-x0213"
};
101 
102 
103 /*
104  * Local functions...
105  */
106 
107 static void	print_utf8(const char *msg, const cups_utf8_t *src);
108 
109 
110 /*
111  * 'main()' - Main entry for internationalization test module.
112  */
113 
114 int					/* O - Exit code */
main(int argc,char * argv[])115 main(int  argc,				/* I - Argument Count */
116      char *argv[])			/* I - Arguments */
117 {
118   FILE		*fp;			/* File pointer */
119   int		count;			/* File line counter */
120   int		status,			/* Status of current test */
121 		errors;			/* Error count */
122   char		line[1024];		/* File line source string */
123   int		len;			/* Length (count) of string */
124   char		legsrc[1024],		/* Legacy source string */
125 		legdest[1024],		/* Legacy destination string */
126 		*legptr;		/* Pointer into legacy string */
127   cups_utf8_t	utf8latin[] =		/* UTF-8 Latin-1 source */
128     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
129     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
130   cups_utf8_t	utf8repla[] =		/* UTF-8 Latin-1 replacement */
131     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
132     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
133   cups_utf8_t	utf8greek[] =		/* UTF-8 Greek source string */
134     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
135     /* "A != <ALPHA>." - use ISO 8859-7 */
136   cups_utf8_t	utf8japan[] =		/* UTF-8 Japanese source */
137     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
138     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
139   cups_utf8_t	utf8taiwan[] =		/* UTF-8 Chinese source */
140     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
141     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
142   cups_utf8_t	utf8dest[1024];		/* UTF-8 destination string */
143   cups_utf32_t	utf32dest[1024];	/* UTF-32 destination string */
144 
145 
146   if (argc > 1)
147   {
148     int			i;		/* Looping var */
149     cups_encoding_t	encoding;	/* Source encoding */
150 
151 
152     if (argc != 3)
153     {
154       puts("Usage: ./testi18n [filename charset]");
155       return (1);
156     }
157 
158     if ((fp = fopen(argv[1], "rb")) == NULL)
159     {
160       perror(argv[1]);
161       return (1);
162     }
163 
164     for (i = 0, encoding = CUPS_AUTO_ENCODING;
165          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
166 	 i ++)
167       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
168       {
169         encoding = (cups_encoding_t)i;
170 	break;
171       }
172 
173     if (encoding == CUPS_AUTO_ENCODING)
174     {
175       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
176       return (1);
177     }
178 
179     while (fgets(line, sizeof(line), fp))
180     {
181       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
182       {
183         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
184 	return (1);
185       }
186 
187       fputs((char *)utf8dest, stdout);
188     }
189 
190     fclose(fp);
191     return (0);
192   }
193 
194  /*
195   * Start with some conversion tests from a UTF-8 test file.
196   */
197 
198   errors = 0;
199 
200   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
201   {
202     perror("utf8demo.txt");
203     return (1);
204   }
205 
206  /*
207   * cupsUTF8ToUTF32
208   */
209 
210   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
211 
212   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
213   {
214     count ++;
215 
216     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
217     {
218       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
219       errors ++;
220       status = 1;
221       break;
222     }
223   }
224 
225   if (!status)
226     puts("PASS");
227 
228  /*
229   * cupsUTF8ToCharset(CUPS_EUC_JP)
230   */
231 
232   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
233 
234   rewind(fp);
235 
236   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
237   {
238     count ++;
239 
240     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
241     if (len < 0)
242     {
243       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
244       errors ++;
245       status = 1;
246       break;
247     }
248   }
249 
250   if (!status)
251     puts("PASS");
252 
253   fclose(fp);
254 
255  /*
256   * Test UTF-8 to legacy charset (ISO 8859-1)...
257   */
258 
259   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
260 
261   legdest[0] = 0;
262 
263   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
264   if (len < 0)
265   {
266     printf("FAIL (len=%d)\n", len);
267     errors ++;
268   }
269   else
270     puts("PASS");
271 
272  /*
273   * cupsCharsetToUTF8
274   */
275 
276   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
277 
278   strlcpy(legsrc, legdest, sizeof(legsrc));
279 
280   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
281   if ((size_t)len != strlen((char *)utf8latin))
282   {
283     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
284     print_utf8("    utf8latin", utf8latin);
285     print_utf8("    utf8dest", utf8dest);
286     errors ++;
287   }
288   else if (memcmp(utf8latin, utf8dest, (size_t)len))
289   {
290     puts("FAIL (results do not match)");
291     print_utf8("    utf8latin", utf8latin);
292     print_utf8("    utf8dest", utf8dest);
293     errors ++;
294   }
295   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
296   {
297     puts("FAIL (replacement characters do not work!)");
298     errors ++;
299   }
300   else
301     puts("PASS");
302 
303  /*
304   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
305   */
306 
307   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
308 
309   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
310   {
311     puts("FAIL");
312     errors ++;
313   }
314   else
315   {
316     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
317 
318     if (*legptr)
319     {
320       puts("FAIL (unknown character)");
321       errors ++;
322     }
323     else
324       puts("PASS");
325   }
326 
327   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
328 
329   strlcpy(legsrc, legdest, sizeof(legsrc));
330 
331   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
332   if ((size_t)len != strlen((char *)utf8greek))
333   {
334     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
335     print_utf8("    utf8greek", utf8greek);
336     print_utf8("    utf8dest", utf8dest);
337     errors ++;
338   }
339   else if (memcmp(utf8greek, utf8dest, (size_t)len))
340   {
341     puts("FAIL (results do not match)");
342     print_utf8("    utf8greek", utf8greek);
343     print_utf8("    utf8dest", utf8dest);
344     errors ++;
345   }
346   else
347     puts("PASS");
348 
349  /*
350   * Test UTF-8 to/from legacy charset (Windows 932)...
351   */
352 
353   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
354 
355   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
356   {
357     puts("FAIL");
358     errors ++;
359   }
360   else
361   {
362     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
363 
364     if (*legptr)
365     {
366       puts("FAIL (unknown character)");
367       errors ++;
368     }
369     else
370       puts("PASS");
371   }
372 
373   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
374 
375   strlcpy(legsrc, legdest, sizeof(legsrc));
376 
377   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
378   if ((size_t)len != strlen((char *)utf8japan))
379   {
380     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
381     print_utf8("    utf8japan", utf8japan);
382     print_utf8("    utf8dest", utf8dest);
383     errors ++;
384   }
385   else if (memcmp(utf8japan, utf8dest, (size_t)len))
386   {
387     puts("FAIL (results do not match)");
388     print_utf8("    utf8japan", utf8japan);
389     print_utf8("    utf8dest", utf8dest);
390     errors ++;
391   }
392   else
393     puts("PASS");
394 
395  /*
396   * Test UTF-8 to/from legacy charset (EUC-JP)...
397   */
398 
399   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
400 
401   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
402   {
403     puts("FAIL");
404     errors ++;
405   }
406   else
407   {
408     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
409 
410     if (*legptr)
411     {
412       puts("FAIL (unknown character)");
413       errors ++;
414     }
415     else
416       puts("PASS");
417   }
418 
419 #ifndef __linux
420   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
421 
422   strlcpy(legsrc, legdest, sizeof(legsrc));
423 
424   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
425   if ((size_t)len != strlen((char *)utf8japan))
426   {
427     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
428     print_utf8("    utf8japan", utf8japan);
429     print_utf8("    utf8dest", utf8dest);
430     errors ++;
431   }
432   else if (memcmp(utf8japan, utf8dest, (size_t)len))
433   {
434     puts("FAIL (results do not match)");
435     print_utf8("    utf8japan", utf8japan);
436     print_utf8("    utf8dest", utf8dest);
437     errors ++;
438   }
439   else
440     puts("PASS");
441 #endif /* !__linux */
442 
443  /*
444   * Test UTF-8 to/from legacy charset (Windows 950)...
445   */
446 
447   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
448 
449   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
450   {
451     puts("FAIL");
452     errors ++;
453   }
454   else
455   {
456     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
457 
458     if (*legptr)
459     {
460       puts("FAIL (unknown character)");
461       errors ++;
462     }
463     else
464       puts("PASS");
465   }
466 
467   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
468 
469   strlcpy(legsrc, legdest, sizeof(legsrc));
470 
471   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
472   if ((size_t)len != strlen((char *)utf8taiwan))
473   {
474     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
475     print_utf8("    utf8taiwan", utf8taiwan);
476     print_utf8("    utf8dest", utf8dest);
477     errors ++;
478   }
479   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
480   {
481     puts("FAIL (results do not match)");
482     print_utf8("    utf8taiwan", utf8taiwan);
483     print_utf8("    utf8dest", utf8dest);
484     errors ++;
485   }
486   else
487     puts("PASS");
488 
489  /*
490   * Test UTF-8 to/from legacy charset (EUC-TW)...
491   */
492 
493   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
494 
495   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
496   {
497     puts("FAIL");
498     errors ++;
499   }
500   else
501   {
502     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
503 
504     if (*legptr)
505     {
506       puts("FAIL (unknown character)");
507       errors ++;
508     }
509     else
510       puts("PASS");
511   }
512 
513   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
514 
515   strlcpy(legsrc, legdest, sizeof(legsrc));
516 
517   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
518   if ((size_t)len != strlen((char *)utf8taiwan))
519   {
520     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
521     print_utf8("    utf8taiwan", utf8taiwan);
522     print_utf8("    utf8dest", utf8dest);
523     errors ++;
524   }
525   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
526   {
527     puts("FAIL (results do not match)");
528     print_utf8("    utf8taiwan", utf8taiwan);
529     print_utf8("    utf8dest", utf8dest);
530     errors ++;
531   }
532   else
533     puts("PASS");
534 
535 #if 0
536  /*
537   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
538   */
539   if (verbose)
540     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
541   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
542   if (len < 0)
543     return (1);
544   if (verbose)
545   {
546     print_utf8(" utf8good ", utf8good);
547     print_utf32(" utf32dest", utf32dest);
548   }
549   memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
550   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
551   if (len < 0)
552     return (1);
553   if (len != strlen ((char *) utf8good))
554     return (1);
555   if (memcmp(utf8good, utf8dest, len) != 0)
556     return (1);
557 
558  /*
559   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
560   */
561   if (verbose)
562     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
563   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
564   if (len >= 0)
565     return (1);
566   if (verbose)
567     print_utf8(" utf8bad  ", utf8bad);
568 
569  /*
570   * Test _cupsCharmapFlush()...
571   */
572   if (verbose)
573     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
574   _cupsCharmapFlush();
575   return (0);
576 #endif /* 0 */
577 
578   return (errors > 0);
579 }
580 
581 
582 /*
583  * 'print_utf8()' - Print UTF-8 string with (optional) message.
584  */
585 
586 static void
print_utf8(const char * msg,const cups_utf8_t * src)587 print_utf8(const char	     *msg,	/* I - Message String */
588 	   const cups_utf8_t *src)	/* I - UTF-8 Source String */
589 {
590   const char	*prefix;		/* Prefix string */
591 
592 
593   if (msg)
594     printf("%s:", msg);
595 
596   for (prefix = " "; *src; src ++)
597   {
598     printf("%s%02x", prefix, *src);
599 
600     if ((src[0] & 0x80) && (src[1] & 0x80))
601       prefix = "";
602     else
603       prefix = " ";
604   }
605 
606   putchar('\n');
607 }
608