1 /* winduni.c -- unicode support for the windres program.
2 Copyright (C) 1997-2016 Free Software Foundation, Inc.
3 Written by Ian Lance Taylor, Cygnus Support.
4 Rewritten by Kai Tietz, Onevision.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
30 all. */
31
32 #include "sysdep.h"
33 #include "bfd.h"
34 #include "libiberty.h" /* for xstrdup */
35 #include "bucomm.h"
/* The headers above must be included before windows.h and winnls.h.  */
37 #if defined (_WIN32) || defined (__CYGWIN__)
38 #include <windows.h>
39 #include <winnls.h>
40 #endif
41 #include "winduni.h"
42 #include "safe-ctype.h"
43
44 #if HAVE_ICONV
45 #include <iconv.h>
46 #endif
47
48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
50 static int unichar_isascii (const unichar *, rc_uint_type);
51
/* Codepage and language tables.  These are only compiled on non-Windows
   hosts; on Windows the same information is obtained from the system.  */
54
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
56
57 /* Codepages mapped. */
58 static local_iconv_map codepages[] =
59 {
60 { 0, "MS-ANSI" },
61 { 1, "WINDOWS-1252" },
62 { 437, "MS-ANSI" },
63 { 737, "MS-GREEK" },
64 { 775, "WINBALTRIM" },
65 { 850, "MS-ANSI" },
66 { 852, "MS-EE" },
67 { 857, "MS-TURK" },
68 { 862, "CP862" },
69 { 864, "CP864" },
70 { 866, "MS-CYRL" },
71 { 874, "WINDOWS-874" },
72 { 932, "CP932" },
73 { 936, "CP936" },
74 { 949, "CP949" },
75 { 950, "CP950" },
76 { 1250, "WINDOWS-1250" },
77 { 1251, "WINDOWS-1251" },
78 { 1252, "WINDOWS-1252" },
79 { 1253, "WINDOWS-1253" },
80 { 1254, "WINDOWS-1254" },
81 { 1255, "WINDOWS-1255" },
82 { 1256, "WINDOWS-1256" },
83 { 1257, "WINDOWS-1257" },
84 { 1258, "WINDOWS-1258" },
85 { CP_UTF7, "UTF-7" },
86 { CP_UTF8, "UTF-8" },
87 { CP_UTF16, "UTF-16LE" },
88 { (rc_uint_type) -1, NULL }
89 };
90
91 /* Languages supported. */
92 static const wind_language_t languages[] =
93 {
94 { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 { 0x042D, 850, 1252, "Basque", "Spain" },
116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 { 0x043C, 437, 1252, "Irish", "Ireland" },
120 { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 { 0x0807, 850, 1252, "German", "Switzerland" },
124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 { 0x080C, 850, 1252, "French", "Belgium" },
126 { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 { 0x0C07, 850, 1252, "German", "Austria" },
134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 { 0x0C0C, 850, 1252, "French", "Canada"},
136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 { 0x1001, 864, 1256, "Arabic", "Libya" },
138 { 0x1004, 936, 936, "Chinese", "Singapore" },
139 { 0x1007, 850, 1252, "German", "Luxembourg" },
140 { 0x1009, 850, 1252, "English", "Canada" },
141 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 { 0x100C, 850, 1252, "French", "Switzerland" },
143 { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 { 0x140C, 850, 1252, "French", "Luxembourg" },
147 { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 { 0x180C, 850, 1252, "French", "Monaco" },
150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 { 0x2001, 864, 1256, "Arabic", "Oman" },
153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 { 0x2801, 864, 1256, "Arabic", "Syria" },
157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 { (unsigned) -1, 0, 0, NULL, NULL }
175 };
176
177 #endif
178
/* Specifies the default codepage to be used for unicode
   transformations.  It is initialized to CP_ACP.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode transformations.
   It is initialized to CP_ACP.  This is the codepage that
   unicode_from_ascii, unicode_from_ascii_len and ascii_from_unicode
   hand to the conversion routines.  */
rc_uint_type wind_current_codepage = CP_ACP;
186
187 /* Convert an ASCII string to a unicode string. We just copy it,
188 expanding chars to shorts, rather than doing something intelligent. */
189
190 void
unicode_from_ascii(rc_uint_type * length,unichar ** unicode,const char * ascii)191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
192 {
193 unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
194 }
195
196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197 copy it, expanding chars to shorts, rather than doing something intelligent.
198 This routine converts also \0 within a string. */
199
200 void
unicode_from_ascii_len(rc_uint_type * length,unichar ** unicode,const char * ascii,rc_uint_type a_length)201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
202 {
203 char *tmp, *p;
204 rc_uint_type tlen, elen, idx = 0;
205
206 *unicode = NULL;
207
208 if (!a_length)
209 {
210 if (length)
211 *length = 0;
212 return;
213 }
214
215 /* Make sure we have zero terminated string. */
216 p = tmp = (char *) xmalloc (a_length + 1);
217 memcpy (tmp, ascii, a_length);
218 tmp[a_length] = 0;
219
220 while (a_length > 0)
221 {
222 unichar *utmp, *up;
223
224 tlen = strlen (p);
225
226 if (tlen > a_length)
227 tlen = a_length;
228 if (*p == 0)
229 {
230 /* Make room for one more character. */
231 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
232 if (idx > 0)
233 {
234 memcpy (utmp, *unicode, idx * sizeof (unichar));
235 }
236 *unicode = utmp;
237 utmp[idx++] = 0;
238 --a_length;
239 p++;
240 continue;
241 }
242 utmp = NULL;
243 elen = 0;
244 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
245 if (elen)
246 {
247 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
248 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
249 elen /= sizeof (unichar);
250 elen --;
251 }
252 else
253 {
254 /* Make room for one more character. */
255 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
256 if (idx > 0)
257 {
258 memcpy (utmp, *unicode, idx * sizeof (unichar));
259 }
260 *unicode = utmp;
261 utmp[idx++] = ((unichar) *p) & 0xff;
262 --a_length;
263 p++;
264 continue;
265 }
266 p += tlen;
267 a_length -= tlen;
268
269 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
270 if (idx > 0)
271 memcpy (up, *unicode, idx * sizeof (unichar));
272
273 *unicode = up;
274 if (elen)
275 memcpy (&up[idx], utmp, sizeof (unichar) * elen);
276
277 idx += elen;
278 }
279
280 if (length)
281 *length = idx;
282
283 free (tmp);
284 }
285
286 /* Convert an unicode string to an ASCII string. We just copy it,
287 shrink shorts to chars, rather than doing something intelligent.
288 Shorts with not within the char range are replaced by '_'. */
289
290 void
ascii_from_unicode(rc_uint_type * length,const unichar * unicode,char ** ascii)291 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
292 {
293 codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
294 }
295
296 /* Print the unicode string UNICODE to the file E. LENGTH is the
297 number of characters to print, or -1 if we should print until the
298 end of the string. FIXME: On a Windows host, we should be calling
299 some Windows function, probably WideCharToMultiByte. */
300
301 void
unicode_print(FILE * e,const unichar * unicode,rc_uint_type length)302 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
303 {
304 while (1)
305 {
306 unichar ch;
307
308 if (length == 0)
309 return;
310 if ((bfd_signed_vma) length > 0)
311 --length;
312
313 ch = *unicode;
314
315 if (ch == 0 && (bfd_signed_vma) length < 0)
316 return;
317
318 ++unicode;
319
320 if ((ch & 0x7f) == ch)
321 {
322 if (ch == '\\')
323 fputs ("\\\\", e);
324 else if (ch == '"')
325 fputs ("\"\"", e);
326 else if (ISPRINT (ch))
327 putc (ch, e);
328 else
329 {
330 switch (ch)
331 {
332 case ESCAPE_A:
333 fputs ("\\a", e);
334 break;
335
336 case ESCAPE_B:
337 fputs ("\\b", e);
338 break;
339
340 case ESCAPE_F:
341 fputs ("\\f", e);
342 break;
343
344 case ESCAPE_N:
345 fputs ("\\n", e);
346 break;
347
348 case ESCAPE_R:
349 fputs ("\\r", e);
350 break;
351
352 case ESCAPE_T:
353 fputs ("\\t", e);
354 break;
355
356 case ESCAPE_V:
357 fputs ("\\v", e);
358 break;
359
360 default:
361 fprintf (e, "\\%03o", (unsigned int) ch);
362 break;
363 }
364 }
365 }
366 else if ((ch & 0xff) == ch)
367 fprintf (e, "\\%03o", (unsigned int) ch);
368 else
369 fprintf (e, "\\x%04x", (unsigned int) ch);
370 }
371 }
372
373 /* Print a unicode string to a file. */
374
375 void
ascii_print(FILE * e,const char * s,rc_uint_type length)376 ascii_print (FILE *e, const char *s, rc_uint_type length)
377 {
378 while (1)
379 {
380 char ch;
381
382 if (length == 0)
383 return;
384 if ((bfd_signed_vma) length > 0)
385 --length;
386
387 ch = *s;
388
389 if (ch == 0 && (bfd_signed_vma) length < 0)
390 return;
391
392 ++s;
393
394 if ((ch & 0x7f) == ch)
395 {
396 if (ch == '\\')
397 fputs ("\\\\", e);
398 else if (ch == '"')
399 fputs ("\"\"", e);
400 else if (ISPRINT (ch))
401 putc (ch, e);
402 else
403 {
404 switch (ch)
405 {
406 case ESCAPE_A:
407 fputs ("\\a", e);
408 break;
409
410 case ESCAPE_B:
411 fputs ("\\b", e);
412 break;
413
414 case ESCAPE_F:
415 fputs ("\\f", e);
416 break;
417
418 case ESCAPE_N:
419 fputs ("\\n", e);
420 break;
421
422 case ESCAPE_R:
423 fputs ("\\r", e);
424 break;
425
426 case ESCAPE_T:
427 fputs ("\\t", e);
428 break;
429
430 case ESCAPE_V:
431 fputs ("\\v", e);
432 break;
433
434 default:
435 fprintf (e, "\\%03o", (unsigned int) ch);
436 break;
437 }
438 }
439 }
440 else
441 fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
442 }
443 }
444
445 rc_uint_type
unichar_len(const unichar * unicode)446 unichar_len (const unichar *unicode)
447 {
448 rc_uint_type r = 0;
449
450 if (unicode)
451 while (unicode[r] != 0)
452 r++;
453 else
454 --r;
455 return r;
456 }
457
458 unichar *
unichar_dup(const unichar * unicode)459 unichar_dup (const unichar *unicode)
460 {
461 unichar *r;
462 int len;
463
464 if (! unicode)
465 return NULL;
466 for (len = 0; unicode[len] != 0; ++len)
467 ;
468 ++len;
469 r = ((unichar *) res_alloc (len * sizeof (unichar)));
470 memcpy (r, unicode, len * sizeof (unichar));
471 return r;
472 }
473
474 unichar *
unichar_dup_uppercase(const unichar * u)475 unichar_dup_uppercase (const unichar *u)
476 {
477 unichar *r = unichar_dup (u);
478 int i;
479
480 if (! r)
481 return NULL;
482
483 for (i = 0; r[i] != 0; ++i)
484 {
485 if (r[i] >= 'a' && r[i] <= 'z')
486 r[i] &= 0xdf;
487 }
488 return r;
489 }
490
491 static int
unichar_isascii(const unichar * u,rc_uint_type len)492 unichar_isascii (const unichar *u, rc_uint_type len)
493 {
494 rc_uint_type i;
495
496 if ((bfd_signed_vma) len < 0)
497 {
498 if (u)
499 len = (rc_uint_type) unichar_len (u);
500 else
501 len = 0;
502 }
503
504 for (i = 0; i < len; i++)
505 if ((u[i] & 0xff80) != 0)
506 return 0;
507 return 1;
508 }
509
510 void
unicode_print_quoted(FILE * e,const unichar * u,rc_uint_type len)511 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
512 {
513 if (! unichar_isascii (u, len))
514 fputc ('L', e);
515 fputc ('"', e);
516 unicode_print (e, u, len);
517 fputc ('"', e);
518 }
519
520 int
unicode_is_valid_codepage(rc_uint_type cp)521 unicode_is_valid_codepage (rc_uint_type cp)
522 {
523 if ((cp & 0xffff) != cp)
524 return 0;
525 if (cp == CP_UTF16 || cp == CP_ACP)
526 return 1;
527
528 #if !defined (_WIN32) && !defined (__CYGWIN__)
529 if (! wind_find_codepage_info (cp))
530 return 0;
531 return 1;
532 #else
533 return !! IsValidCodePage ((UINT) cp);
534 #endif
535 }
536
537 #if defined (_WIN32) || defined (__CYGWIN__)
538
539 #define max_cp_string_len 6
540
541 static unsigned int
codepage_from_langid(unsigned short langid)542 codepage_from_langid (unsigned short langid)
543 {
544 char cp_string [max_cp_string_len];
545 int c;
546
547 memset (cp_string, 0, max_cp_string_len);
548 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549 but is unavailable on Win95. */
550 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
551 LOCALE_IDEFAULTANSICODEPAGE,
552 cp_string, max_cp_string_len);
553 /* If codepage data for an LCID is not installed on users's system,
554 GetLocaleInfo returns an empty string. Fall back to system ANSI
555 default. */
556 if (c == 0)
557 return CP_ACP;
558 return strtoul (cp_string, 0, 10);
559 }
560
561 static unsigned int
wincodepage_from_langid(unsigned short langid)562 wincodepage_from_langid (unsigned short langid)
563 {
564 char cp_string [max_cp_string_len];
565 int c;
566
567 memset (cp_string, 0, max_cp_string_len);
568 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569 but is unavailable on Win95. */
570 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
571 LOCALE_IDEFAULTCODEPAGE,
572 cp_string, max_cp_string_len);
573 /* If codepage data for an LCID is not installed on users's system,
574 GetLocaleInfo returns an empty string. Fall back to system ANSI
575 default. */
576 if (c == 0)
577 return CP_OEM;
578 return strtoul (cp_string, 0, 10);
579 }
580
581 static char *
lang_from_langid(unsigned short langid)582 lang_from_langid (unsigned short langid)
583 {
584 char cp_string[261];
585 int c;
586
587 memset (cp_string, 0, 261);
588 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
589 LOCALE_SENGLANGUAGE,
590 cp_string, 260);
591 /* If codepage data for an LCID is not installed on users's system,
592 GetLocaleInfo returns an empty string. Fall back to system ANSI
593 default. */
594 if (c == 0)
595 strcpy (cp_string, "Neutral");
596 return xstrdup (cp_string);
597 }
598
599 static char *
country_from_langid(unsigned short langid)600 country_from_langid (unsigned short langid)
601 {
602 char cp_string[261];
603 int c;
604
605 memset (cp_string, 0, 261);
606 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
607 LOCALE_SENGCOUNTRY,
608 cp_string, 260);
609 /* If codepage data for an LCID is not installed on users's system,
610 GetLocaleInfo returns an empty string. Fall back to system ANSI
611 default. */
612 if (c == 0)
613 strcpy (cp_string, "Neutral");
614 return xstrdup (cp_string);
615 }
616
617 #endif
618
619 const wind_language_t *
wind_find_language_by_id(unsigned id)620 wind_find_language_by_id (unsigned id)
621 {
622 #if !defined (_WIN32) && !defined (__CYGWIN__)
623 int i;
624
625 if (! id)
626 return NULL;
627 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
628 ;
629 if (languages[i].id == id)
630 return &languages[i];
631 return NULL;
632 #else
633 static wind_language_t wl;
634
635 wl.id = id;
636 wl.doscp = codepage_from_langid ((unsigned short) id);
637 wl.wincp = wincodepage_from_langid ((unsigned short) id);
638 wl.name = lang_from_langid ((unsigned short) id);
639 wl.country = country_from_langid ((unsigned short) id);
640
641 return & wl;
642 #endif
643 }
644
645 const local_iconv_map *
wind_find_codepage_info(unsigned cp)646 wind_find_codepage_info (unsigned cp)
647 {
648 #if !defined (_WIN32) && !defined (__CYGWIN__)
649 int i;
650
651 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
652 ;
653 if (codepages[i].codepage == (rc_uint_type) -1)
654 return NULL;
655 return &codepages[i];
656 #else
657 static local_iconv_map lim;
658 if (!unicode_is_valid_codepage (cp))
659 return NULL;
660 lim.codepage = cp;
661 lim.iconv_name = "";
662 return & lim;
663 #endif
664 }
665
666 /* Convert an Codepage string to a unicode string. */
667
668 void
unicode_from_codepage(rc_uint_type * length,unichar ** u,const char * src,rc_uint_type cp)669 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
670 {
671 rc_uint_type len;
672
673 len = wind_MultiByteToWideChar (cp, src, NULL, 0);
674 if (len)
675 {
676 *u = ((unichar *) res_alloc (len));
677 wind_MultiByteToWideChar (cp, src, *u, len);
678 }
679 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
680 this will set *length to -1. */
681 len -= sizeof (unichar);
682
683 if (length != NULL)
684 *length = len / sizeof (unichar);
685 }
686
687 /* Convert an unicode string to an codepage string. */
688
689 void
codepage_from_unicode(rc_uint_type * length,const unichar * unicode,char ** ascii,rc_uint_type cp)690 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
691 {
692 rc_uint_type len;
693
694 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
695 if (len)
696 {
697 *ascii = (char *) res_alloc (len * sizeof (char));
698 wind_WideCharToMultiByte (cp, unicode, *ascii, len);
699 }
700 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
701 this will set *length to -1. */
702 len--;
703
704 if (length != NULL)
705 *length = len;
706 }
707
708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709 static int
iconv_onechar(iconv_t cd,ICONV_CONST char * s,char * d,int d_len,const char ** n_s,char ** n_d)710 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
711 {
712 int i;
713
714 for (i = 1; i <= 32; i++)
715 {
716 char *tmp_d = d;
717 ICONV_CONST char *tmp_s = s;
718 size_t ret;
719 size_t s_left = (size_t) i;
720 size_t d_left = (size_t) d_len;
721
722 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
723
724 if (ret != (size_t) -1)
725 {
726 *n_s = tmp_s;
727 *n_d = tmp_d;
728 return 0;
729 }
730 }
731
732 return 1;
733 }
734
735 static const char *
wind_iconv_cp(rc_uint_type cp)736 wind_iconv_cp (rc_uint_type cp)
737 {
738 const local_iconv_map *lim = wind_find_codepage_info (cp);
739
740 if (!lim)
741 return NULL;
742 return lim->iconv_name;
743 }
744 #endif /* HAVE_ICONV */
745
746 static rc_uint_type
wind_MultiByteToWideChar(rc_uint_type cp,const char * mb,unichar * u,rc_uint_type u_len)747 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
748 unichar *u, rc_uint_type u_len)
749 {
750 rc_uint_type ret = 0;
751
752 #if defined (_WIN32) || defined (__CYGWIN__)
753 rc_uint_type conv_flags = MB_PRECOMPOSED;
754
755 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
756 MultiByteToWideChar will set the last error to
757 ERROR_INVALID_FLAGS if we do. */
758 if (cp == CP_UTF8 || cp == CP_UTF7)
759 conv_flags = 0;
760
761 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
762 mb, -1, u, u_len);
763 /* Convert to bytes. */
764 ret *= sizeof (unichar);
765
766 #elif defined (HAVE_ICONV)
767 int first = 1;
768 char tmp[32];
769 char *p_tmp;
770 const char *iconv_name = wind_iconv_cp (cp);
771
772 if (!mb || !iconv_name)
773 return 0;
774 iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
775
776 while (1)
777 {
778 int iret;
779 const char *n_mb = "";
780 char *n_tmp = "";
781
782 p_tmp = tmp;
783 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
784 if (first)
785 {
786 first = 0;
787 continue;
788 }
789 if (!iret)
790 {
791 size_t l_tmp = (size_t) (n_tmp - p_tmp);
792
793 if (u)
794 {
795 if ((size_t) u_len < l_tmp)
796 break;
797 memcpy (u, tmp, l_tmp);
798 u += l_tmp/2;
799 u_len -= l_tmp;
800 }
801 ret += l_tmp;
802 }
803 else
804 break;
805 if (tmp[0] == 0 && tmp[1] == 0)
806 break;
807 mb = n_mb;
808 }
809 iconv_close (cd);
810 #else
811 if (cp)
812 ret = 0;
813 ret = strlen (mb) + 1;
814 ret *= sizeof (unichar);
815 if (u != NULL && u_len != 0)
816 {
817 do
818 {
819 *u++ = ((unichar) *mb) & 0xff;
820 --u_len; mb++;
821 }
822 while (u_len != 0 && mb[-1] != 0);
823 }
824 if (u != NULL && u_len != 0)
825 *u = 0;
826 #endif
827 return ret;
828 }
829
830 static rc_uint_type
wind_WideCharToMultiByte(rc_uint_type cp,const unichar * u,char * mb,rc_uint_type mb_len)831 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
832 {
833 rc_uint_type ret = 0;
834 #if defined (_WIN32) || defined (__CYGWIN__)
835 WINBOOL used_def = FALSE;
836
837 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
838 NULL, & used_def);
839 #elif defined (HAVE_ICONV)
840 int first = 1;
841 char tmp[32];
842 char *p_tmp;
843 const char *iconv_name = wind_iconv_cp (cp);
844
845 if (!u || !iconv_name)
846 return 0;
847 iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
848
849 while (1)
850 {
851 int iret;
852 const char *n_u = "";
853 char *n_tmp = "";
854
855 p_tmp = tmp;
856 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
857 if (first)
858 {
859 first = 0;
860 continue;
861 }
862 if (!iret)
863 {
864 size_t l_tmp = (size_t) (n_tmp - p_tmp);
865
866 if (mb)
867 {
868 if ((size_t) mb_len < l_tmp)
869 break;
870 memcpy (mb, tmp, l_tmp);
871 mb += l_tmp;
872 mb_len -= l_tmp;
873 }
874 ret += l_tmp;
875 }
876 else
877 break;
878 if (u[0] == 0)
879 break;
880 u = (const unichar *) n_u;
881 }
882 iconv_close (cd);
883 #else
884 if (cp)
885 ret = 0;
886
887 while (u[ret] != 0)
888 ++ret;
889
890 ++ret;
891
892 if (mb)
893 {
894 while (*u != 0 && mb_len != 0)
895 {
896 if (u[0] == (u[0] & 0x7f))
897 *mb++ = (char) u[0];
898 else
899 *mb++ = '_';
900 ++u; --mb_len;
901 }
902 if (mb_len != 0)
903 *mb = 0;
904 }
905 #endif
906 return ret;
907 }
908