1 /* winduni.c -- unicode support for the windres program.
2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
3 Written by Ian Lance Taylor, Cygnus Support.
4 Rewritten by Kai Tietz, Onevision.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
30 all. */
31
32 #include "sysdep.h"
33 #include "bfd.h"
34 #include "libiberty.h" /* for xstrdup */
35 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
37 #if defined (_WIN32) || defined (__CYGWIN__)
38 #include <windows.h>
39 #include <winnls.h>
40 #endif
41 #include "winduni.h"
42 #include "safe-ctype.h"
43
44 #if HAVE_ICONV
45 #include <iconv.h>
46 #endif
47
48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
50 static int unichar_isascii (const unichar *, rc_uint_type);
51
/* On hosts without the Windows NLS routines, codepages and languages
   are looked up in the static tables below.  */
54
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
56
/* Codepages mapped.  Each entry pairs a Windows codepage number with the
   encoding name that wind_iconv_cp hands to iconv_open.  The table is
   terminated by an entry whose codepage is (rc_uint_type) -1 and whose
   name is NULL; wind_find_codepage_info scans up to that terminator.  */
static local_iconv_map codepages[] =
{
  { 0, "MS-ANSI" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16LE" },
  /* Terminator.  */
  { (rc_uint_type) -1, NULL }
};
90
/* Languages supported.  Each entry lists a language id, two codepage
   numbers, a language name and a country name (presumably the id, doscp,
   wincp, name and country members of wind_language_t, in that order --
   confirm against winduni.h).  The table is terminated by an entry whose
   id is (unsigned) -1; wind_find_language_by_id scans up to it.  */
static const wind_language_t languages[] =
{
  { 0x0000, 437, 1252, "Neutral", "Neutral" },
  { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
  { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
  { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
  { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
  { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
  { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
  { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
  { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
  { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
  { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
  { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
  { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
  { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
  { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
  { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
  { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
  { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
  { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
  { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
  { 0x042D, 850, 1252, "Basque", "Spain" },
  { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
  { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
  { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
  { 0x043C, 437, 1252, "Irish", "Ireland" },
  { 0x043E, 850, 1252, "Malay", "Malaysia" },
  { 0x0801, 864, 1256, "Arabic", "Iraq" },
  { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
  { 0x0807, 850, 1252, "German", "Switzerland" },
  { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
  { 0x080C, 850, 1252, "French", "Belgium" },
  { 0x0810, 850, 1252, "Italian", "Switzerland" },
  { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
  { 0x0816, 850, 1252, "Portuguese", "Portugal" },
  { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
  { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
  { 0x0C01, 864, 1256, "Arabic", "Egypt" },
  { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
  { 0x0C07, 850, 1252, "German", "Austria" },
  { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
  { 0x0C0C, 850, 1252, "French", "Canada"},
  { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
  { 0x1001, 864, 1256, "Arabic", "Libya" },
  { 0x1004, 936, 936, "Chinese", "Singapore" },
  { 0x1007, 850, 1252, "German", "Luxembourg" },
  { 0x1009, 850, 1252, "English", "Canada" },
  { 0x100A, 850, 1252, "Spanish", "Guatemala" },
  { 0x100C, 850, 1252, "French", "Switzerland" },
  { 0x1401, 864, 1256, "Arabic", "Algeria" },
  { 0x1407, 850, 1252, "German", "Liechtenstein" },
  { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
  { 0x140C, 850, 1252, "French", "Luxembourg" },
  { 0x1801, 864, 1256, "Arabic", "Morocco" },
  { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
  { 0x180C, 850, 1252, "French", "Monaco" },
  { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
  { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
  { 0x2001, 864, 1256, "Arabic", "Oman" },
  { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
  { 0x2401, 864, 1256, "Arabic", "Yemen" },
  { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
  { 0x2801, 864, 1256, "Arabic", "Syria" },
  { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
  { 0x2C01, 864, 1256, "Arabic", "Jordan" },
  { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
  { 0x3001, 864, 1256, "Arabic", "Lebanon" },
  { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
  { 0x3401, 864, 1256, "Arabic", "Kuwait" },
  { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
  { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
  { 0x380A, 850, 1252, "Spanish", "Uruguay" },
  { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
  { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
  { 0x4001, 864, 1256, "Arabic", "Qatar" },
  { 0x400A, 850, 1252, "Spanish", "Bolivia" },
  { 0x440A, 850, 1252, "Spanish", "El Salvador" },
  { 0x480A, 850, 1252, "Spanish", "Honduras" },
  { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
  { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
  /* Terminator.  */
  { (unsigned) -1, 0, 0, NULL, NULL }
};
176
177 #endif
178
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  Nothing in this file
   reads it; it has external linkage for use elsewhere.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  unicode_from_ascii,
   unicode_from_ascii_len and ascii_from_unicode convert with it.  */
rc_uint_type wind_current_codepage = CP_ACP;
186
187 /* Convert an ASCII string to a unicode string. We just copy it,
188 expanding chars to shorts, rather than doing something intelligent. */
189
190 void
unicode_from_ascii(rc_uint_type * length,unichar ** unicode,const char * ascii)191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
192 {
193 unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
194 }
195
196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197 copy it, expanding chars to shorts, rather than doing something intelligent.
198 This routine converts also \0 within a string. */
199
200 void
unicode_from_ascii_len(rc_uint_type * length,unichar ** unicode,const char * ascii,rc_uint_type a_length)201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
202 {
203 char *tmp, *p;
204 rc_uint_type tlen, elen, idx = 0;
205
206 *unicode = NULL;
207
208 if (!a_length)
209 {
210 if (length)
211 *length = 0;
212 return;
213 }
214
215 /* Make sure we have zero terminated string. */
216 p = tmp = (char *) alloca (a_length + 1);
217 memcpy (tmp, ascii, a_length);
218 tmp[a_length] = 0;
219
220 while (a_length > 0)
221 {
222 unichar *utmp, *up;
223
224 tlen = strlen (p);
225
226 if (tlen > a_length)
227 tlen = a_length;
228 if (*p == 0)
229 {
230 /* Make room for one more character. */
231 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
232 if (idx > 0)
233 {
234 memcpy (utmp, *unicode, idx * sizeof (unichar));
235 }
236 *unicode = utmp;
237 utmp[idx++] = 0;
238 --a_length;
239 p++;
240 continue;
241 }
242 utmp = NULL;
243 elen = 0;
244 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
245 if (elen)
246 {
247 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
248 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
249 elen /= sizeof (unichar);
250 elen --;
251 }
252 else
253 {
254 /* Make room for one more character. */
255 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
256 if (idx > 0)
257 {
258 memcpy (utmp, *unicode, idx * sizeof (unichar));
259 }
260 *unicode = utmp;
261 utmp[idx++] = ((unichar) *p) & 0xff;
262 --a_length;
263 p++;
264 continue;
265 }
266 p += tlen;
267 a_length -= tlen;
268
269 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
270 if (idx > 0)
271 memcpy (up, *unicode, idx * sizeof (unichar));
272
273 *unicode = up;
274 if (elen)
275 memcpy (&up[idx], utmp, sizeof (unichar) * elen);
276
277 idx += elen;
278 }
279
280 if (length)
281 *length = idx;
282 }
283
284 /* Convert an unicode string to an ASCII string. We just copy it,
285 shrink shorts to chars, rather than doing something intelligent.
286 Shorts with not within the char range are replaced by '_'. */
287
288 void
ascii_from_unicode(rc_uint_type * length,const unichar * unicode,char ** ascii)289 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
290 {
291 codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
292 }
293
294 /* Print the unicode string UNICODE to the file E. LENGTH is the
295 number of characters to print, or -1 if we should print until the
296 end of the string. FIXME: On a Windows host, we should be calling
297 some Windows function, probably WideCharToMultiByte. */
298
299 void
unicode_print(FILE * e,const unichar * unicode,rc_uint_type length)300 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
301 {
302 while (1)
303 {
304 unichar ch;
305
306 if (length == 0)
307 return;
308 if ((bfd_signed_vma) length > 0)
309 --length;
310
311 ch = *unicode;
312
313 if (ch == 0 && (bfd_signed_vma) length < 0)
314 return;
315
316 ++unicode;
317
318 if ((ch & 0x7f) == ch)
319 {
320 if (ch == '\\')
321 fputs ("\\\\", e);
322 else if (ch == '"')
323 fputs ("\"\"", e);
324 else if (ISPRINT (ch))
325 putc (ch, e);
326 else
327 {
328 switch (ch)
329 {
330 case ESCAPE_A:
331 fputs ("\\a", e);
332 break;
333
334 case ESCAPE_B:
335 fputs ("\\b", e);
336 break;
337
338 case ESCAPE_F:
339 fputs ("\\f", e);
340 break;
341
342 case ESCAPE_N:
343 fputs ("\\n", e);
344 break;
345
346 case ESCAPE_R:
347 fputs ("\\r", e);
348 break;
349
350 case ESCAPE_T:
351 fputs ("\\t", e);
352 break;
353
354 case ESCAPE_V:
355 fputs ("\\v", e);
356 break;
357
358 default:
359 fprintf (e, "\\%03o", (unsigned int) ch);
360 break;
361 }
362 }
363 }
364 else if ((ch & 0xff) == ch)
365 fprintf (e, "\\%03o", (unsigned int) ch);
366 else
367 fprintf (e, "\\x%04x", (unsigned int) ch);
368 }
369 }
370
371 /* Print a unicode string to a file. */
372
373 void
ascii_print(FILE * e,const char * s,rc_uint_type length)374 ascii_print (FILE *e, const char *s, rc_uint_type length)
375 {
376 while (1)
377 {
378 char ch;
379
380 if (length == 0)
381 return;
382 if ((bfd_signed_vma) length > 0)
383 --length;
384
385 ch = *s;
386
387 if (ch == 0 && (bfd_signed_vma) length < 0)
388 return;
389
390 ++s;
391
392 if ((ch & 0x7f) == ch)
393 {
394 if (ch == '\\')
395 fputs ("\\\\", e);
396 else if (ch == '"')
397 fputs ("\"\"", e);
398 else if (ISPRINT (ch))
399 putc (ch, e);
400 else
401 {
402 switch (ch)
403 {
404 case ESCAPE_A:
405 fputs ("\\a", e);
406 break;
407
408 case ESCAPE_B:
409 fputs ("\\b", e);
410 break;
411
412 case ESCAPE_F:
413 fputs ("\\f", e);
414 break;
415
416 case ESCAPE_N:
417 fputs ("\\n", e);
418 break;
419
420 case ESCAPE_R:
421 fputs ("\\r", e);
422 break;
423
424 case ESCAPE_T:
425 fputs ("\\t", e);
426 break;
427
428 case ESCAPE_V:
429 fputs ("\\v", e);
430 break;
431
432 default:
433 fprintf (e, "\\%03o", (unsigned int) ch);
434 break;
435 }
436 }
437 }
438 else
439 fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
440 }
441 }
442
443 rc_uint_type
unichar_len(const unichar * unicode)444 unichar_len (const unichar *unicode)
445 {
446 rc_uint_type r = 0;
447
448 if (unicode)
449 while (unicode[r] != 0)
450 r++;
451 else
452 --r;
453 return r;
454 }
455
456 unichar *
unichar_dup(const unichar * unicode)457 unichar_dup (const unichar *unicode)
458 {
459 unichar *r;
460 int len;
461
462 if (! unicode)
463 return NULL;
464 for (len = 0; unicode[len] != 0; ++len)
465 ;
466 ++len;
467 r = ((unichar *) res_alloc (len * sizeof (unichar)));
468 memcpy (r, unicode, len * sizeof (unichar));
469 return r;
470 }
471
472 unichar *
unichar_dup_uppercase(const unichar * u)473 unichar_dup_uppercase (const unichar *u)
474 {
475 unichar *r = unichar_dup (u);
476 int i;
477
478 if (! r)
479 return NULL;
480
481 for (i = 0; r[i] != 0; ++i)
482 {
483 if (r[i] >= 'a' && r[i] <= 'z')
484 r[i] &= 0xdf;
485 }
486 return r;
487 }
488
489 static int
unichar_isascii(const unichar * u,rc_uint_type len)490 unichar_isascii (const unichar *u, rc_uint_type len)
491 {
492 rc_uint_type i;
493
494 if ((bfd_signed_vma) len < 0)
495 {
496 if (u)
497 len = (rc_uint_type) unichar_len (u);
498 else
499 len = 0;
500 }
501
502 for (i = 0; i < len; i++)
503 if ((u[i] & 0xff80) != 0)
504 return 0;
505 return 1;
506 }
507
508 void
unicode_print_quoted(FILE * e,const unichar * u,rc_uint_type len)509 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
510 {
511 if (! unichar_isascii (u, len))
512 fputc ('L', e);
513 fputc ('"', e);
514 unicode_print (e, u, len);
515 fputc ('"', e);
516 }
517
518 int
unicode_is_valid_codepage(rc_uint_type cp)519 unicode_is_valid_codepage (rc_uint_type cp)
520 {
521 if ((cp & 0xffff) != cp)
522 return 0;
523 if (cp == CP_UTF16 || cp == CP_ACP)
524 return 1;
525
526 #if !defined (_WIN32) && !defined (__CYGWIN__)
527 if (! wind_find_codepage_info (cp))
528 return 0;
529 return 1;
530 #else
531 return !! IsValidCodePage ((UINT) cp);
532 #endif
533 }
534
535 #if defined (_WIN32) || defined (__CYGWIN__)
536
537 #define max_cp_string_len 6
538
539 static unsigned int
codepage_from_langid(unsigned short langid)540 codepage_from_langid (unsigned short langid)
541 {
542 char cp_string [max_cp_string_len];
543 int c;
544
545 memset (cp_string, 0, max_cp_string_len);
546 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
547 but is unavailable on Win95. */
548 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
549 LOCALE_IDEFAULTANSICODEPAGE,
550 cp_string, max_cp_string_len);
551 /* If codepage data for an LCID is not installed on users's system,
552 GetLocaleInfo returns an empty string. Fall back to system ANSI
553 default. */
554 if (c == 0)
555 return CP_ACP;
556 return strtoul (cp_string, 0, 10);
557 }
558
559 static unsigned int
wincodepage_from_langid(unsigned short langid)560 wincodepage_from_langid (unsigned short langid)
561 {
562 char cp_string [max_cp_string_len];
563 int c;
564
565 memset (cp_string, 0, max_cp_string_len);
566 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
567 but is unavailable on Win95. */
568 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
569 LOCALE_IDEFAULTCODEPAGE,
570 cp_string, max_cp_string_len);
571 /* If codepage data for an LCID is not installed on users's system,
572 GetLocaleInfo returns an empty string. Fall back to system ANSI
573 default. */
574 if (c == 0)
575 return CP_OEM;
576 return strtoul (cp_string, 0, 10);
577 }
578
579 static char *
lang_from_langid(unsigned short langid)580 lang_from_langid (unsigned short langid)
581 {
582 char cp_string[261];
583 int c;
584
585 memset (cp_string, 0, 261);
586 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
587 LOCALE_SENGLANGUAGE,
588 cp_string, 260);
589 /* If codepage data for an LCID is not installed on users's system,
590 GetLocaleInfo returns an empty string. Fall back to system ANSI
591 default. */
592 if (c == 0)
593 strcpy (cp_string, "Neutral");
594 return xstrdup (cp_string);
595 }
596
597 static char *
country_from_langid(unsigned short langid)598 country_from_langid (unsigned short langid)
599 {
600 char cp_string[261];
601 int c;
602
603 memset (cp_string, 0, 261);
604 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
605 LOCALE_SENGCOUNTRY,
606 cp_string, 260);
607 /* If codepage data for an LCID is not installed on users's system,
608 GetLocaleInfo returns an empty string. Fall back to system ANSI
609 default. */
610 if (c == 0)
611 strcpy (cp_string, "Neutral");
612 return xstrdup (cp_string);
613 }
614
615 #endif
616
617 const wind_language_t *
wind_find_language_by_id(unsigned id)618 wind_find_language_by_id (unsigned id)
619 {
620 #if !defined (_WIN32) && !defined (__CYGWIN__)
621 int i;
622
623 if (! id)
624 return NULL;
625 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
626 ;
627 if (languages[i].id == id)
628 return &languages[i];
629 return NULL;
630 #else
631 static wind_language_t wl;
632
633 wl.id = id;
634 wl.doscp = codepage_from_langid ((unsigned short) id);
635 wl.wincp = wincodepage_from_langid ((unsigned short) id);
636 wl.name = lang_from_langid ((unsigned short) id);
637 wl.country = country_from_langid ((unsigned short) id);
638
639 return & wl;
640 #endif
641 }
642
643 const local_iconv_map *
wind_find_codepage_info(unsigned cp)644 wind_find_codepage_info (unsigned cp)
645 {
646 #if !defined (_WIN32) && !defined (__CYGWIN__)
647 int i;
648
649 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
650 ;
651 if (codepages[i].codepage == (rc_uint_type) -1)
652 return NULL;
653 return &codepages[i];
654 #else
655 static local_iconv_map lim;
656 if (!unicode_is_valid_codepage (cp))
657 return NULL;
658 lim.codepage = cp;
659 lim.iconv_name = "";
660 return & lim;
661 #endif
662 }
663
664 /* Convert an Codepage string to a unicode string. */
665
666 void
unicode_from_codepage(rc_uint_type * length,unichar ** u,const char * src,rc_uint_type cp)667 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
668 {
669 rc_uint_type len;
670
671 len = wind_MultiByteToWideChar (cp, src, NULL, 0);
672 if (len)
673 {
674 *u = ((unichar *) res_alloc (len));
675 wind_MultiByteToWideChar (cp, src, *u, len);
676 }
677 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
678 this will set *length to -1. */
679 len -= sizeof (unichar);
680
681 if (length != NULL)
682 *length = len / sizeof (unichar);
683 }
684
685 /* Convert an unicode string to an codepage string. */
686
687 void
codepage_from_unicode(rc_uint_type * length,const unichar * unicode,char ** ascii,rc_uint_type cp)688 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
689 {
690 rc_uint_type len;
691
692 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
693 if (len)
694 {
695 *ascii = (char *) res_alloc (len * sizeof (char));
696 wind_WideCharToMultiByte (cp, unicode, *ascii, len);
697 }
698 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
699 this will set *length to -1. */
700 len--;
701
702 if (length != NULL)
703 *length = len;
704 }
705
706 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
707 static int
iconv_onechar(iconv_t cd,ICONV_CONST char * s,char * d,int d_len,const char ** n_s,char ** n_d)708 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
709 {
710 int i;
711
712 for (i = 1; i <= 32; i++)
713 {
714 char *tmp_d = d;
715 ICONV_CONST char *tmp_s = s;
716 size_t ret;
717 size_t s_left = (size_t) i;
718 size_t d_left = (size_t) d_len;
719
720 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
721
722 if (ret != (size_t) -1)
723 {
724 *n_s = tmp_s;
725 *n_d = tmp_d;
726 return 0;
727 }
728 }
729
730 return 1;
731 }
732
733 static const char *
wind_iconv_cp(rc_uint_type cp)734 wind_iconv_cp (rc_uint_type cp)
735 {
736 const local_iconv_map *lim = wind_find_codepage_info (cp);
737
738 if (!lim)
739 return NULL;
740 return lim->iconv_name;
741 }
742 #endif /* HAVE_ICONV */
743
744 static rc_uint_type
wind_MultiByteToWideChar(rc_uint_type cp,const char * mb,unichar * u,rc_uint_type u_len)745 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
746 unichar *u, rc_uint_type u_len)
747 {
748 rc_uint_type ret = 0;
749
750 #if defined (_WIN32) || defined (__CYGWIN__)
751 rc_uint_type conv_flags = MB_PRECOMPOSED;
752
753 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
754 MultiByteToWideChar will set the last error to
755 ERROR_INVALID_FLAGS if we do. */
756 if (cp == CP_UTF8 || cp == CP_UTF7)
757 conv_flags = 0;
758
759 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
760 mb, -1, u, u_len);
761 /* Convert to bytes. */
762 ret *= sizeof (unichar);
763
764 #elif defined (HAVE_ICONV)
765 int first = 1;
766 char tmp[32];
767 char *p_tmp;
768 const char *iconv_name = wind_iconv_cp (cp);
769
770 if (!mb || !iconv_name)
771 return 0;
772 iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
773
774 while (1)
775 {
776 int iret;
777 const char *n_mb = "";
778 char *n_tmp = "";
779
780 p_tmp = tmp;
781 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
782 if (first)
783 {
784 first = 0;
785 continue;
786 }
787 if (!iret)
788 {
789 size_t l_tmp = (size_t) (n_tmp - p_tmp);
790
791 if (u)
792 {
793 if ((size_t) u_len < l_tmp)
794 break;
795 memcpy (u, tmp, l_tmp);
796 u += l_tmp/2;
797 u_len -= l_tmp;
798 }
799 ret += l_tmp;
800 }
801 else
802 break;
803 if (tmp[0] == 0 && tmp[1] == 0)
804 break;
805 mb = n_mb;
806 }
807 iconv_close (cd);
808 #else
809 if (cp)
810 ret = 0;
811 ret = strlen (mb) + 1;
812 ret *= sizeof (unichar);
813 if (u != NULL && u_len != 0)
814 {
815 do
816 {
817 *u++ = ((unichar) *mb) & 0xff;
818 --u_len; mb++;
819 }
820 while (u_len != 0 && mb[-1] != 0);
821 }
822 if (u != NULL && u_len != 0)
823 *u = 0;
824 #endif
825 return ret;
826 }
827
828 static rc_uint_type
wind_WideCharToMultiByte(rc_uint_type cp,const unichar * u,char * mb,rc_uint_type mb_len)829 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
830 {
831 rc_uint_type ret = 0;
832 #if defined (_WIN32) || defined (__CYGWIN__)
833 WINBOOL used_def = FALSE;
834
835 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
836 NULL, & used_def);
837 #elif defined (HAVE_ICONV)
838 int first = 1;
839 char tmp[32];
840 char *p_tmp;
841 const char *iconv_name = wind_iconv_cp (cp);
842
843 if (!u || !iconv_name)
844 return 0;
845 iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
846
847 while (1)
848 {
849 int iret;
850 const char *n_u = "";
851 char *n_tmp = "";
852
853 p_tmp = tmp;
854 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
855 if (first)
856 {
857 first = 0;
858 continue;
859 }
860 if (!iret)
861 {
862 size_t l_tmp = (size_t) (n_tmp - p_tmp);
863
864 if (mb)
865 {
866 if ((size_t) mb_len < l_tmp)
867 break;
868 memcpy (mb, tmp, l_tmp);
869 mb += l_tmp;
870 mb_len -= l_tmp;
871 }
872 ret += l_tmp;
873 }
874 else
875 break;
876 if (u[0] == 0)
877 break;
878 u = (const unichar *) n_u;
879 }
880 iconv_close (cd);
881 #else
882 if (cp)
883 ret = 0;
884
885 while (u[ret] != 0)
886 ++ret;
887
888 ++ret;
889
890 if (mb)
891 {
892 while (*u != 0 && mb_len != 0)
893 {
894 if (u[0] == (u[0] & 0x7f))
895 *mb++ = (char) u[0];
896 else
897 *mb++ = '_';
898 ++u; --mb_len;
899 }
900 if (mb_len != 0)
901 *mb = 0;
902 }
903 #endif
904 return ret;
905 }
906