1 /** @file
2 Language Library implementation that provides functions for language conversion
3 between ISO 639-2 and RFC 4646 language codes.
4
5 Copyright (c) 2009 - 2010, Intel Corporation. All rights reserved.<BR>
6 This program and the accompanying materials
7 are licensed and made available under the terms and conditions of the BSD License
8 which accompanies this distribution. The full text of the license may be found at
9 http://opensource.org/licenses/bsd-license.php
10
11 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13
14 **/
15
16 #include <Uefi.h>
17
18 #include <Library/LanguageLib.h>
19
20 #include <Library/BaseLib.h>
21 #include <Library/DebugLib.h>
22 #include <Library/MemoryAllocationLib.h>
23
24 //
25 // Lookup table of ISO639-2 3 character language codes to ISO 639-1 2 character language codes
26 // Each entry is 5 CHAR8 values long. The first 3 CHAR8 values are the ISO 639-2 code.
27 // The last 2 CHAR8 values are the ISO 639-1 code.
28 //
29 // ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.
30 //
31 // Commonly used language codes such as English and French are put in the front of the table for quick match.
32 //
33 GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable[] =
34 "\
35 engen\
36 frafr\
37 aaraa\
38 abkab\
39 aveae\
40 afraf\
41 akaak\
42 amham\
43 argan\
44 araar\
45 asmas\
46 avaav\
47 aymay\
48 azeaz\
49 bakba\
50 belbe\
51 bulbg\
52 bihbh\
53 bisbi\
54 bambm\
55 benbn\
56 bodbo\
57 brebr\
58 bosbs\
59 catca\
60 chece\
61 chach\
62 cosco\
63 crecr\
64 cescs\
65 chucu\
66 chvcv\
67 cymcy\
68 danda\
69 deude\
70 divdv\
71 dzodz\
72 eweee\
73 ellel\
74 epoeo\
75 spaes\
76 estet\
77 euseu\
78 fasfa\
79 fulff\
80 finfi\
81 fijfj\
82 faofo\
83 fryfy\
84 glega\
85 glagd\
86 glggl\
87 grngn\
88 gujgu\
89 glvgv\
90 hauha\
91 hebhe\
92 hinhi\
93 hmoho\
94 hrvhr\
95 hatht\
96 hunhu\
97 hyehy\
98 herhz\
99 inaia\
100 indid\
101 ileie\
102 iboig\
103 iiiii\
104 ipkik\
105 idoio\
106 islis\
107 itait\
108 ikuiu\
109 jpnja\
110 javjv\
111 katka\
112 konkg\
113 kikki\
114 kuakj\
115 kazkk\
116 kalkl\
117 khmkm\
118 kankn\
119 korko\
120 kaukr\
121 kasks\
122 kurku\
123 komkv\
124 corkw\
125 kirky\
126 latla\
127 ltzlb\
128 luglg\
129 limli\
130 linln\
131 laolo\
132 litlt\
133 lublu\
134 lavlv\
135 mlgmg\
136 mahmh\
137 mrimi\
138 mkdmk\
139 malml\
140 monmn\
141 marmr\
142 msams\
143 mltmt\
144 myamy\
145 nauna\
146 nobnb\
147 ndend\
148 nepne\
149 ndong\
150 nldnl\
151 nnonn\
152 norno\
153 nblnr\
154 navnv\
155 nyany\
156 ocioc\
157 ojioj\
158 ormom\
159 orior\
160 ossos\
161 panpa\
162 plipi\
163 polpl\
164 pusps\
165 porpt\
166 quequ\
167 rohrm\
168 runrn\
169 ronro\
170 rusru\
171 kinrw\
172 sansa\
173 srdsc\
174 sndsd\
175 smese\
176 sagsg\
177 sinsi\
178 slksk\
179 slvsl\
180 smosm\
181 snasn\
182 somso\
183 sqisq\
184 srpsr\
185 sswss\
186 sotst\
187 sunsu\
188 swesv\
189 swasw\
190 tamta\
191 telte\
192 tgktg\
193 thath\
194 tirti\
195 tuktk\
196 tgltl\
197 tsntn\
198 tonto\
199 turtr\
200 tsots\
201 tattt\
202 twitw\
203 tahty\
204 uigug\
205 ukruk\
206 urdur\
207 uzbuz\
208 venve\
209 vievi\
210 volvo\
211 wlnwa\
212 wolwo\
213 xhoxh\
214 yidyi\
215 yoryo\
216 zhaza\
217 zhozh\
218 zulzu\
219 ";
220
221 /**
222 Converts upper case ASCII characters in an ASCII string to lower case ASCII
223 characters in an ASCII string.
224
225 If a an ASCII character in Source is in the range 'A'..'Z', then it is converted
226 to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no
227 conversion is performed. Length ASCII characters from Source are convertered and
228 stored in Destination.
229
230 @param Destination An ASCII string to store the results of the conversion.
231 @param Source The source ASCII string of the conversion.
232 @param Length The number of ASCII characters to convert.
233
234 **/
235 VOID
236 EFIAPI
InternalLanguageLibToLower(OUT CHAR8 * Destination,IN CONST CHAR8 * Source,IN UINTN Length)237 InternalLanguageLibToLower (
238 OUT CHAR8 *Destination,
239 IN CONST CHAR8 *Source,
240 IN UINTN Length
241 )
242 {
243 for (; Length > 0; Length--, Destination++, Source++) {
244 *Destination = (CHAR8)((*Source >= 'A' && *Source <= 'Z') ? *Source + ('a' - 'A') : *Source);
245 }
246 }
247
248 /**
249 Convert an ISO 639-2 language code to a RFC 4646 language code.
250 If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1
251 code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646
252 language code is composed of only a primary language subtag.
253
254 If Iso639Language is NULL, then ASSERT.
255 If Rfc4646Language is NULL, then ASSERT.
256
257 @param[out] Rfc4646Language Pointers to a buffer large enough for an ASCII string
258 which reprsents a RFC 4646 language code containging only
259 either a ISO 639-1 or ISO 639-2 primary language subtag.
260 This string is Null-terminated.
261 @param[in] Iso639Language Pointer to a 3-letter ASCII string which represents
262 an ISO 639-2 language code. This string is not required
263 to be Null-terminated.
264
265 @retval TRUE The ISO 639-2 language code was converted to a ISO 639-1 code.
266 @retval FALSE The language code does not have corresponding ISO 639-1 code.
267
268 **/
269 BOOLEAN
270 EFIAPI
ConvertIso639ToRfc4646(OUT CHAR8 * Rfc4646Language,IN CONST CHAR8 * Iso639Language)271 ConvertIso639ToRfc4646 (
272 OUT CHAR8 *Rfc4646Language,
273 IN CONST CHAR8 *Iso639Language
274 )
275 {
276 CONST CHAR8 *Match;
277
278 ASSERT (Iso639Language != NULL);
279 ASSERT (Rfc4646Language != NULL);
280
281 //
282 // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language
283 //
284 InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3);
285 Rfc4646Language[3] = '\0';
286
287 Match = mIso639ToRfc4646ConversionTable;
288 do {
289 Match = AsciiStrStr (Match, Rfc4646Language);
290 if (Match == NULL) {
291 return FALSE;
292 }
293 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) {
294 break;
295 }
296 ++Match;
297 } while (TRUE);
298 Rfc4646Language[0] = Match[3];
299 Rfc4646Language[1] = Match[4];
300 Rfc4646Language[2] = '\0';
301 return TRUE;
302 }
303
304 /**
305 Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language
306 subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary
307 language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2
308 code (T code if applies). Else the ISO 639-2 code is returned.
309
310 If Rfc4646Language is NULL, then ASSERT.
311 If Iso639Language is NULL, then ASSERT.
312
313 @param[out] Iso639Language Pointers to a buffer large enough for a 3-letter ASCII string
314 which reprsents an ISO 639-2 language code. The string is Null-terminated.
315 @param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated
316 by a NULL or a ';' character.
317
318 @retval TRUE Language code converted successfully.
319 @retval FALSE The RFC 4646 language code is invalid or unsupported.
320
321 **/
322 BOOLEAN
323 EFIAPI
ConvertRfc4646ToIso639(OUT CHAR8 * Iso639Language,IN CONST CHAR8 * Rfc4646Language)324 ConvertRfc4646ToIso639 (
325 OUT CHAR8 *Iso639Language,
326 IN CONST CHAR8 *Rfc4646Language
327 )
328 {
329 CONST CHAR8 *Match;
330
331 ASSERT (Rfc4646Language != NULL);
332 ASSERT (Iso639Language != NULL);
333
334 //
335 // RFC 4646 language code check before determining
336 // if the primary language subtag is ISO 639-1 or 639-2 code
337 //
338 if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') {
339 return FALSE;
340 }
341
342 //
343 // Check if the primary language subtag is ISO 639-1 code
344 //
345 if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') {
346 //
347 // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language
348 //
349 InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2);
350 //
351 // Convert ISO 639-1 code to ISO 639-2 code
352 //
353 Iso639Language[2] = '\0';
354 Match = mIso639ToRfc4646ConversionTable;
355 do {
356 Match = AsciiStrStr (Match, Iso639Language);
357 if (Match == NULL) {
358 return FALSE;
359 }
360 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) {
361 break;
362 }
363 ++Match;
364 } while (TRUE);
365 Rfc4646Language = Match - 3;
366 } else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) {
367 return FALSE;
368 }
369 Iso639Language[0] = Rfc4646Language[0];
370 Iso639Language[1] = Rfc4646Language[1];
371 Iso639Language[2] = Rfc4646Language[2];
372 Iso639Language[3] = '\0';
373 return TRUE;
374 }
375
376 /**
377 Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes.
378 Caller is responsible for freeing the allocated buffer.
379
380 If Iso639Languages is NULL, then ASSERT.
381
382 @param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing
383 one or more ISO 639-2 3-letter language codes.
384
385 @retval NULL Invalid ISO 639-2 language code found.
386 @retval NULL Out of memory.
387 @return Pointer to the allocate buffer containing the Null-terminated converted language codes string.
388 This string is composed of one or more RFC4646 language codes each of which has only
389 ISO 639-1 2-letter primary language subtag.
390
391 **/
392 CHAR8 *
393 EFIAPI
ConvertLanguagesIso639ToRfc4646(IN CONST CHAR8 * Iso639Languages)394 ConvertLanguagesIso639ToRfc4646 (
395 IN CONST CHAR8 *Iso639Languages
396 )
397 {
398 UINTN Length;
399 UINTN Iso639Index;
400 UINTN Rfc4646Index;
401 CHAR8 *Rfc4646Languages;
402
403 ASSERT (Iso639Languages != NULL);
404
405 //
406 // The length of ISO 639-2 lanugage codes string must be multiple of 3
407 //
408 Length = AsciiStrLen (Iso639Languages);
409 if (Length % 3 != 0) {
410 return NULL;
411 }
412
413 //
414 // Allocate buffer for RFC 4646 language codes string
415 //
416 Rfc4646Languages = AllocatePool (Length + (Length / 3));
417 if (Rfc4646Languages == NULL) {
418 return NULL;
419 }
420
421 for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) {
422 if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) {
423 Rfc4646Index += 2;
424 } else {
425 Rfc4646Index += 3;
426 }
427 Rfc4646Languages[Rfc4646Index++] = ';';
428 }
429 Rfc4646Languages[Rfc4646Index - 1] = '\0';
430 return Rfc4646Languages;
431 }
432
433 /**
434 Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes.
435 The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code.
436 Caller is responsible for freeing the allocated buffer.
437
438 If Rfc4646Languages is NULL, then ASSERT.
439
440 @param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing
441 one or more RFC 4646 language codes.
442
443 @retval NULL Invalid or unsupported RFC 4646 language code found.
444 @retval NULL Out of memory.
445 @return Pointer to the allocate buffer containing the Null-terminated converted language codes string.
446 This string is composed of one or more ISO 639-2 language codes.
447
448 **/
449 CHAR8 *
450 EFIAPI
ConvertLanguagesRfc4646ToIso639(IN CONST CHAR8 * Rfc4646Languages)451 ConvertLanguagesRfc4646ToIso639 (
452 IN CONST CHAR8 *Rfc4646Languages
453 )
454 {
455 UINTN NumLanguages;
456 UINTN Iso639Index;
457 UINTN Rfc4646Index;
458 CHAR8 *Iso639Languages;
459
460 ASSERT (Rfc4646Languages != NULL);
461
462 //
463 // Determine the number of languages in the RFC 4646 language codes string
464 //
465 for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) {
466 if (Rfc4646Languages[Rfc4646Index] == ';') {
467 NumLanguages++;
468 }
469 }
470
471 //
472 // Allocate buffer for ISO 639-2 language codes string
473 //
474 Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1);
475 if (Iso639Languages == NULL) {
476 return NULL;
477 }
478
479 //
480 // Do the conversion for each RFC 4646 language code
481 //
482 for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) {
483 if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) {
484 Iso639Index += 3;
485 } else {
486 FreePool (Iso639Languages);
487 return NULL;
488 }
489 //
490 // Locate next language code
491 //
492 while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') {
493 Rfc4646Index++;
494 }
495 if (Rfc4646Languages[Rfc4646Index] == ';') {
496 Rfc4646Index++;
497 }
498 }
499 Iso639Languages[Iso639Index] = '\0';
500 return Iso639Languages;
501 }
502