1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picobase.c
18  *
19  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20  * All rights reserved.
21  *
22  * History:
23  * - 2009-04-20 -- initial version
24  *
25  */
26 
27 #include "picoos.h"
28 #include "picodbg.h"
29 #include "picodefs.h"
30 #include "picobase.h"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 #if 0
36 }
37 #endif
38 
39 /**
40  * @addtogroup picobase
41  *
42  * @b Unicode_UTF8_functions
43  *
44  * UTF8
45  * scalar value                1st Byte    2nd Byte    3rd Byte    4th Byte
46  * 00000000 0xxxxxxx           0xxxxxxx
47  * 00000yyy yyxxxxxx           110yyyyy    10xxxxxx
48  * zzzzyyyy yyxxxxxx           1110zzzz    10yyyyyy    10xxxxxx
 * 000uuuuu zzzzyyyy yyxxxxxx  11110uuu    10uuzzzz    10yyyyyy    10xxxxxx
50  *
51 */
picobase_utf8_length(const picoos_uint8 * utf8str,const picoos_uint16 maxlen)52 picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
53                                   const picoos_uint16 maxlen) {
54 
55     picoos_uint16 i;
56     picoos_uint16 len;
57     picoos_uint8 follow;
58     picoos_uint8 ok;
59 
60     ok = TRUE;
61     i = 0;
62     len = 0;
63     follow = 0;
64     while (ok && (i < maxlen) && (utf8str[i] != '\000')) {
65         if (follow > 0) {
66             if ((utf8str[i] >= (picoos_uint8)'\200') &&
67                 (utf8str[i] < (picoos_uint8)'\300')) {
68                 follow--;
69             } else {
70                 ok = FALSE;
71             }
72         } else if (utf8str[i] < (picoos_uint8)'\200') {
73             len++;
74         } else if (utf8str[i] >= (picoos_uint8)'\370') {
75             ok = FALSE;
76         } else if (utf8str[i] >= (picoos_uint8)'\360') {
77             follow = 3;
78             len++;
79         } else if (utf8str[i] >= (picoos_uint8)'\340') {
80             follow = 2;
81             len++;
82         } else if (utf8str[i] >= (picoos_uint8)'\300') {
83             follow = 1;
84             len++;
85         } else {
86             ok = FALSE;
87         }
88         i++;
89     }
90     if (ok) {
91         return len;
92     } else {
93         return -1;
94     }
95 }
96 
97 
/**
 * Converts utf32 input to lowercase
 * @param    utf32 : a single character encoded in UTF32
 * @return   a single lowercase character encoded in UTF32; the input value
 *           is returned unchanged when no mapping is listed for it
 */
static picoos_uint32 base_utf32_lowercase (picoos_uint32 utf32)
{
    const picoos_uint32 c = utf32;
    const int even = ((c % 2) == 0);

    /* two high blocks with a constant offset */
    if ((c >= 65313) && (c <= 65338)) {
        return c + 32;
    }
    if ((c >= 66560) && (c <= 66599)) {
        return c + 40;
    }

    if ((c >= 7680) && (c <= 9423)) {
        /* even code point maps to the following one; odd ones stay */
        if ((c <= 7828) || ((c >= 7840) && (c <= 7928))) {
            return even ? (c + 1) : c;
        }
        /* groups whose mapped value lies 8 positions below */
        if (((c >= 7944) && (c <= 7951)) ||
            ((c >= 7960) && (c <= 7965)) ||
            ((c >= 7976) && (c <= 7983)) ||
            ((c >= 7992) && (c <= 7999)) ||
            ((c >= 8008) && (c <= 8013)) ||
            ((c >= 8040) && (c <= 8047)) ||
            ((c >= 8072) && (c <= 8079)) ||
            ((c >= 8088) && (c <= 8095)) ||
            ((c >= 8104) && (c <= 8111))) {
            return c - 8;
        }
        /* here only the odd code points are mapped */
        if ((c >= 8025) && (c <= 8031)) {
            return even ? c : (c - 8);
        }
        if ((c >= 8544) && (c <= 8559)) {
            return c + 16;
        }
        if ((c >= 9398) && (c <= 9423)) {
            return c + 26;
        }
        /* irregular one-to-one mappings */
        switch (c) {
            case 8120: return 8112;
            case 8121: return 8113;
            case 8122: return 8048;
            case 8123: return 8049;
            case 8124: return 8115;
            case 8136: return 8050;
            case 8137: return 8051;
            case 8138: return 8052;
            case 8139: return 8053;
            case 8140: return 8131;
            case 8152: return 8144;
            case 8153: return 8145;
            case 8154: return 8054;
            case 8155: return 8055;
            case 8168: return 8160;
            case 8169: return 8161;
            case 8170: return 8058;
            case 8171: return 8059;
            case 8172: return 8165;
            case 8184: return 8056;
            case 8185: return 8057;
            case 8186: return 8060;
            case 8187: return 8061;
            case 8188: return 8179;
            case 8486: return 969;
            case 8490: return 107;
            case 8491: return 229;
            default:
                break;
        }
        return c;
    }

    /* everything outside the three ranges handled above */

    /* blocks with a constant offset of 32 */
    if (((c >= 65) && (c <= 90)) ||
        ((c >= 192) && (c <= 214)) ||
        ((c >= 216) && (c <= 222)) ||
        ((c >= 913) && (c <= 929)) ||
        ((c >= 931) && (c <= 939)) ||
        ((c >= 1040) && (c <= 1071))) {
        return c + 32;
    }
    /* even code point maps to the following one; odd ones stay */
    if (((c >= 256) && (c <= 303)) ||
        ((c >= 305) && (c <= 310)) ||
        ((c >= 330) && (c <= 374)) ||
        ((c >= 416) && (c <= 420)) ||
        ((c >= 478) && (c <= 494)) ||
        ((c >= 504) && (c <= 542)) ||
        ((c >= 546) && (c <= 562)) ||
        ((c >= 984) && (c <= 1006)) ||
        ((c >= 1120) && (c <= 1152)) ||
        ((c >= 1162) && (c <= 1214)) ||
        ((c >= 1232) && (c <= 1268)) ||
        ((c >= 1280) && (c <= 1294))) {
        return even ? (c + 1) : c;
    }
    /* odd code point maps to the following one; even ones stay */
    if (((c >= 313) && (c <= 327)) ||
        ((c >= 377) && (c <= 381)) ||
        ((c >= 459) && (c <= 475)) ||
        ((c >= 1217) && (c <= 1229))) {
        return even ? c : (c + 1);
    }
    if ((c >= 1024) && (c <= 1039)) {
        return c + 80;
    }
    if ((c >= 1329) && (c <= 1366)) {
        return c + 48;
    }
    /* irregular one-to-one mappings */
    switch (c) {
        case 304: return 105;
        case 376: return 255;
        case 385: return 595;
        case 386: return 387;
        case 388: return 389;
        case 390: return 596;
        case 391: return 392;
        case 393: return 598;
        case 394: return 599;
        case 395: return 396;
        case 398: return 477;
        case 399: return 601;
        case 400: return 603;
        case 401: return 402;
        case 403: return 608;
        case 404: return 611;
        case 406: return 617;
        case 407: return 616;
        case 408: return 409;
        case 412: return 623;
        case 413: return 626;
        case 415: return 629;
        case 422: return 640;
        case 423: return 424;
        case 425: return 643;
        case 428: return 429;
        case 430: return 648;
        case 431: return 432;
        case 433: return 650;
        case 434: return 651;
        case 435: return 436;
        case 437: return 438;
        case 439: return 658;
        case 440: return 441;
        case 444: return 445;
        case 452: return 454;
        case 453: return 454;
        case 455: return 457;
        case 456: return 457;
        case 458: return 460;
        case 497: return 499;
        case 498: return 499;
        case 500: return 501;
        case 502: return 405;
        case 503: return 447;
        case 544: return 414;
        case 902: return 940;
        case 904: return 941;
        case 905: return 942;
        case 906: return 943;
        case 908: return 972;
        case 910: return 973;
        case 911: return 974;
        case 1012: return 952;
        case 1015: return 1016;
        case 1017: return 1010;
        case 1018: return 1019;
        case 1272: return 1273;
        default:
            break;
    }
    return c;
}
490 
491 /**
492  * Converts utf32 input to uppercase
493  * @param    utf32 : a single character encoded in UTF32
494  * @return   a single uppercase character encoded in UTF32
495 */
base_utf32_uppercase(picoos_uint32 utf32)496 static picoos_uint32 base_utf32_uppercase (picoos_uint32 utf32)
497 {
498     picoos_uint32 lc;
499 
500     lc = utf32;
501     if (((utf32 >= 65345) && (utf32 <= 65370))) {
502         lc = (utf32 - 32);
503     } else if (((utf32 >= 66600) && (utf32 <= 66639))) {
504         lc = (utf32 - 40);
505     } else if (((utf32 >= 7681) && (utf32 <= 9449))) {
506         switch (utf32) {
507             case 7681:   case 7682:   case 7683:   case 7684:   case 7685:   case 7686:   case 7687:   case 7688:   case 7689:   case 7690:
508             case 7691:   case 7692:   case 7693:   case 7694:   case 7695:   case 7696:   case 7697:   case 7698:   case 7699:   case 7700:   case 7701:   case 7702:
509             case 7703:   case 7704:   case 7705:   case 7706:   case 7707:   case 7708:   case 7709:   case 7710:   case 7711:   case 7712:   case 7713:   case 7714:
510             case 7715:   case 7716:   case 7717:   case 7718:   case 7719:   case 7720:   case 7721:   case 7722:   case 7723:   case 7724:   case 7725:   case 7726:
511             case 7727:   case 7728:   case 7729:   case 7730:   case 7731:   case 7732:   case 7733:   case 7734:   case 7735:   case 7736:   case 7737:   case 7738:
512             case 7739:   case 7740:   case 7741:   case 7742:   case 7743:   case 7744:   case 7745:   case 7746:   case 7747:   case 7748:   case 7749:   case 7750:
513             case 7751:   case 7752:   case 7753:   case 7754:   case 7755:   case 7756:   case 7757:   case 7758:   case 7759:   case 7760:   case 7761:   case 7762:
514             case 7763:   case 7764:   case 7765:   case 7766:   case 7767:   case 7768:   case 7769:   case 7770:   case 7771:   case 7772:   case 7773:   case 7774:
515             case 7775:   case 7776:   case 7777:   case 7778:   case 7779:   case 7780:   case 7781:   case 7782:   case 7783:   case 7784:   case 7785:   case 7786:
516             case 7787:   case 7788:   case 7789:   case 7790:   case 7791:   case 7792:   case 7793:   case 7794:   case 7795:   case 7796:   case 7797:   case 7798:
517             case 7799:   case 7800:   case 7801:   case 7802:   case 7803:   case 7804:   case 7805:   case 7806:   case 7807:   case 7808:   case 7809:   case 7810:
518             case 7811:   case 7812:   case 7813:   case 7814:   case 7815:   case 7816:   case 7817:   case 7818:   case 7819:   case 7820:   case 7821:   case 7822:
519             case 7823:   case 7824:   case 7825:   case 7826:   case 7827:   case 7828:   case 7829:   case 7841:   case 7842:   case 7843:   case 7844:
520             case 7845:   case 7846:   case 7847:   case 7848:   case 7849:   case 7850:   case 7851:   case 7852:   case 7853:   case 7854:   case 7855:   case 7856:
521             case 7857:   case 7858:   case 7859:   case 7860:   case 7861:   case 7862:   case 7863:   case 7864:   case 7865:   case 7866:   case 7867:   case 7868:
522             case 7869:   case 7870:   case 7871:   case 7872:   case 7873:   case 7874:   case 7875:   case 7876:   case 7877:   case 7878:   case 7879:   case 7880:
523             case 7881:   case 7882:   case 7883:   case 7884:   case 7885:   case 7886:   case 7887:   case 7888:   case 7889:   case 7890:   case 7891:   case 7892:
524             case 7893:   case 7894:   case 7895:   case 7896:   case 7897:   case 7898:   case 7899:   case 7900:   case 7901:   case 7902:   case 7903:   case 7904:
525             case 7905:   case 7906:   case 7907:   case 7908:   case 7909:   case 7910:   case 7911:   case 7912:   case 7913:   case 7914:   case 7915:   case 7916:
526             case 7917:   case 7918:   case 7919:   case 7920:   case 7921:   case 7922:   case 7923:   case 7924:   case 7925:   case 7926:   case 7927:   case 7928:
527             case 7929:
528                 if (((utf32) % 2 == 1)) {
529                     lc = (utf32 - 1);
530                 }
531                 break;
532             case 7936:   case 7937:   case 7938:   case 7939:   case 7940:   case 7941:   case 7942:   case 7943:   case 7952:
533             case 7953:   case 7954:   case 7955:   case 7956:   case 7957:   case 7968:   case 7969:   case 7970:   case 7971:   case 7972:   case 7973:
534             case 7974:   case 7975:   case 7984:   case 7985:   case 7986:   case 7987:   case 7988:   case 7989:   case 7990:   case 7991:
535             case 8000:   case 8001:   case 8002:   case 8003:   case 8004:   case 8005:   case 8032:   case 8033:   case 8034:   case 8035:   case 8036:
536             case 8037:   case 8038:   case 8039:   case 8064:   case 8065:   case 8066:   case 8067:   case 8068:   case 8069:   case 8070:   case 8071:
537             case 8080:   case 8081:   case 8082:   case 8083:   case 8084:   case 8085:   case 8086:   case 8087:   case 8096:   case 8097:
538             case 8098:   case 8099:   case 8100:   case 8101:   case 8102:   case 8103:
539                 lc = (utf32 + 8);
540                 break;
541             case 8017:   case 8018:   case 8019:   case 8020:   case 8021:   case 8022:   case 8023:
542                 if (((utf32) % 2 == 1)) {
543                     lc = (utf32 + 8);
544                 }
545                 break;
546             case 8560:   case 8561:   case 8562:   case 8563:   case 8564:   case 8565:   case 8566:   case 8567:   case 8568:   case 8569:
547             case 8570:   case 8571:   case 8572:   case 8573:   case 8574:   case 8575:
548                 lc = (utf32 - 16);
549                 break;
550             case 9424:   case 9425:   case 9426:   case 9427:   case 9428:   case 9429:   case 9430:   case 9431:   case 9432:   case 9433:
551             case 9434:   case 9435:   case 9436:   case 9437:   case 9438:   case 9439:   case 9440:   case 9441:   case 9442:   case 9443:   case 9444:   case 9445:
552             case 9446:   case 9447:   case 9448:   case 9449:
553                 lc = (utf32 - 26);
554                 break;
555             case 7835:
556                 lc = 7776;
557                 break;
558             case 8048:
559                 lc = 8122;
560                 break;
561             case 8049:
562                 lc = 8123;
563                 break;
564             case 8050:
565                 lc = 8136;
566                 break;
567             case 8051:
568                 lc = 8137;
569                 break;
570             case 8052:
571                 lc = 8138;
572                 break;
573             case 8053:
574                 lc = 8139;
575                 break;
576             case 8054:
577                 lc = 8154;
578                 break;
579             case 8055:
580                 lc = 8155;
581                 break;
582             case 8056:
583                 lc = 8184;
584                 break;
585             case 8057:
586                 lc = 8185;
587                 break;
588             case 8058:
589                 lc = 8170;
590                 break;
591             case 8059:
592                 lc = 8171;
593                 break;
594             case 8060:
595                 lc = 8186;
596                 break;
597             case 8061:
598                 lc = 8187;
599                 break;
600             case 8112:
601                 lc = 8120;
602                 break;
603             case 8113:
604                 lc = 8121;
605                 break;
606             case 8115:
607                 lc = 8124;
608                 break;
609             case 8126:
610                 lc = 921;
611                 break;
612             case 8131:
613                 lc = 8140;
614                 break;
615             case 8144:
616                 lc = 8152;
617                 break;
618             case 8145:
619                 lc = 8153;
620                 break;
621             case 8160:
622                 lc = 8168;
623                 break;
624             case 8161:
625                 lc = 8169;
626                 break;
627             case 8165:
628                 lc = 8172;
629                 break;
630             case 8179:
631                 lc = 8188;
632                 break;
633         default:
634             break;
635         }
636     } else {
637         switch (utf32) {
638             case 97:   case 98:   case 99:   case 100:   case 101:   case 102:   case 103:   case 104:   case 105:   case 106:
639             case 107:   case 108:   case 109:   case 110:   case 111:   case 112:   case 113:   case 114:   case 115:   case 116:   case 117:   case 118:
640             case 119:   case 120:   case 121:   case 122:   case 224:   case 225:   case 226:   case 227:   case 228:   case 229:   case 230:
641             case 231:   case 232:   case 233:   case 234:   case 235:   case 236:   case 237:   case 238:   case 239:   case 240:   case 241:   case 242:
642             case 243:   case 244:   case 245:   case 246:   case 247:   case 248:   case 249:   case 250:   case 251:   case 252:   case 253:   case 254:
643             case 945:   case 946:   case 947:   case 948:   case 949:   case 950:   case 951:   case 952:   case 953:   case 954:   case 955:
644             case 956:   case 957:   case 958:   case 959:   case 960:   case 961:   case 963:   case 964:   case 965:   case 966:   case 967:
645             case 968:   case 969:   case 970:   case 971:   case 1072:   case 1073:   case 1074:   case 1075:   case 1076:   case 1077:   case 1078:
646             case 1079:   case 1080:   case 1081:   case 1082:   case 1083:   case 1084:   case 1085:   case 1086:   case 1087:   case 1088:   case 1089:   case 1090:
647             case 1091:   case 1092:   case 1093:   case 1094:   case 1095:   case 1096:   case 1097:   case 1098:   case 1099:   case 1100:   case 1101:   case 1102:
648             case 1103:
649                 if ((utf32 != 247)) {
650                     lc = (utf32 - 32);
651                 }
652                 break;
653             case 257:   case 258:   case 259:   case 260:   case 261:   case 262:   case 263:   case 264:   case 265:   case 266:
654             case 267:   case 268:   case 269:   case 270:   case 271:   case 272:   case 273:   case 274:   case 275:   case 276:   case 277:   case 278:
655             case 279:   case 280:   case 281:   case 282:   case 283:   case 284:   case 285:   case 286:   case 287:   case 288:   case 289:   case 290:
656             case 291:   case 292:   case 293:   case 294:   case 295:   case 296:   case 297:   case 298:   case 299:   case 300:   case 301:   case 302:
657             case 303:   case 304:   case 306:   case 307:   case 308:   case 309:   case 310:   case 311:   case 331:   case 332:
658             case 333:   case 334:   case 335:   case 336:   case 337:   case 338:   case 339:   case 340:   case 341:   case 342:   case 343:   case 344:
659             case 345:   case 346:   case 347:   case 348:   case 349:   case 350:   case 351:   case 352:   case 353:   case 354:   case 355:   case 356:
660             case 357:   case 358:   case 359:   case 360:   case 361:   case 362:   case 363:   case 364:   case 365:   case 366:   case 367:   case 368:
661             case 369:   case 370:   case 371:   case 372:   case 373:   case 374:   case 375:   case 417:   case 418:   case 419:   case 420:
662             case 421:   case 481:   case 482:   case 483:   case 484:   case 485:   case 486:   case 487:   case 488:   case 489:   case 490:
663             case 491:   case 492:   case 493:   case 494:   case 495:   case 507:   case 508:   case 509:   case 510:   case 511:
664             case 513:   case 514:   case 515:   case 516:   case 517:   case 518:   case 519:   case 520:   case 521:   case 522:   case 523:   case 524:
665             case 525:   case 526:   case 527:   case 528:   case 529:   case 530:   case 531:   case 532:   case 533:   case 534:   case 535:   case 536:
666             case 537:   case 538:   case 539:   case 540:   case 541:   case 542:   case 543:   case 544:   case 546:   case 547:   case 548:
667             case 549:   case 550:   case 551:   case 552:   case 553:   case 554:   case 555:   case 556:   case 557:   case 558:   case 559:   case 560:
668             case 561:   case 562:   case 563:   case 985:   case 986:   case 987:   case 988:   case 989:   case 990:   case 991:   case 992:
669             case 993:   case 994:   case 995:   case 996:   case 997:   case 998:   case 999:   case 1000:   case 1001:   case 1002:   case 1003:   case 1004:
670             case 1005:   case 1006:   case 1007:   case 1121:   case 1122:   case 1123:   case 1124:   case 1125:   case 1126:   case 1127:   case 1128:
671             case 1129:   case 1130:   case 1131:   case 1132:   case 1133:   case 1134:   case 1135:   case 1136:   case 1137:   case 1138:   case 1139:   case 1140:
672             case 1141:   case 1142:   case 1143:   case 1144:   case 1145:   case 1146:   case 1147:   case 1148:   case 1149:   case 1150:   case 1151:   case 1152:
673             case 1153:   case 1163:   case 1164:   case 1165:   case 1166:   case 1167:   case 1168:   case 1169:   case 1170:   case 1171:   case 1172:
674             case 1173:   case 1174:   case 1175:   case 1176:   case 1177:   case 1178:   case 1179:   case 1180:   case 1181:   case 1182:   case 1183:   case 1184:
675             case 1185:   case 1186:   case 1187:   case 1188:   case 1189:   case 1190:   case 1191:   case 1192:   case 1193:   case 1194:   case 1195:   case 1196:
676             case 1197:   case 1198:   case 1199:   case 1200:   case 1201:   case 1202:   case 1203:   case 1204:   case 1205:   case 1206:   case 1207:   case 1208:
677             case 1209:   case 1210:   case 1211:   case 1212:   case 1213:   case 1214:   case 1215:   case 1233:   case 1234:   case 1235:   case 1236:
678             case 1237:   case 1238:   case 1239:   case 1240:   case 1241:   case 1242:   case 1243:   case 1244:   case 1245:   case 1246:   case 1247:   case 1248:
679             case 1249:   case 1250:   case 1251:   case 1252:   case 1253:   case 1254:   case 1255:   case 1256:   case 1257:   case 1258:   case 1259:   case 1260:
680             case 1261:   case 1262:   case 1263:   case 1264:   case 1265:   case 1266:   case 1267:   case 1268:   case 1269:   case 1281:   case 1282:
681             case 1283:   case 1284:   case 1285:   case 1286:   case 1287:   case 1288:   case 1289:   case 1290:   case 1291:   case 1292:   case 1293:   case 1294:
682             case 1295:
683                 if (((utf32) % 2 == 1)) {
684                     lc = (utf32 - 1);
685                 }
686                 break;
687             case 314:   case 315:   case 316:   case 317:   case 318:   case 319:   case 320:   case 321:   case 322:   case 323:
688             case 324:   case 325:   case 326:   case 327:   case 328:   case 378:   case 379:   case 380:   case 381:   case 382:
689             case 464:   case 465:   case 466:   case 467:   case 468:   case 469:   case 470:   case 471:   case 472:   case 473:   case 474:   case 475:
690             case 476:   case 1218:   case 1219:   case 1220:   case 1221:   case 1222:   case 1223:   case 1224:   case 1225:   case 1226:   case 1227:
691             case 1228:   case 1229:   case 1230:
692                 if ( !(((utf32) % 2 == 1))) {
693                     lc = (utf32 - 1);
694                 }
695                 break;
696             case 1104:   case 1105:   case 1106:   case 1107:   case 1108:   case 1109:   case 1110:   case 1111:   case 1112:   case 1113:
697             case 1114:   case 1115:   case 1116:   case 1117:   case 1118:   case 1119:
698                 lc = (utf32 - 80);
699                 break;
700             case 1377:   case 1378:   case 1379:   case 1380:   case 1381:   case 1382:   case 1383:   case 1384:   case 1385:   case 1386:
701             case 1387:   case 1388:   case 1389:   case 1390:   case 1391:   case 1392:   case 1393:   case 1394:   case 1395:   case 1396:   case 1397:   case 1398:
702             case 1399:   case 1400:   case 1401:   case 1402:   case 1403:   case 1404:   case 1405:   case 1406:   case 1407:   case 1408:   case 1409:   case 1410:
703             case 1411:   case 1412:   case 1413:   case 1414:
704                 lc = (utf32 - 48);
705                 break;
706             case 181:
707                 lc = 924;
708                 break;
709             case 255:
710                 lc = 376;
711                 break;
712             case 305:
713                 lc = 73;
714                 break;
715             case 383:
716                 lc = 83;
717                 break;
718             case 387:
719                 lc = 386;
720                 break;
721             case 389:
722                 lc = 388;
723                 break;
724             case 392:
725                 lc = 391;
726                 break;
727             case 396:
728                 lc = 395;
729                 break;
730             case 402:
731                 lc = 401;
732                 break;
733             case 405:
734                 lc = 502;
735                 break;
736             case 409:
737                 lc = 408;
738                 break;
739             case 414:
740                 lc = 544;
741                 break;
742             case 424:
743                 lc = 423;
744                 break;
745             case 429:
746                 lc = 428;
747                 break;
748             case 432:
749                 lc = 431;
750                 break;
751             case 436:
752                 lc = 435;
753                 break;
754             case 438:
755                 lc = 437;
756                 break;
757             case 441:
758                 lc = 440;
759                 break;
760             case 445:
761                 lc = 444;
762                 break;
763             case 447:
764                 lc = 503;
765                 break;
766             case 453:
767                 lc = 452;
768                 break;
769             case 454:
770                 lc = 452;
771                 break;
772             case 456:
773                 lc = 455;
774                 break;
775             case 457:
776                 lc = 455;
777                 break;
778             case 459:
779                 lc = 458;
780                 break;
781             case 460:
782                 lc = 458;
783                 break;
784             case 462:
785                 lc = 461;
786                 break;
787             case 477:
788                 lc = 398;
789                 break;
790             case 479:
791                 lc = 478;
792                 break;
793             case 498:
794                 lc = 497;
795                 break;
796             case 499:
797                 lc = 497;
798                 break;
799             case 501:
800                 lc = 500;
801                 break;
802             case 505:
803                 lc = 504;
804                 break;
805             case 595:
806                 lc = 385;
807                 break;
808             case 596:
809                 lc = 390;
810                 break;
811             case 598:
812                 lc = 393;
813                 break;
814             case 599:
815                 lc = 394;
816                 break;
817             case 601:
818                 lc = 399;
819                 break;
820             case 603:
821                 lc = 400;
822                 break;
823             case 608:
824                 lc = 403;
825                 break;
826             case 611:
827                 lc = 404;
828                 break;
829             case 616:
830                 lc = 407;
831                 break;
832             case 617:
833                 lc = 406;
834                 break;
835             case 623:
836                 lc = 412;
837                 break;
838             case 626:
839                 lc = 413;
840                 break;
841             case 629:
842                 lc = 415;
843                 break;
844             case 640:
845                 lc = 422;
846                 break;
847             case 643:
848                 lc = 425;
849                 break;
850             case 648:
851                 lc = 430;
852                 break;
853             case 650:
854                 lc = 433;
855                 break;
856             case 651:
857                 lc = 434;
858                 break;
859             case 658:
860                 lc = 439;
861                 break;
862             case 837:
863                 lc = 921;
864                 break;
865             case 940:
866                 lc = 902;
867                 break;
868             case 941:
869                 lc = 904;
870                 break;
871             case 942:
872                 lc = 905;
873                 break;
874             case 943:
875                 lc = 906;
876                 break;
877             case 962:
878                 lc = 931;
879                 break;
880             case 972:
881                 lc = 908;
882                 break;
883             case 973:
884                 lc = 910;
885                 break;
886             case 974:
887                 lc = 911;
888                 break;
889             case 976:
890                 lc = 914;
891                 break;
892             case 977:
893                 lc = 920;
894                 break;
895             case 981:
896                 lc = 934;
897                 break;
898             case 982:
899                 lc = 928;
900                 break;
901             case 1008:
902                 lc = 922;
903                 break;
904             case 1009:
905                 lc = 929;
906                 break;
907             case 1010:
908                 lc = 1017;
909                 break;
910             case 1013:
911                 lc = 917;
912                 break;
913             case 1016:
914                 lc = 1015;
915                 break;
916             case 1019:
917                 lc = 1018;
918                 break;
919             case 1273:
920                 lc = 1272;
921                 break;
922         default:
923             break;
924         }
925     }
926     return lc;
927 }
928 
929 /**
930  * Gets the UTF8 character 'utf8char' from the UTF8 string 'utf8str' from
931  *             position 'pos'
932  * @param    utf8str: utf8 string
933  * @param    pos: position from where the utf8 character is copied
934  *            (also output set as position of the next utf8 character in the utf8 string)
935  * @param    utf8char: zero terminated utf8 character containing 1 to 4 bytes (output)
936 */
picobase_get_utf8char(picoos_uint8 utf8[],picoos_int32 * pos,picobase_utf8char utf8char)937 static void picobase_get_utf8char (picoos_uint8 utf8[], picoos_int32 * pos, picobase_utf8char utf8char)
938 {
939 
940     int i;
941     int l;
942 
943     utf8char[0] = 0;
944     l = picobase_det_utf8_length(utf8[*pos]);
945     i = 0;
946     while ((((i < l) && (i < PICOBASE_UTF8_MAXLEN)) && (utf8[*pos] != 0))) {
947         utf8char[i] = utf8[*pos];
948         (*pos)++;
949         i++;
950     }
951     utf8char[i] = 0;
952 }
953 
954 
/**
 * Copies the UTF8 character starting at 'utf8s[*pos]' into 'utf8char' and,
 * on success, moves '*pos' to the first byte behind that character.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmax : upper limit for positions; the character must end
 *            at or before this position
 * @param    pos : start position of the character (in), position behind the
 *            character (out, only changed on success)
 * @param    utf8char : zero terminated utf8 character (output); holds the
 *            bytes copied so far if the string ends inside the character
 * @return   TRUE if a complete character was copied, FALSE if '*pos' is not
 *            below 'utf8slenmax', if the reported character length is 0 or
 *            larger than PICOBASE_UTF8_MAXLEN, if the character would
 *            extend beyond 'utf8slenmax', or if a zero byte is found inside
 *            the character
 */
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmax,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    utf8char[0] = 0;
    /* validate the position before utf8s[*pos] is read */
    if ((*pos) >= utf8slenmax) {
        return FALSE;
    }
    len = picobase_det_utf8_length(utf8s[*pos]);
    /* A length of 0 (presumably an invalid lead byte -- confirm against
     * picobase_det_utf8_length) must fail: reporting success without moving
     * '*pos' would stall any caller that iterates over the string.
     * The remaining-space test is written as a subtraction so that it
     * cannot wrap around. */
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - (*pos)))) {
        return FALSE;
    }

    poscnt = *pos;
    for (i = 0; i < len; i++) {
        if (utf8s[poscnt] == 0) {
            /* string ends inside the character */
            utf8char[i] = 0;
            return FALSE;
        }
        utf8char[i] = utf8s[poscnt];
        poscnt++;
    }
    utf8char[len] = 0;
    *pos = poscnt;
    return TRUE;
}
984 
/**
 * Moves '*pos' from the start of one UTF8 character in 'utf8s' to the first
 * byte behind that character, without copying the character.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmax : upper limit for positions; the character must end
 *            at or before this position
 * @param    pos : start position of the character (in), position behind the
 *            character (out, only changed on success)
 * @return   TRUE if '*pos' was advanced over a complete character, FALSE if
 *            '*pos' is not below 'utf8slenmax', if the reported character
 *            length is 0 or larger than PICOBASE_UTF8_MAXLEN, if the
 *            character would extend beyond 'utf8slenmax', or if a zero byte
 *            is found inside the character
 */
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmax,
                                           picoos_uint32 *pos) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    /* validate the position before utf8s[*pos] is read */
    if ((*pos) >= utf8slenmax) {
        return FALSE;
    }
    len = picobase_det_utf8_length(utf8s[*pos]);
    /* A length of 0 (presumably an invalid lead byte -- confirm against
     * picobase_det_utf8_length) must fail: reporting success without moving
     * '*pos' would stall any caller that iterates over the string.
     * The remaining-space test is written as a subtraction so that it
     * cannot wrap around. */
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - (*pos)))) {
        return FALSE;
    }

    poscnt = *pos;
    for (i = 0; i < len; i++) {
        if (utf8s[poscnt] == 0) {
            /* string ends inside the character */
            return FALSE;
        }
        poscnt++;
    }
    *pos = poscnt;
    return TRUE;
}
1010 
/**
 * Searches backwards from '*pos' for the start of the UTF8 character that
 * ends directly before '*pos', copies that character into 'utf8char' and
 * sets '*pos' to its first byte.
 *
 * At most PICOBASE_UTF8_MAXLEN bytes are inspected. A byte is accepted as
 * the start of the character when the length reported for it by
 * picobase_det_utf8_length() equals its distance to the original '*pos'.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmin : lowest position that may be inspected
 * @param    pos : position behind the character (in), start position of the
 *            character (out, only changed on success)
 * @param    utf8char : zero terminated utf8 character (output, empty on
 *            failure)
 * @return   TRUE if a character was found and copied, FALSE otherwise
 */
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmin,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 i, j;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    utf8char[0] = 0;
    if ((*pos) == 0) {
        return FALSE;
    }
    poscnt = (*pos) - 1;
    for (i = 1; i <= PICOBASE_UTF8_MAXLEN; i++) {
        if ((poscnt < utf8slenmin) || (utf8s[poscnt] == 0)) {
            break;
        }
        len = picobase_det_utf8_length(utf8s[poscnt]);
        if (len == i) {
            for (j = 0; j < len; j++) {
                utf8char[j] = utf8s[poscnt + j];
            }
            utf8char[len] = 0;
            *pos = poscnt;
            return TRUE;
        }
        if (poscnt == utf8slenmin) {
            /* Lower limit reached. Stop before decrementing: 'poscnt' is
             * unsigned, so with utf8slenmin == 0 it would wrap around to a
             * huge value that still passes the limit check and is then
             * used as an index. */
            break;
        }
        poscnt--;
    }
    return FALSE;
}
1041 
/**
 * Searches backwards from '*pos' for the start of the UTF8 character that
 * ends directly before '*pos' and sets '*pos' to its first byte, without
 * copying the character.
 *
 * At most PICOBASE_UTF8_MAXLEN bytes are inspected. A byte is accepted as
 * the start of the character when the length reported for it by
 * picobase_det_utf8_length() equals its distance to the original '*pos'.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmin : lowest position that may be inspected
 * @param    pos : position behind the character (in), start position of the
 *            character (out, only changed on success)
 * @return   TRUE if a character start was found, FALSE otherwise
 */
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmin,
                                           picoos_uint32 *pos) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    if ((*pos) == 0) {
        return FALSE;
    }
    poscnt = (*pos) - 1;
    for (i = 1; i <= PICOBASE_UTF8_MAXLEN; i++) {
        if ((poscnt < utf8slenmin) || (utf8s[poscnt] == 0)) {
            break;
        }
        len = picobase_det_utf8_length(utf8s[poscnt]);
        if (len == i) {
            *pos = poscnt;
            return TRUE;
        }
        if (poscnt == utf8slenmin) {
            /* Lower limit reached. Stop before decrementing: 'poscnt' is
             * unsigned, so with utf8slenmin == 0 it would wrap around to a
             * huge value that still passes the limit check and is then
             * used as an index. */
            break;
        }
        poscnt--;
    }
    return FALSE;
}
1066 
1067 /**
1068  * Converts utf8 input to utf32
1069  * @param    utf8[] : character encoded in utf8
1070  * @param    done : boolean indicating the completion of the operation (FALSE: conversion not done)
1071  * @return   a single character encoded in UTF32
1072 */
picobase_utf8_to_utf32(picoos_uint8 utf8[],picoos_uint8 * done)1073 static picobase_utf32 picobase_utf8_to_utf32 (picoos_uint8 utf8[], picoos_uint8 * done)
1074 {
1075     (*done) = TRUE;
1076     if ((utf8[0] < (picoos_uint8)'\200')) {
1077         return utf8[0];
1078     } else if ((utf8[0] >= (picoos_uint8)'\370')) {
1079         return 0;
1080     } else if ((utf8[0] >= (picoos_uint8)'\360')) {
1081         return ((((262144 * (utf8[0] % 8)) + (4096 * (utf8[1] % 64))) + (64 * (utf8[2] % 64))) + (utf8[3] % 64));
1082     } else if ((utf8[0] >= (picoos_uint8)'\340')) {
1083         return (((4096 * (utf8[0] % 16)) + (64 * (utf8[1] % 64))) + (utf8[2] % 64));
1084     } else if ((utf8[(0)] >= (picoos_uint8)'\300')) {
1085         return ((64 * (utf8[0] % 32)) + (utf8[1] % 64));
1086     } else {
1087         (*done) = FALSE;
1088         return 0;
1089     }
1090 }
1091 
/**
 * Converts a single UTF32 character to UTF8.
 *
 * @param    utf32 : character encoded in UTF32
 * @param    utf8[] : receives the 1 to 4 bytes of the UTF8 encoding; a
 *            terminating zero is appended if 'utf8MaxLen' leaves room
 * @param    utf8MaxLen : size of 'utf8'; must be at least 4, independent of
 *            the length actually needed
 * @param    done : set to TRUE if the conversion was done, FALSE if
 *            'utf8MaxLen' is below 4 or 'utf32' is above U+10FFFF
 * @return   number of bytes written (without terminating zero), 0 if not
 *            done
 */
static picoos_int32 picobase_utf32_to_utf8 (picobase_utf32 utf32, picobase_utf8 utf8[], picoos_int32 utf8MaxLen, picoos_uint8 * done)
{
    picoos_int32 len;

    /* The highest Unicode code point is U+10FFFF; the former limit of
     * 0x100000 rejected the complete last plane. */
    if ((utf8MaxLen < 4) || (utf32 > 0x10FFFF)) {
        (*done) = FALSE;
        return 0;
    }

    (*done) = TRUE;
    if (utf32 < 0x80) {
        len = 1;
        utf8[0] = (picobase_utf8) utf32;
    } else if (utf32 < 0x800) {
        len = 2;
        utf8[0] = (picobase_utf8) (0xC0 | (utf32 >> 6));
        utf8[1] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    } else if (utf32 < 0x10000) {
        len = 3;
        utf8[0] = (picobase_utf8) (0xE0 | (utf32 >> 12));
        utf8[1] = (picobase_utf8) (0x80 | ((utf32 >> 6) & 0x3F));
        utf8[2] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    } else {
        len = 4;
        utf8[0] = (picobase_utf8) (0xF0 | (utf32 >> 18));
        utf8[1] = (picobase_utf8) (0x80 | ((utf32 >> 12) & 0x3F));
        utf8[2] = (picobase_utf8) (0x80 | ((utf32 >> 6) & 0x3F));
        utf8[3] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    }
    /* terminate only if the buffer has a byte left for it */
    if (len < utf8MaxLen) {
        utf8[len] = 0;
    }
    return len;
}
1135 
1136 
picobase_lowercase_utf8_str(picoos_uchar utf8str[],picoos_char lowercase[],int lowercaseMaxLen,picoos_uint8 * done)1137 extern picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], int lowercaseMaxLen, picoos_uint8 * done)
1138 {
1139     picobase_utf8char utf8char;
1140     picoos_int32 i;
1141     picoos_int32 j;
1142     picoos_int32 k;
1143     picoos_int32 l;
1144     picobase_utf32 utf32;
1145     picoos_uint8 done1;
1146 
1147     k = 0;
1148     i = 0;
1149     (*done) = TRUE;
1150     while (utf8str[i] != 0) {
1151         picobase_get_utf8char(utf8str,& i,utf8char);
1152         utf32 = picobase_utf8_to_utf32(utf8char, & done1);
1153         utf32 = base_utf32_lowercase(utf32);
1154         l = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, & done1);
1155         j = 0;
1156         while ((j < l) && (k < (lowercaseMaxLen-1))) {
1157             lowercase[k] = utf8char[j];
1158             k++;
1159             j++;
1160         }
1161         *done = *done && (j == l);
1162     }
1163     lowercase[k] = 0;
1164     return k;
1165 }
1166 
1167 
picobase_uppercase_utf8_str(picoos_uchar utf8str[],picoos_char uppercase[],int uppercaseMaxLen,picoos_uint8 * done)1168 extern picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done)
1169 {
1170     picobase_utf8char utf8char;
1171     picoos_int32 i;
1172     picoos_int32 j;
1173     picoos_int32 k;
1174     picoos_int32 l;
1175     picobase_utf32 utf32;
1176     picoos_uint8 done1;
1177 
1178     k = 0;
1179     i = 0;
1180     (*done) = TRUE;
1181     while (utf8str[i] != 0) {
1182         picobase_get_utf8char(utf8str,& i,utf8char);
1183         utf32 = picobase_utf8_to_utf32(utf8char, & done1);
1184         utf32 = base_utf32_uppercase(utf32);
1185         l = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, & done1);
1186         j = 0;
1187         while ((j < l) && (k < (uppercaseMaxLen-1))) {
1188             uppercase[k] = utf8char[j];
1189             k++;
1190             j++;
1191         }
1192         *done = *done && (j == l);
1193     }
1194     uppercase[k] = 0;
1195     return k;
1196 }
1197 
1198 
picobase_is_utf8_uppercase(picoos_uchar utf8str[],picoos_int32 utf8strmaxlen)1199 extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
1200 {
1201     picobase_utf8char utf8char;
1202     picoos_int32 i;
1203     picoos_uint32 utf32;
1204     picoos_bool done;
1205     picoos_bool isUpperCase;
1206 
1207     isUpperCase = TRUE;
1208     i = 0;
1209     while (isUpperCase && (i <= utf8strmaxlen-1) && (utf8str[i] != 0)) {
1210         picobase_get_utf8char(utf8str,& i,utf8char);
1211         utf32 = picobase_utf8_to_utf32(utf8char,& done);
1212         isUpperCase = isUpperCase && (utf32 == base_utf32_uppercase(utf32));
1213     }
1214     return isUpperCase;
1215 }
1216 
1217 
picobase_is_utf8_lowercase(picoos_uchar utf8str[],picoos_int32 utf8strmaxlen)1218 extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
1219 {
1220     picobase_utf8char utf8char;
1221     picoos_int32 i;
1222     picoos_uint32 utf32;
1223     picoos_bool done;
1224     picoos_bool isLowerCase;
1225 
1226     isLowerCase = TRUE;
1227     i = 0;
1228     while (isLowerCase && (i <= utf8strmaxlen-1) && (utf8str[i] != 0)) {
1229         picobase_get_utf8char(utf8str,& i,utf8char);
1230         utf32 = picobase_utf8_to_utf32(utf8char,& done);
1231         isLowerCase = isLowerCase && (utf32 == base_utf32_lowercase(utf32));
1232     }
1233     return isLowerCase;
1234 }
1235 
1236 
1237 #ifdef __cplusplus
1238 }
1239 #endif
1240 
1241 
1242 
1243 /* end */
1244