• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *     http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  /**
17   * @file picobase.c
18   *
19   * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20   * All rights reserved.
21   *
22   * History:
23   * - 2009-04-20 -- initial version
24   *
25   */
26  
27  #include "picoos.h"
28  #include "picodbg.h"
29  #include "picodefs.h"
30  #include "picobase.h"
31  
32  #ifdef __cplusplus
33  extern "C" {
34  #endif
35  #if 0
36  }
37  #endif
38  
39  /**
40   * @addtogroup picobase
41   *
42   * @b Unicode_UTF8_functions
43   *
44   * UTF8
45   * scalar value                1st Byte    2nd Byte    3rd Byte    4th Byte
46   * 00000000 0xxxxxxx           0xxxxxxx
47   * 00000yyy yyxxxxxx           110yyyyy    10xxxxxx
48   * zzzzyyyy yyxxxxxx           1110zzzz    10yyyyyy    10xxxxxx
49   * 000uuuuu zzzzyyyy yyxxxxxx  11110uuu    10uuzzzz    10yyyyyy    10xxxxxx
50   *
51  */
picobase_utf8_length(const picoos_uint8 * utf8str,const picoos_uint16 maxlen)52  picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
53                                    const picoos_uint16 maxlen) {
54  
55      picoos_uint16 i;
56      picoos_uint16 len;
57      picoos_uint8 follow;
58      picoos_uint8 ok;
59  
60      ok = TRUE;
61      i = 0;
62      len = 0;
63      follow = 0;
64      while (ok && (i < maxlen) && (utf8str[i] != '\000')) {
65          if (follow > 0) {
66              if ((utf8str[i] >= (picoos_uint8)'\200') &&
67                  (utf8str[i] < (picoos_uint8)'\300')) {
68                  follow--;
69              } else {
70                  ok = FALSE;
71              }
72          } else if (utf8str[i] < (picoos_uint8)'\200') {
73              len++;
74          } else if (utf8str[i] >= (picoos_uint8)'\370') {
75              ok = FALSE;
76          } else if (utf8str[i] >= (picoos_uint8)'\360') {
77              follow = 3;
78              len++;
79          } else if (utf8str[i] >= (picoos_uint8)'\340') {
80              follow = 2;
81              len++;
82          } else if (utf8str[i] >= (picoos_uint8)'\300') {
83              follow = 1;
84              len++;
85          } else {
86              ok = FALSE;
87          }
88          i++;
89      }
90      if (ok) {
91          return len;
92      } else {
93          return -1;
94      }
95  }
96  
97  
/**
 * Converts utf32 input to lowercase using the hard-coded mapping below
 * @param    utf32 : a single character encoded in UTF32
 * @return   the mapped code point; any value that has no entry in the
 *           mapping (including values that are listed but excluded by a
 *           parity check) is returned unchanged
 *
 * Layout of the lookup: two contiguous ranges are handled by a constant
 * offset, values in 7680..9423 are dispatched through the first switch,
 * and all remaining values go through the second switch.
 */
base_utf32_lowercase(picoos_uint32 utf32)98  static picoos_uint32 base_utf32_lowercase (picoos_uint32 utf32)
99  {
100  
101      picoos_uint32 lc;
102  
    /* start from the identity mapping; only matching entries overwrite it */
103      lc = utf32;
    /* 65313..65338 (0xFF21..0xFF3A): constant offset +32 */
104      if (((utf32 >= 65313) && (utf32 <= 65338))) {
105          lc = (utf32 + 32);
    /* 66560..66599 (0x10400..0x10427): constant offset +40 */
106      } else if (((utf32 >= 66560) && (utf32 <= 66599))) {
107          lc = (utf32 + 40);
    /* 7680..9423 (0x1E00..0x24CF): per-code-point table */
108      } else if (((utf32 >= 7680) && (utf32 <= 9423))) {
109          switch (utf32) {
110              case 7680:   case 7681:   case 7682:   case 7683:   case 7684:   case 7685:   case 7686:   case 7687:   case 7688:   case 7689:
111              case 7690:   case 7691:   case 7692:   case 7693:   case 7694:   case 7695:   case 7696:   case 7697:   case 7698:   case 7699:   case 7700:   case 7701:
112              case 7702:   case 7703:   case 7704:   case 7705:   case 7706:   case 7707:   case 7708:   case 7709:   case 7710:   case 7711:   case 7712:   case 7713:
113              case 7714:   case 7715:   case 7716:   case 7717:   case 7718:   case 7719:   case 7720:   case 7721:   case 7722:   case 7723:   case 7724:   case 7725:
114              case 7726:   case 7727:   case 7728:   case 7729:   case 7730:   case 7731:   case 7732:   case 7733:   case 7734:   case 7735:   case 7736:   case 7737:
115              case 7738:   case 7739:   case 7740:   case 7741:   case 7742:   case 7743:   case 7744:   case 7745:   case 7746:   case 7747:   case 7748:   case 7749:
116              case 7750:   case 7751:   case 7752:   case 7753:   case 7754:   case 7755:   case 7756:   case 7757:   case 7758:   case 7759:   case 7760:   case 7761:
117              case 7762:   case 7763:   case 7764:   case 7765:   case 7766:   case 7767:   case 7768:   case 7769:   case 7770:   case 7771:   case 7772:   case 7773:
118              case 7774:   case 7775:   case 7776:   case 7777:   case 7778:   case 7779:   case 7780:   case 7781:   case 7782:   case 7783:   case 7784:   case 7785:
119              case 7786:   case 7787:   case 7788:   case 7789:   case 7790:   case 7791:   case 7792:   case 7793:   case 7794:   case 7795:   case 7796:   case 7797:
120              case 7798:   case 7799:   case 7800:   case 7801:   case 7802:   case 7803:   case 7804:   case 7805:   case 7806:   case 7807:   case 7808:   case 7809:
121              case 7810:   case 7811:   case 7812:   case 7813:   case 7814:   case 7815:   case 7816:   case 7817:   case 7818:   case 7819:   case 7820:   case 7821:
122              case 7822:   case 7823:   case 7824:   case 7825:   case 7826:   case 7827:   case 7828:   case 7840:   case 7841:   case 7842:   case 7843:
123              case 7844:   case 7845:   case 7846:   case 7847:   case 7848:   case 7849:   case 7850:   case 7851:   case 7852:   case 7853:   case 7854:   case 7855:
124              case 7856:   case 7857:   case 7858:   case 7859:   case 7860:   case 7861:   case 7862:   case 7863:   case 7864:   case 7865:   case 7866:   case 7867:
125              case 7868:   case 7869:   case 7870:   case 7871:   case 7872:   case 7873:   case 7874:   case 7875:   case 7876:   case 7877:   case 7878:   case 7879:
126              case 7880:   case 7881:   case 7882:   case 7883:   case 7884:   case 7885:   case 7886:   case 7887:   case 7888:   case 7889:   case 7890:   case 7891:
127              case 7892:   case 7893:   case 7894:   case 7895:   case 7896:   case 7897:   case 7898:   case 7899:   case 7900:   case 7901:   case 7902:   case 7903:
128              case 7904:   case 7905:   case 7906:   case 7907:   case 7908:   case 7909:   case 7910:   case 7911:   case 7912:   case 7913:   case 7914:   case 7915:
129              case 7916:   case 7917:   case 7918:   case 7919:   case 7920:   case 7921:   case 7922:   case 7923:   case 7924:   case 7925:   case 7926:   case 7927:
130              case 7928:
                /* even code points map to the next (odd) one; odd ones stay as they are */
131                  if ( !(((utf32) % 2 == 1))) {
132                      lc = (utf32 + 1);
133                  }
134                  break;
135              case 7944:   case 7945:   case 7946:   case 7947:   case 7948:   case 7949:   case 7950:   case 7951:   case 7960:
136              case 7961:   case 7962:   case 7963:   case 7964:   case 7965:   case 7976:   case 7977:   case 7978:   case 7979:   case 7980:   case 7981:
137              case 7982:   case 7983:   case 7992:   case 7993:   case 7994:   case 7995:   case 7996:   case 7997:   case 7998:   case 7999:
138              case 8008:   case 8009:   case 8010:   case 8011:   case 8012:   case 8013:   case 8040:   case 8041:   case 8042:   case 8043:   case 8044:
139              case 8045:   case 8046:   case 8047:   case 8072:   case 8073:   case 8074:   case 8075:   case 8076:   case 8077:   case 8078:   case 8079:
140              case 8088:   case 8089:   case 8090:   case 8091:   case 8092:   case 8093:   case 8094:   case 8095:   case 8104:   case 8105:
141              case 8106:   case 8107:   case 8108:   case 8109:   case 8110:   case 8111:
                /* constant offset -8 */
142                  lc = (utf32 - 8);
143                  break;
144              case 8025:   case 8026:   case 8027:   case 8028:   case 8029:   case 8030:   case 8031:
                /* only the odd code points of this run are mapped (offset -8) */
145                  if (((utf32) % 2 == 1)) {
146                      lc = (utf32 - 8);
147                  }
148                  break;
149              case 8544:   case 8545:   case 8546:   case 8547:   case 8548:   case 8549:   case 8550:   case 8551:   case 8552:   case 8553:
150              case 8554:   case 8555:   case 8556:   case 8557:   case 8558:   case 8559:
                /* constant offset +16 */
151                  lc = (utf32 + 16);
152                  break;
153              case 9398:   case 9399:   case 9400:   case 9401:   case 9402:   case 9403:   case 9404:   case 9405:   case 9406:   case 9407:
154              case 9408:   case 9409:   case 9410:   case 9411:   case 9412:   case 9413:   case 9414:   case 9415:   case 9416:   case 9417:   case 9418:   case 9419:
155              case 9420:   case 9421:   case 9422:   case 9423:
                /* constant offset +26 */
156                  lc = (utf32 + 26);
157                  break;
            /* irregular one-to-one mappings follow */
158              case 8120:
159                  lc = 8112;
160                  break;
161              case 8121:
162                  lc = 8113;
163                  break;
164              case 8122:
165                  lc = 8048;
166                  break;
167              case 8123:
168                  lc = 8049;
169                  break;
170              case 8124:
171                  lc = 8115;
172                  break;
173              case 8136:
174                  lc = 8050;
175                  break;
176              case 8137:
177                  lc = 8051;
178                  break;
179              case 8138:
180                  lc = 8052;
181                  break;
182              case 8139:
183                  lc = 8053;
184                  break;
185              case 8140:
186                  lc = 8131;
187                  break;
188              case 8152:
189                  lc = 8144;
190                  break;
191              case 8153:
192                  lc = 8145;
193                  break;
194              case 8154:
195                  lc = 8054;
196                  break;
197              case 8155:
198                  lc = 8055;
199                  break;
200              case 8168:
201                  lc = 8160;
202                  break;
203              case 8169:
204                  lc = 8161;
205                  break;
206              case 8170:
207                  lc = 8058;
208                  break;
209              case 8171:
210                  lc = 8059;
211                  break;
212              case 8172:
213                  lc = 8165;
214                  break;
215              case 8184:
216                  lc = 8056;
217                  break;
218              case 8185:
219                  lc = 8057;
220                  break;
221              case 8186:
222                  lc = 8060;
223                  break;
224              case 8187:
225                  lc = 8061;
226                  break;
227              case 8188:
228                  lc = 8179;
229                  break;
230              case 8486:
231                  lc = 969;
232                  break;
233              case 8490:
234                  lc = 107;
235                  break;
236              case 8491:
237                  lc = 229;
238                  break;
        /* no entry in this range: lc keeps the input value */
239          default:
240              break;
241          }
    /* everything outside the ranges tested above */
242      } else {
243          switch (utf32) {
244              case 65:   case 66:   case 67:   case 68:   case 69:   case 70:   case 71:   case 72:   case 73:   case 74:
245              case 75:   case 76:   case 77:   case 78:   case 79:   case 80:   case 81:   case 82:   case 83:   case 84:   case 85:   case 86:
246              case 87:   case 88:   case 89:   case 90:   case 192:   case 193:   case 194:   case 195:   case 196:   case 197:   case 198:
247              case 199:   case 200:   case 201:   case 202:   case 203:   case 204:   case 205:   case 206:   case 207:   case 208:   case 209:   case 210:
248              case 211:   case 212:   case 213:   case 214:   case 216:   case 217:   case 218:   case 219:   case 220:   case 221:   case 222:
249              case 913:   case 914:   case 915:   case 916:   case 917:   case 918:   case 919:   case 920:   case 921:   case 922:   case 923:
250              case 924:   case 925:   case 926:   case 927:   case 928:   case 929:   case 931:   case 932:   case 933:   case 934:   case 935:
251              case 936:   case 937:   case 938:   case 939:   case 1040:   case 1041:   case 1042:   case 1043:   case 1044:   case 1045:   case 1046:
252              case 1047:   case 1048:   case 1049:   case 1050:   case 1051:   case 1052:   case 1053:   case 1054:   case 1055:   case 1056:   case 1057:   case 1058:
253              case 1059:   case 1060:   case 1061:   case 1062:   case 1063:   case 1064:   case 1065:   case 1066:   case 1067:   case 1068:   case 1069:   case 1070:
254              case 1071:
                /* constant offset +32 */
255                  lc = (utf32 + 32);
256                  break;
257              case 256:   case 257:   case 258:   case 259:   case 260:   case 261:   case 262:   case 263:   case 264:   case 265:
258              case 266:   case 267:   case 268:   case 269:   case 270:   case 271:   case 272:   case 273:   case 274:   case 275:   case 276:   case 277:
259              case 278:   case 279:   case 280:   case 281:   case 282:   case 283:   case 284:   case 285:   case 286:   case 287:   case 288:   case 289:
260              case 290:   case 291:   case 292:   case 293:   case 294:   case 295:   case 296:   case 297:   case 298:   case 299:   case 300:   case 301:
261              case 302:   case 303:   case 305:   case 306:   case 307:   case 308:   case 309:   case 310:   case 330:   case 331:
262              case 332:   case 333:   case 334:   case 335:   case 336:   case 337:   case 338:   case 339:   case 340:   case 341:   case 342:   case 343:
263              case 344:   case 345:   case 346:   case 347:   case 348:   case 349:   case 350:   case 351:   case 352:   case 353:   case 354:   case 355:
264              case 356:   case 357:   case 358:   case 359:   case 360:   case 361:   case 362:   case 363:   case 364:   case 365:   case 366:   case 367:
265              case 368:   case 369:   case 370:   case 371:   case 372:   case 373:   case 374:   case 416:   case 417:   case 418:   case 419:
266              case 420:   case 478:   case 479:   case 480:   case 481:   case 482:   case 483:   case 484:   case 485:   case 486:   case 487:
267              case 488:   case 489:   case 490:   case 491:   case 492:   case 493:   case 494:   case 504:   case 505:   case 506:   case 507:
268              case 508:   case 509:   case 510:   case 511:   case 512:   case 513:   case 514:   case 515:   case 516:   case 517:   case 518:   case 519:
269              case 520:   case 521:   case 522:   case 523:   case 524:   case 525:   case 526:   case 527:   case 528:   case 529:   case 530:   case 531:
270              case 532:   case 533:   case 534:   case 535:   case 536:   case 537:   case 538:   case 539:   case 540:   case 541:   case 542:
271              case 546:   case 547:   case 548:   case 549:   case 550:   case 551:   case 552:   case 553:   case 554:   case 555:   case 556:   case 557:
272              case 558:   case 559:   case 560:   case 561:   case 562:   case 984:   case 985:   case 986:   case 987:   case 988:   case 989:
273              case 990:   case 991:   case 992:   case 993:   case 994:   case 995:   case 996:   case 997:   case 998:   case 999:   case 1000:   case 1001:
274              case 1002:   case 1003:   case 1004:   case 1005:   case 1006:   case 1120:   case 1121:   case 1122:   case 1123:   case 1124:   case 1125:
275              case 1126:   case 1127:   case 1128:   case 1129:   case 1130:   case 1131:   case 1132:   case 1133:   case 1134:   case 1135:   case 1136:   case 1137:
276              case 1138:   case 1139:   case 1140:   case 1141:   case 1142:   case 1143:   case 1144:   case 1145:   case 1146:   case 1147:   case 1148:   case 1149:
277              case 1150:   case 1151:   case 1152:   case 1162:   case 1163:   case 1164:   case 1165:   case 1166:   case 1167:   case 1168:   case 1169:
278              case 1170:   case 1171:   case 1172:   case 1173:   case 1174:   case 1175:   case 1176:   case 1177:   case 1178:   case 1179:   case 1180:   case 1181:
279              case 1182:   case 1183:   case 1184:   case 1185:   case 1186:   case 1187:   case 1188:   case 1189:   case 1190:   case 1191:   case 1192:   case 1193:
280              case 1194:   case 1195:   case 1196:   case 1197:   case 1198:   case 1199:   case 1200:   case 1201:   case 1202:   case 1203:   case 1204:   case 1205:
281              case 1206:   case 1207:   case 1208:   case 1209:   case 1210:   case 1211:   case 1212:   case 1213:   case 1214:   case 1232:   case 1233:
282              case 1234:   case 1235:   case 1236:   case 1237:   case 1238:   case 1239:   case 1240:   case 1241:   case 1242:   case 1243:   case 1244:   case 1245:
283              case 1246:   case 1247:   case 1248:   case 1249:   case 1250:   case 1251:   case 1252:   case 1253:   case 1254:   case 1255:   case 1256:   case 1257:
284              case 1258:   case 1259:   case 1260:   case 1261:   case 1262:   case 1263:   case 1264:   case 1265:   case 1266:   case 1267:   case 1268:
285              case 1280:   case 1281:   case 1282:   case 1283:   case 1284:   case 1285:   case 1286:   case 1287:   case 1288:   case 1289:   case 1290:   case 1291:
286              case 1292:   case 1293:   case 1294:
                /* even code points map to the next (odd) one; odd ones stay as they are */
287                  if ( !(((utf32) % 2 == 1))) {
288                      lc = (utf32 + 1);
289                  }
290                  break;
291              case 313:   case 314:   case 315:   case 316:   case 317:   case 318:   case 319:   case 320:   case 321:   case 322:
292              case 323:   case 324:   case 325:   case 326:   case 327:   case 377:   case 378:   case 379:   case 380:   case 381:
293              case 459:   case 460:   case 461:   case 462:   case 463:   case 464:   case 465:   case 466:   case 467:   case 468:   case 469:   case 470:
294              case 471:   case 472:   case 473:   case 474:   case 475:   case 1217:   case 1218:   case 1219:   case 1220:   case 1221:   case 1222:
295              case 1223:   case 1224:   case 1225:   case 1226:   case 1227:   case 1228:   case 1229:
                /* opposite parity: odd code points map to the next (even) one */
296                  if (((utf32) % 2 == 1)) {
297                      lc = (utf32 + 1);
298                  }
299                  break;
300              case 1024:   case 1025:   case 1026:   case 1027:   case 1028:   case 1029:   case 1030:   case 1031:   case 1032:   case 1033:
301              case 1034:   case 1035:   case 1036:   case 1037:   case 1038:   case 1039:
                /* constant offset +80 */
302                  lc = (utf32 + 80);
303                  break;
304              case 1329:   case 1330:   case 1331:   case 1332:   case 1333:   case 1334:   case 1335:   case 1336:   case 1337:   case 1338:
305              case 1339:   case 1340:   case 1341:   case 1342:   case 1343:   case 1344:   case 1345:   case 1346:   case 1347:   case 1348:   case 1349:   case 1350:
306              case 1351:   case 1352:   case 1353:   case 1354:   case 1355:   case 1356:   case 1357:   case 1358:   case 1359:   case 1360:   case 1361:   case 1362:
307              case 1363:   case 1364:   case 1365:   case 1366:
                /* constant offset +48 */
308                  lc = (utf32 + 48);
309                  break;
            /* irregular one-to-one mappings follow */
310              case 304:
311                  lc = 105;
312                  break;
313              case 376:
314                  lc = 255;
315                  break;
316              case 385:
317                  lc = 595;
318                  break;
319              case 386:
320                  lc = 387;
321                  break;
322              case 388:
323                  lc = 389;
324                  break;
325              case 390:
326                  lc = 596;
327                  break;
328              case 391:
329                  lc = 392;
330                  break;
331              case 393:
332                  lc = 598;
333                  break;
334              case 394:
335                  lc = 599;
336                  break;
337              case 395:
338                  lc = 396;
339                  break;
340              case 398:
341                  lc = 477;
342                  break;
343              case 399:
344                  lc = 601;
345                  break;
346              case 400:
347                  lc = 603;
348                  break;
349              case 401:
350                  lc = 402;
351                  break;
352              case 403:
353                  lc = 608;
354                  break;
355              case 404:
356                  lc = 611;
357                  break;
358              case 406:
359                  lc = 617;
360                  break;
361              case 407:
362                  lc = 616;
363                  break;
364              case 408:
365                  lc = 409;
366                  break;
367              case 412:
368                  lc = 623;
369                  break;
370              case 413:
371                  lc = 626;
372                  break;
373              case 415:
374                  lc = 629;
375                  break;
376              case 422:
377                  lc = 640;
378                  break;
379              case 423:
380                  lc = 424;
381                  break;
382              case 425:
383                  lc = 643;
384                  break;
385              case 428:
386                  lc = 429;
387                  break;
388              case 430:
389                  lc = 648;
390                  break;
391              case 431:
392                  lc = 432;
393                  break;
394              case 433:
395                  lc = 650;
396                  break;
397              case 434:
398                  lc = 651;
399                  break;
400              case 435:
401                  lc = 436;
402                  break;
403              case 437:
404                  lc = 438;
405                  break;
406              case 439:
407                  lc = 658;
408                  break;
409              case 440:
410                  lc = 441;
411                  break;
412              case 444:
413                  lc = 445;
414                  break;
415              case 452:
416                  lc = 454;
417                  break;
418              case 453:
419                  lc = 454;
420                  break;
421              case 455:
422                  lc = 457;
423                  break;
424              case 456:
425                  lc = 457;
426                  break;
427              case 458:
428                  lc = 460;
429                  break;
430              case 497:
431                  lc = 499;
432                  break;
433              case 498:
434                  lc = 499;
435                  break;
436              case 500:
437                  lc = 501;
438                  break;
439              case 502:
440                  lc = 405;
441                  break;
442              case 503:
443                  lc = 447;
444                  break;
445              case 544:
446                  lc = 414;
447                  break;
448              case 902:
449                  lc = 940;
450                  break;
451              case 904:
452                  lc = 941;
453                  break;
454              case 905:
455                  lc = 942;
456                  break;
457              case 906:
458                  lc = 943;
459                  break;
460              case 908:
461                  lc = 972;
462                  break;
463              case 910:
464                  lc = 973;
465                  break;
466              case 911:
467                  lc = 974;
468                  break;
469              case 1012:
470                  lc = 952;
471                  break;
472              case 1015:
473                  lc = 1016;
474                  break;
475              case 1017:
476                  lc = 1010;
477                  break;
478              case 1018:
479                  lc = 1019;
480                  break;
481              case 1272:
482                  lc = 1273;
483                  break;
        /* no entry: lc keeps the input value */
484          default:
485              break;
486          }
487      }
488      return lc;
489  }
490  
491  /**
492   * Converts utf32 input to uppercase
493   * @param    utf32 : a single character encoded in UTF32
494   * @return   a single uppercase character encoded in UTF32
495  */
base_utf32_uppercase(picoos_uint32 utf32)496  static picoos_uint32 base_utf32_uppercase (picoos_uint32 utf32)
497  {
498      picoos_uint32 lc;
499  
500      lc = utf32;
501      if (((utf32 >= 65345) && (utf32 <= 65370))) {
502          lc = (utf32 - 32);
503      } else if (((utf32 >= 66600) && (utf32 <= 66639))) {
504          lc = (utf32 - 40);
505      } else if (((utf32 >= 7681) && (utf32 <= 9449))) {
506          switch (utf32) {
507              case 7681:   case 7682:   case 7683:   case 7684:   case 7685:   case 7686:   case 7687:   case 7688:   case 7689:   case 7690:
508              case 7691:   case 7692:   case 7693:   case 7694:   case 7695:   case 7696:   case 7697:   case 7698:   case 7699:   case 7700:   case 7701:   case 7702:
509              case 7703:   case 7704:   case 7705:   case 7706:   case 7707:   case 7708:   case 7709:   case 7710:   case 7711:   case 7712:   case 7713:   case 7714:
510              case 7715:   case 7716:   case 7717:   case 7718:   case 7719:   case 7720:   case 7721:   case 7722:   case 7723:   case 7724:   case 7725:   case 7726:
511              case 7727:   case 7728:   case 7729:   case 7730:   case 7731:   case 7732:   case 7733:   case 7734:   case 7735:   case 7736:   case 7737:   case 7738:
512              case 7739:   case 7740:   case 7741:   case 7742:   case 7743:   case 7744:   case 7745:   case 7746:   case 7747:   case 7748:   case 7749:   case 7750:
513              case 7751:   case 7752:   case 7753:   case 7754:   case 7755:   case 7756:   case 7757:   case 7758:   case 7759:   case 7760:   case 7761:   case 7762:
514              case 7763:   case 7764:   case 7765:   case 7766:   case 7767:   case 7768:   case 7769:   case 7770:   case 7771:   case 7772:   case 7773:   case 7774:
515              case 7775:   case 7776:   case 7777:   case 7778:   case 7779:   case 7780:   case 7781:   case 7782:   case 7783:   case 7784:   case 7785:   case 7786:
516              case 7787:   case 7788:   case 7789:   case 7790:   case 7791:   case 7792:   case 7793:   case 7794:   case 7795:   case 7796:   case 7797:   case 7798:
517              case 7799:   case 7800:   case 7801:   case 7802:   case 7803:   case 7804:   case 7805:   case 7806:   case 7807:   case 7808:   case 7809:   case 7810:
518              case 7811:   case 7812:   case 7813:   case 7814:   case 7815:   case 7816:   case 7817:   case 7818:   case 7819:   case 7820:   case 7821:   case 7822:
519              case 7823:   case 7824:   case 7825:   case 7826:   case 7827:   case 7828:   case 7829:   case 7841:   case 7842:   case 7843:   case 7844:
520              case 7845:   case 7846:   case 7847:   case 7848:   case 7849:   case 7850:   case 7851:   case 7852:   case 7853:   case 7854:   case 7855:   case 7856:
521              case 7857:   case 7858:   case 7859:   case 7860:   case 7861:   case 7862:   case 7863:   case 7864:   case 7865:   case 7866:   case 7867:   case 7868:
522              case 7869:   case 7870:   case 7871:   case 7872:   case 7873:   case 7874:   case 7875:   case 7876:   case 7877:   case 7878:   case 7879:   case 7880:
523              case 7881:   case 7882:   case 7883:   case 7884:   case 7885:   case 7886:   case 7887:   case 7888:   case 7889:   case 7890:   case 7891:   case 7892:
524              case 7893:   case 7894:   case 7895:   case 7896:   case 7897:   case 7898:   case 7899:   case 7900:   case 7901:   case 7902:   case 7903:   case 7904:
525              case 7905:   case 7906:   case 7907:   case 7908:   case 7909:   case 7910:   case 7911:   case 7912:   case 7913:   case 7914:   case 7915:   case 7916:
526              case 7917:   case 7918:   case 7919:   case 7920:   case 7921:   case 7922:   case 7923:   case 7924:   case 7925:   case 7926:   case 7927:   case 7928:
527              case 7929:
528                  if (((utf32) % 2 == 1)) {
529                      lc = (utf32 - 1);
530                  }
531                  break;
532              case 7936:   case 7937:   case 7938:   case 7939:   case 7940:   case 7941:   case 7942:   case 7943:   case 7952:
533              case 7953:   case 7954:   case 7955:   case 7956:   case 7957:   case 7968:   case 7969:   case 7970:   case 7971:   case 7972:   case 7973:
534              case 7974:   case 7975:   case 7984:   case 7985:   case 7986:   case 7987:   case 7988:   case 7989:   case 7990:   case 7991:
535              case 8000:   case 8001:   case 8002:   case 8003:   case 8004:   case 8005:   case 8032:   case 8033:   case 8034:   case 8035:   case 8036:
536              case 8037:   case 8038:   case 8039:   case 8064:   case 8065:   case 8066:   case 8067:   case 8068:   case 8069:   case 8070:   case 8071:
537              case 8080:   case 8081:   case 8082:   case 8083:   case 8084:   case 8085:   case 8086:   case 8087:   case 8096:   case 8097:
538              case 8098:   case 8099:   case 8100:   case 8101:   case 8102:   case 8103:
539                  lc = (utf32 + 8);
540                  break;
541              case 8017:   case 8018:   case 8019:   case 8020:   case 8021:   case 8022:   case 8023:
542                  if (((utf32) % 2 == 1)) {
543                      lc = (utf32 + 8);
544                  }
545                  break;
546              case 8560:   case 8561:   case 8562:   case 8563:   case 8564:   case 8565:   case 8566:   case 8567:   case 8568:   case 8569:
547              case 8570:   case 8571:   case 8572:   case 8573:   case 8574:   case 8575:
548                  lc = (utf32 - 16);
549                  break;
550              case 9424:   case 9425:   case 9426:   case 9427:   case 9428:   case 9429:   case 9430:   case 9431:   case 9432:   case 9433:
551              case 9434:   case 9435:   case 9436:   case 9437:   case 9438:   case 9439:   case 9440:   case 9441:   case 9442:   case 9443:   case 9444:   case 9445:
552              case 9446:   case 9447:   case 9448:   case 9449:
553                  lc = (utf32 - 26);
554                  break;
555              case 7835:
556                  lc = 7776;
557                  break;
558              case 8048:
559                  lc = 8122;
560                  break;
561              case 8049:
562                  lc = 8123;
563                  break;
564              case 8050:
565                  lc = 8136;
566                  break;
567              case 8051:
568                  lc = 8137;
569                  break;
570              case 8052:
571                  lc = 8138;
572                  break;
573              case 8053:
574                  lc = 8139;
575                  break;
576              case 8054:
577                  lc = 8154;
578                  break;
579              case 8055:
580                  lc = 8155;
581                  break;
582              case 8056:
583                  lc = 8184;
584                  break;
585              case 8057:
586                  lc = 8185;
587                  break;
588              case 8058:
589                  lc = 8170;
590                  break;
591              case 8059:
592                  lc = 8171;
593                  break;
594              case 8060:
595                  lc = 8186;
596                  break;
597              case 8061:
598                  lc = 8187;
599                  break;
600              case 8112:
601                  lc = 8120;
602                  break;
603              case 8113:
604                  lc = 8121;
605                  break;
606              case 8115:
607                  lc = 8124;
608                  break;
609              case 8126:
610                  lc = 921;
611                  break;
612              case 8131:
613                  lc = 8140;
614                  break;
615              case 8144:
616                  lc = 8152;
617                  break;
618              case 8145:
619                  lc = 8153;
620                  break;
621              case 8160:
622                  lc = 8168;
623                  break;
624              case 8161:
625                  lc = 8169;
626                  break;
627              case 8165:
628                  lc = 8172;
629                  break;
630              case 8179:
631                  lc = 8188;
632                  break;
633          default:
634              break;
635          }
636      } else {
637          switch (utf32) {
638              case 97:   case 98:   case 99:   case 100:   case 101:   case 102:   case 103:   case 104:   case 105:   case 106:
639              case 107:   case 108:   case 109:   case 110:   case 111:   case 112:   case 113:   case 114:   case 115:   case 116:   case 117:   case 118:
640              case 119:   case 120:   case 121:   case 122:   case 224:   case 225:   case 226:   case 227:   case 228:   case 229:   case 230:
641              case 231:   case 232:   case 233:   case 234:   case 235:   case 236:   case 237:   case 238:   case 239:   case 240:   case 241:   case 242:
642              case 243:   case 244:   case 245:   case 246:   case 247:   case 248:   case 249:   case 250:   case 251:   case 252:   case 253:   case 254:
643              case 945:   case 946:   case 947:   case 948:   case 949:   case 950:   case 951:   case 952:   case 953:   case 954:   case 955:
644              case 956:   case 957:   case 958:   case 959:   case 960:   case 961:   case 963:   case 964:   case 965:   case 966:   case 967:
645              case 968:   case 969:   case 970:   case 971:   case 1072:   case 1073:   case 1074:   case 1075:   case 1076:   case 1077:   case 1078:
646              case 1079:   case 1080:   case 1081:   case 1082:   case 1083:   case 1084:   case 1085:   case 1086:   case 1087:   case 1088:   case 1089:   case 1090:
647              case 1091:   case 1092:   case 1093:   case 1094:   case 1095:   case 1096:   case 1097:   case 1098:   case 1099:   case 1100:   case 1101:   case 1102:
648              case 1103:
649                  if ((utf32 != 247)) {
650                      lc = (utf32 - 32);
651                  }
652                  break;
653              case 257:   case 258:   case 259:   case 260:   case 261:   case 262:   case 263:   case 264:   case 265:   case 266:
654              case 267:   case 268:   case 269:   case 270:   case 271:   case 272:   case 273:   case 274:   case 275:   case 276:   case 277:   case 278:
655              case 279:   case 280:   case 281:   case 282:   case 283:   case 284:   case 285:   case 286:   case 287:   case 288:   case 289:   case 290:
656              case 291:   case 292:   case 293:   case 294:   case 295:   case 296:   case 297:   case 298:   case 299:   case 300:   case 301:   case 302:
657              case 303:   case 304:   case 306:   case 307:   case 308:   case 309:   case 310:   case 311:   case 331:   case 332:
658              case 333:   case 334:   case 335:   case 336:   case 337:   case 338:   case 339:   case 340:   case 341:   case 342:   case 343:   case 344:
659              case 345:   case 346:   case 347:   case 348:   case 349:   case 350:   case 351:   case 352:   case 353:   case 354:   case 355:   case 356:
660              case 357:   case 358:   case 359:   case 360:   case 361:   case 362:   case 363:   case 364:   case 365:   case 366:   case 367:   case 368:
661              case 369:   case 370:   case 371:   case 372:   case 373:   case 374:   case 375:   case 417:   case 418:   case 419:   case 420:
662              case 421:   case 481:   case 482:   case 483:   case 484:   case 485:   case 486:   case 487:   case 488:   case 489:   case 490:
663              case 491:   case 492:   case 493:   case 494:   case 495:   case 507:   case 508:   case 509:   case 510:   case 511:
664              case 513:   case 514:   case 515:   case 516:   case 517:   case 518:   case 519:   case 520:   case 521:   case 522:   case 523:   case 524:
665              case 525:   case 526:   case 527:   case 528:   case 529:   case 530:   case 531:   case 532:   case 533:   case 534:   case 535:   case 536:
666              case 537:   case 538:   case 539:   case 540:   case 541:   case 542:   case 543:   case 544:   case 546:   case 547:   case 548:
667              case 549:   case 550:   case 551:   case 552:   case 553:   case 554:   case 555:   case 556:   case 557:   case 558:   case 559:   case 560:
668              case 561:   case 562:   case 563:   case 985:   case 986:   case 987:   case 988:   case 989:   case 990:   case 991:   case 992:
669              case 993:   case 994:   case 995:   case 996:   case 997:   case 998:   case 999:   case 1000:   case 1001:   case 1002:   case 1003:   case 1004:
670              case 1005:   case 1006:   case 1007:   case 1121:   case 1122:   case 1123:   case 1124:   case 1125:   case 1126:   case 1127:   case 1128:
671              case 1129:   case 1130:   case 1131:   case 1132:   case 1133:   case 1134:   case 1135:   case 1136:   case 1137:   case 1138:   case 1139:   case 1140:
672              case 1141:   case 1142:   case 1143:   case 1144:   case 1145:   case 1146:   case 1147:   case 1148:   case 1149:   case 1150:   case 1151:   case 1152:
673              case 1153:   case 1163:   case 1164:   case 1165:   case 1166:   case 1167:   case 1168:   case 1169:   case 1170:   case 1171:   case 1172:
674              case 1173:   case 1174:   case 1175:   case 1176:   case 1177:   case 1178:   case 1179:   case 1180:   case 1181:   case 1182:   case 1183:   case 1184:
675              case 1185:   case 1186:   case 1187:   case 1188:   case 1189:   case 1190:   case 1191:   case 1192:   case 1193:   case 1194:   case 1195:   case 1196:
676              case 1197:   case 1198:   case 1199:   case 1200:   case 1201:   case 1202:   case 1203:   case 1204:   case 1205:   case 1206:   case 1207:   case 1208:
677              case 1209:   case 1210:   case 1211:   case 1212:   case 1213:   case 1214:   case 1215:   case 1233:   case 1234:   case 1235:   case 1236:
678              case 1237:   case 1238:   case 1239:   case 1240:   case 1241:   case 1242:   case 1243:   case 1244:   case 1245:   case 1246:   case 1247:   case 1248:
679              case 1249:   case 1250:   case 1251:   case 1252:   case 1253:   case 1254:   case 1255:   case 1256:   case 1257:   case 1258:   case 1259:   case 1260:
680              case 1261:   case 1262:   case 1263:   case 1264:   case 1265:   case 1266:   case 1267:   case 1268:   case 1269:   case 1281:   case 1282:
681              case 1283:   case 1284:   case 1285:   case 1286:   case 1287:   case 1288:   case 1289:   case 1290:   case 1291:   case 1292:   case 1293:   case 1294:
682              case 1295:
683                  if (((utf32) % 2 == 1)) {
684                      lc = (utf32 - 1);
685                  }
686                  break;
687              case 314:   case 315:   case 316:   case 317:   case 318:   case 319:   case 320:   case 321:   case 322:   case 323:
688              case 324:   case 325:   case 326:   case 327:   case 328:   case 378:   case 379:   case 380:   case 381:   case 382:
689              case 464:   case 465:   case 466:   case 467:   case 468:   case 469:   case 470:   case 471:   case 472:   case 473:   case 474:   case 475:
690              case 476:   case 1218:   case 1219:   case 1220:   case 1221:   case 1222:   case 1223:   case 1224:   case 1225:   case 1226:   case 1227:
691              case 1228:   case 1229:   case 1230:
692                  if ( !(((utf32) % 2 == 1))) {
693                      lc = (utf32 - 1);
694                  }
695                  break;
696              case 1104:   case 1105:   case 1106:   case 1107:   case 1108:   case 1109:   case 1110:   case 1111:   case 1112:   case 1113:
697              case 1114:   case 1115:   case 1116:   case 1117:   case 1118:   case 1119:
698                  lc = (utf32 - 80);
699                  break;
700              case 1377:   case 1378:   case 1379:   case 1380:   case 1381:   case 1382:   case 1383:   case 1384:   case 1385:   case 1386:
701              case 1387:   case 1388:   case 1389:   case 1390:   case 1391:   case 1392:   case 1393:   case 1394:   case 1395:   case 1396:   case 1397:   case 1398:
702              case 1399:   case 1400:   case 1401:   case 1402:   case 1403:   case 1404:   case 1405:   case 1406:   case 1407:   case 1408:   case 1409:   case 1410:
703              case 1411:   case 1412:   case 1413:   case 1414:
704                  lc = (utf32 - 48);
705                  break;
706              case 181:
707                  lc = 924;
708                  break;
709              case 255:
710                  lc = 376;
711                  break;
712              case 305:
713                  lc = 73;
714                  break;
715              case 383:
716                  lc = 83;
717                  break;
718              case 387:
719                  lc = 386;
720                  break;
721              case 389:
722                  lc = 388;
723                  break;
724              case 392:
725                  lc = 391;
726                  break;
727              case 396:
728                  lc = 395;
729                  break;
730              case 402:
731                  lc = 401;
732                  break;
733              case 405:
734                  lc = 502;
735                  break;
736              case 409:
737                  lc = 408;
738                  break;
739              case 414:
740                  lc = 544;
741                  break;
742              case 424:
743                  lc = 423;
744                  break;
745              case 429:
746                  lc = 428;
747                  break;
748              case 432:
749                  lc = 431;
750                  break;
751              case 436:
752                  lc = 435;
753                  break;
754              case 438:
755                  lc = 437;
756                  break;
757              case 441:
758                  lc = 440;
759                  break;
760              case 445:
761                  lc = 444;
762                  break;
763              case 447:
764                  lc = 503;
765                  break;
766              case 453:
767                  lc = 452;
768                  break;
769              case 454:
770                  lc = 452;
771                  break;
772              case 456:
773                  lc = 455;
774                  break;
775              case 457:
776                  lc = 455;
777                  break;
778              case 459:
779                  lc = 458;
780                  break;
781              case 460:
782                  lc = 458;
783                  break;
784              case 462:
785                  lc = 461;
786                  break;
787              case 477:
788                  lc = 398;
789                  break;
790              case 479:
791                  lc = 478;
792                  break;
793              case 498:
794                  lc = 497;
795                  break;
796              case 499:
797                  lc = 497;
798                  break;
799              case 501:
800                  lc = 500;
801                  break;
802              case 505:
803                  lc = 504;
804                  break;
805              case 595:
806                  lc = 385;
807                  break;
808              case 596:
809                  lc = 390;
810                  break;
811              case 598:
812                  lc = 393;
813                  break;
814              case 599:
815                  lc = 394;
816                  break;
817              case 601:
818                  lc = 399;
819                  break;
820              case 603:
821                  lc = 400;
822                  break;
823              case 608:
824                  lc = 403;
825                  break;
826              case 611:
827                  lc = 404;
828                  break;
829              case 616:
830                  lc = 407;
831                  break;
832              case 617:
833                  lc = 406;
834                  break;
835              case 623:
836                  lc = 412;
837                  break;
838              case 626:
839                  lc = 413;
840                  break;
841              case 629:
842                  lc = 415;
843                  break;
844              case 640:
845                  lc = 422;
846                  break;
847              case 643:
848                  lc = 425;
849                  break;
850              case 648:
851                  lc = 430;
852                  break;
853              case 650:
854                  lc = 433;
855                  break;
856              case 651:
857                  lc = 434;
858                  break;
859              case 658:
860                  lc = 439;
861                  break;
862              case 837:
863                  lc = 921;
864                  break;
865              case 940:
866                  lc = 902;
867                  break;
868              case 941:
869                  lc = 904;
870                  break;
871              case 942:
872                  lc = 905;
873                  break;
874              case 943:
875                  lc = 906;
876                  break;
877              case 962:
878                  lc = 931;
879                  break;
880              case 972:
881                  lc = 908;
882                  break;
883              case 973:
884                  lc = 910;
885                  break;
886              case 974:
887                  lc = 911;
888                  break;
889              case 976:
890                  lc = 914;
891                  break;
892              case 977:
893                  lc = 920;
894                  break;
895              case 981:
896                  lc = 934;
897                  break;
898              case 982:
899                  lc = 928;
900                  break;
901              case 1008:
902                  lc = 922;
903                  break;
904              case 1009:
905                  lc = 929;
906                  break;
907              case 1010:
908                  lc = 1017;
909                  break;
910              case 1013:
911                  lc = 917;
912                  break;
913              case 1016:
914                  lc = 1015;
915                  break;
916              case 1019:
917                  lc = 1018;
918                  break;
919              case 1273:
920                  lc = 1272;
921                  break;
922          default:
923              break;
924          }
925      }
926      return lc;
927  }
928  
929  /**
930   * Gets the UTF8 character 'utf8char' from the UTF8 string 'utf8str' from
931   *             position 'pos'
932   * @param    utf8str: utf8 string
933   * @param    pos: position from where the utf8 character is copied
934   *            (also output set as position of the next utf8 character in the utf8 string)
935   * @param    utf8char: zero terminated utf8 character containing 1 to 4 bytes (output)
936  */
picobase_get_utf8char(picoos_uint8 utf8[],picoos_int32 * pos,picobase_utf8char utf8char)937  static void picobase_get_utf8char (picoos_uint8 utf8[], picoos_int32 * pos, picobase_utf8char utf8char)
938  {
939  
940      int i;
941      int l;
942  
943      utf8char[0] = 0;
944      l = picobase_det_utf8_length(utf8[*pos]);
945      i = 0;
946      while ((((i < l) && (i < PICOBASE_UTF8_MAXLEN)) && (utf8[*pos] != 0))) {
947          utf8char[i] = utf8[*pos];
948          (*pos)++;
949          i++;
950      }
951      utf8char[i] = 0;
952  }
953  
954  
/**
 * Gets the UTF8 character starting at position '*pos' of 'utf8s' and, on
 * success, moves '*pos' to the first byte behind that character.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmax : upper bound (exclusive) for positions that may be
 *            read; the complete character must lie below this bound
 * @param    pos : in: position of the character to read;
 *            out: position of the following character (unchanged on failure)
 * @param    utf8char : zero terminated utf8 character (output); on failure it
 *            is empty or holds the bytes found before the string terminator
 * @return   TRUE if a complete character was copied, FALSE if '*pos' is out
 *           of range, the length determined for the lead byte is 0 or larger
 *           than PICOBASE_UTF8_MAXLEN, the character would cross
 *           'utf8slenmax', or the string ends inside the character
 */
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmax,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 start;

    utf8char[0] = 0;
    start = *pos;
    if (start >= utf8slenmax) {
        /* check the position before touching utf8s[start] */
        return FALSE;
    }

    len = picobase_det_utf8_length(utf8s[start]);
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - start))) {
        /* len == 0: reporting success without advancing would make callers
           iterate over the same byte forever */
        return FALSE;
    }

    for (i = 0; i < len; i++) {
        if (utf8s[start + i] == 0) {
            /* string ends inside the character */
            utf8char[i] = 0;
            return FALSE;
        }
        utf8char[i] = utf8s[start + i];
    }
    utf8char[len] = 0;
    *pos = start + len;
    return TRUE;
}
984  
/**
 * Moves '*pos' from the UTF8 character starting at '*pos' in 'utf8s' to the
 * first byte behind that character, without copying the character.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmax : upper bound (exclusive) for positions that may be
 *            read; the complete character must lie below this bound
 * @param    pos : in: position of the character to skip;
 *            out: position of the following character (unchanged on failure)
 * @return   TRUE if a complete character was skipped, FALSE if '*pos' is out
 *           of range, the length determined for the lead byte is 0 or larger
 *           than PICOBASE_UTF8_MAXLEN, the character would cross
 *           'utf8slenmax', or the string ends inside the character
 */
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmax,
                                           picoos_uint32 *pos) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 start;

    start = *pos;
    if (start >= utf8slenmax) {
        /* check the position before touching utf8s[start] */
        return FALSE;
    }

    len = picobase_det_utf8_length(utf8s[start]);
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - start))) {
        /* len == 0: reporting success without advancing would make callers
           iterate over the same byte forever */
        return FALSE;
    }

    for (i = 0; i < len; i++) {
        if (utf8s[start + i] == 0) {
            /* string ends inside the character */
            return FALSE;
        }
    }
    *pos = start + len;
    return TRUE;
}
1010  
/**
 * Gets the UTF8 character that ends directly before position '*pos' of
 * 'utf8s' and, on success, moves '*pos' to the first byte of that character.
 *
 * The search walks backwards byte by byte (at most PICOBASE_UTF8_MAXLEN
 * bytes, never below 'utf8slenmin', and not across a zero byte) until it
 * finds a byte whose determined character length equals the distance to
 * '*pos'.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmin : lowest position that may be inspected
 * @param    pos : in: position directly behind the wanted character;
 *            out: start position of that character (unchanged on failure)
 * @param    utf8char : zero terminated utf8 character (output, empty on
 *            failure)
 * @return   TRUE if a character was found and copied, FALSE otherwise
 */
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmin,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 back;
    picoos_uint8 j;
    picoos_uint32 maxback;
    picoos_uint32 start;

    utf8char[0] = 0;
    if ((*pos) <= utf8slenmin) {
        /* also covers *pos == 0: there is nothing in front of the position */
        return FALSE;
    }

    /* Number of bytes we may step back. Deriving the limit up front keeps
       the unsigned position from wrapping below zero when the lead byte is
       not found before the lower bound is reached. */
    maxback = (*pos) - utf8slenmin;
    if (maxback > PICOBASE_UTF8_MAXLEN) {
        maxback = PICOBASE_UTF8_MAXLEN;
    }

    for (back = 1; back <= maxback; back++) {
        start = (*pos) - back;
        if (utf8s[start] == 0) {
            break;
        }
        if (picobase_det_utf8_length(utf8s[start]) == back) {
            for (j = 0; j < back; j++) {
                utf8char[j] = utf8s[start + j];
            }
            utf8char[back] = 0;
            *pos = start;
            return TRUE;
        }
    }
    return FALSE;
}
1041  
/**
 * Moves '*pos' to the start of the UTF8 character that ends directly before
 * position '*pos' of 'utf8s', without copying the character.
 *
 * The search walks backwards byte by byte (at most PICOBASE_UTF8_MAXLEN
 * bytes, never below 'utf8slenmin', and not across a zero byte) until it
 * finds a byte whose determined character length equals the distance to
 * '*pos'.
 *
 * @param    utf8s : utf8 string
 * @param    utf8slenmin : lowest position that may be inspected
 * @param    pos : in: position directly behind the wanted character;
 *            out: start position of that character (unchanged on failure)
 * @return   TRUE if a character was found, FALSE otherwise
 */
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmin,
                                           picoos_uint32 *pos) {
    picoos_uint8 back;
    picoos_uint32 maxback;
    picoos_uint32 start;

    if ((*pos) <= utf8slenmin) {
        /* also covers *pos == 0: there is nothing in front of the position */
        return FALSE;
    }

    /* Number of bytes we may step back. Deriving the limit up front keeps
       the unsigned position from wrapping below zero when the lead byte is
       not found before the lower bound is reached. */
    maxback = (*pos) - utf8slenmin;
    if (maxback > PICOBASE_UTF8_MAXLEN) {
        maxback = PICOBASE_UTF8_MAXLEN;
    }

    for (back = 1; back <= maxback; back++) {
        start = (*pos) - back;
        if (utf8s[start] == 0) {
            break;
        }
        if (picobase_det_utf8_length(utf8s[start]) == back) {
            *pos = start;
            return TRUE;
        }
    }
    return FALSE;
}
1066  
1067  /**
1068   * Converts utf8 input to utf32
1069   * @param    utf8[] : character encoded in utf8
1070   * @param    done : boolean indicating the completion of the operation (FALSE: conversion not done)
1071   * @return   a single character encoded in UTF32
1072  */
picobase_utf8_to_utf32(picoos_uint8 utf8[],picoos_uint8 * done)1073  static picobase_utf32 picobase_utf8_to_utf32 (picoos_uint8 utf8[], picoos_uint8 * done)
1074  {
1075      (*done) = TRUE;
1076      if ((utf8[0] < (picoos_uint8)'\200')) {
1077          return utf8[0];
1078      } else if ((utf8[0] >= (picoos_uint8)'\370')) {
1079          return 0;
1080      } else if ((utf8[0] >= (picoos_uint8)'\360')) {
1081          return ((((262144 * (utf8[0] % 8)) + (4096 * (utf8[1] % 64))) + (64 * (utf8[2] % 64))) + (utf8[3] % 64));
1082      } else if ((utf8[0] >= (picoos_uint8)'\340')) {
1083          return (((4096 * (utf8[0] % 16)) + (64 * (utf8[1] % 64))) + (utf8[2] % 64));
1084      } else if ((utf8[(0)] >= (picoos_uint8)'\300')) {
1085          return ((64 * (utf8[0] % 32)) + (utf8[1] % 64));
1086      } else {
1087          (*done) = FALSE;
1088          return 0;
1089      }
1090  }
1091  
/**
 * Converts a single utf32 character to its utf8 encoding.
 *
 * All code points up to U+10FFFF are accepted. The output buffer must offer
 * room for at least 4 bytes regardless of the actual encoded length. A
 * terminating zero is appended only if it still fits into 'utf8MaxLen'
 * bytes. Surrogate code points are not rejected.
 *
 * @param    utf32 : character encoded in utf32
 * @param    utf8[] : receives the utf8 encoding (output)
 * @param    utf8MaxLen : size of 'utf8' in bytes, must be >= 4
 * @param    done : set to TRUE if the conversion was carried out, FALSE if
 *            the buffer is too small or 'utf32' is above U+10FFFF
 * @return   number of bytes written (1 to 4, without terminator), 0 on failure
*/
static picoos_int32 picobase_utf32_to_utf8 (picobase_utf32 utf32, picobase_utf8 utf8[], picoos_int32 utf8MaxLen, picoos_uint8 * done)
{
    picoos_int32 len;

    if ((utf8MaxLen < 4) || (utf32 > 0x10FFFF)) {
        (*done) = FALSE;
        return 0;
    }

    (*done) = TRUE;
    if (utf32 < 0x80) {
        len = 1;
        utf8[0] = (picobase_utf8) utf32;
    } else if (utf32 < 0x800) {
        len = 2;
        utf8[0] = (picobase_utf8) (0xC0 | (utf32 >> 6));
        utf8[1] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    } else if (utf32 < 0x10000) {
        len = 3;
        utf8[0] = (picobase_utf8) (0xE0 | (utf32 >> 12));
        utf8[1] = (picobase_utf8) (0x80 | ((utf32 >> 6) & 0x3F));
        utf8[2] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    } else {
        /* 0x10000 .. 0x10FFFF; the old limit of 0x100000 wrongly excluded
           the code points U+100000..U+10FFFF */
        len = 4;
        utf8[0] = (picobase_utf8) (0xF0 | (utf32 >> 18));
        utf8[1] = (picobase_utf8) (0x80 | ((utf32 >> 12) & 0x3F));
        utf8[2] = (picobase_utf8) (0x80 | ((utf32 >> 6) & 0x3F));
        utf8[3] = (picobase_utf8) (0x80 | (utf32 & 0x3F));
    }

    if (len < utf8MaxLen) {
        utf8[len] = 0;
    }
    return len;
}
1135  
1136  
picobase_lowercase_utf8_str(picoos_uchar utf8str[],picoos_char lowercase[],int lowercaseMaxLen,picoos_uint8 * done)1137  extern picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], int lowercaseMaxLen, picoos_uint8 * done)
1138  {
1139      picobase_utf8char utf8char;
1140      picoos_int32 i;
1141      picoos_int32 j;
1142      picoos_int32 k;
1143      picoos_int32 l;
1144      picobase_utf32 utf32;
1145      picoos_uint8 done1;
1146  
1147      k = 0;
1148      i = 0;
1149      (*done) = TRUE;
1150      while (utf8str[i] != 0) {
1151          picobase_get_utf8char(utf8str,& i,utf8char);
1152          utf32 = picobase_utf8_to_utf32(utf8char, & done1);
1153          utf32 = base_utf32_lowercase(utf32);
1154          l = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, & done1);
1155          j = 0;
1156          while ((j < l) && (k < (lowercaseMaxLen-1))) {
1157              lowercase[k] = utf8char[j];
1158              k++;
1159              j++;
1160          }
1161          *done = *done && (j == l);
1162      }
1163      lowercase[k] = 0;
1164      return k;
1165  }
1166  
1167  
picobase_uppercase_utf8_str(picoos_uchar utf8str[],picoos_char uppercase[],int uppercaseMaxLen,picoos_uint8 * done)1168  extern picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done)
1169  {
1170      picobase_utf8char utf8char;
1171      picoos_int32 i;
1172      picoos_int32 j;
1173      picoos_int32 k;
1174      picoos_int32 l;
1175      picobase_utf32 utf32;
1176      picoos_uint8 done1;
1177  
1178      k = 0;
1179      i = 0;
1180      (*done) = TRUE;
1181      while (utf8str[i] != 0) {
1182          picobase_get_utf8char(utf8str,& i,utf8char);
1183          utf32 = picobase_utf8_to_utf32(utf8char, & done1);
1184          utf32 = base_utf32_uppercase(utf32);
1185          l = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, & done1);
1186          j = 0;
1187          while ((j < l) && (k < (uppercaseMaxLen-1))) {
1188              uppercase[k] = utf8char[j];
1189              k++;
1190              j++;
1191          }
1192          *done = *done && (j == l);
1193      }
1194      uppercase[k] = 0;
1195      return k;
1196  }
1197  
1198  
picobase_is_utf8_uppercase(picoos_uchar utf8str[],picoos_int32 utf8strmaxlen)1199  extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
1200  {
1201      picobase_utf8char utf8char;
1202      picoos_int32 i;
1203      picoos_uint32 utf32;
1204      picoos_bool done;
1205      picoos_bool isUpperCase;
1206  
1207      isUpperCase = TRUE;
1208      i = 0;
1209      while (isUpperCase && (i <= utf8strmaxlen-1) && (utf8str[i] != 0)) {
1210          picobase_get_utf8char(utf8str,& i,utf8char);
1211          utf32 = picobase_utf8_to_utf32(utf8char,& done);
1212          isUpperCase = isUpperCase && (utf32 == base_utf32_uppercase(utf32));
1213      }
1214      return isUpperCase;
1215  }
1216  
1217  
picobase_is_utf8_lowercase(picoos_uchar utf8str[],picoos_int32 utf8strmaxlen)1218  extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
1219  {
1220      picobase_utf8char utf8char;
1221      picoos_int32 i;
1222      picoos_uint32 utf32;
1223      picoos_bool done;
1224      picoos_bool isLowerCase;
1225  
1226      isLowerCase = TRUE;
1227      i = 0;
1228      while (isLowerCase && (i <= utf8strmaxlen-1) && (utf8str[i] != 0)) {
1229          picobase_get_utf8char(utf8str,& i,utf8char);
1230          utf32 = picobase_utf8_to_utf32(utf8char,& done);
1231          isLowerCase = isLowerCase && (utf32 == base_utf32_lowercase(utf32));
1232      }
1233      return isLowerCase;
1234  }
1235  
1236  
1237  #ifdef __cplusplus
1238  }
1239  #endif
1240  
1241  
1242  
1243  /* end */
1244