1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  swapimpl.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005may05
16 *   created by: Markus W. Scherer
17 *
18 *   Data file swapping functions moved here from the common library
19 *   because some data is hardcoded in ICU4C and needs not be swapped any more.
20 *   Moving the functions here simplifies testing (for code coverage) because
21 *   we need not jump through hoops (like adding snapshots of these files
22 *   to testdata).
23 *
24 *   The declarations for these functions remain in the internal header files
25 *   in icu/source/common/
26 */
27 
28 #include "unicode/utypes.h"
29 #include "unicode/putil.h"
30 #include "unicode/udata.h"
31 
32 /* Explicit include statement for std_string.h is needed
33  * for compilation on certain platforms. (e.g. AIX/VACPP)
34  */
35 #include "unicode/std_string.h"
36 
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "uassert.h"
41 #include "uarrsort.h"
42 #include "ucmndata.h"
43 #include "udataswp.h"
44 
45 /* swapping implementations in common */
46 
47 #include "uresdata.h"
48 #include "ucnv_io.h"
49 #include "uprops.h"
50 #include "ucase.h"
51 #include "ubidi_props.h"
52 #include "ucol_swp.h"
53 #include "ucnv_bld.h"
54 #include "unormimp.h"
55 #include "normalizer2impl.h"
56 #include "sprpimpl.h"
57 #include "propname.h"
58 #include "rbbidata.h"
59 #include "utrie.h"
60 #include "utrie2.h"
61 #include "dictionarydata.h"
62 
63 /* swapping implementations in i18n */
64 
65 #if !UCONFIG_NO_NORMALIZATION
66 #include "uspoof_impl.h"
67 #endif
68 
69 U_NAMESPACE_USE
70 
71 /* definitions */
72 
73 /* Unicode property (value) aliases data swapping --------------------------- */
74 
75 static int32_t U_CALLCONV
upname_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)76 upname_swap(const UDataSwapper *ds,
77             const void *inData, int32_t length, void *outData,
78             UErrorCode *pErrorCode) {
79     /* udata_swapDataHeader checks the arguments */
80     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
81     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
82         return 0;
83     }
84 
85     /* check data format and format version */
86     const UDataInfo *pInfo=
87         reinterpret_cast<const UDataInfo *>(
88             static_cast<const char *>(inData)+4);
89     if(!(
90         pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
91         pInfo->dataFormat[1]==0x6e &&
92         pInfo->dataFormat[2]==0x61 &&
93         pInfo->dataFormat[3]==0x6d &&
94         pInfo->formatVersion[0]==2
95     )) {
96         udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
97                          pInfo->dataFormat[0], pInfo->dataFormat[1],
98                          pInfo->dataFormat[2], pInfo->dataFormat[3],
99                          pInfo->formatVersion[0]);
100         *pErrorCode=U_UNSUPPORTED_ERROR;
101         return 0;
102     }
103 
104     const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
105     uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
106 
107     if(length>=0) {
108         length-=headerSize;
109         // formatVersion 2 initially has indexes[8], 32 bytes.
110         if(length<32) {
111             udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
112                              (int)length);
113             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
114             return 0;
115         }
116     }
117 
118     const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
119     int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
120     if(length>=0) {
121         if(length<totalSize) {
122             udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
123                              "for pnames.icu\n",
124                              (int)length, (int)totalSize);
125             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
126             return 0;
127         }
128 
129         int32_t numBytesIndexesAndValueMaps=
130             udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
131 
132         // Swap the indexes[] and the valueMaps[].
133         ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
134 
135         // Copy the rest of the data.
136         if(inBytes!=outBytes) {
137             uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
138                         inBytes+numBytesIndexesAndValueMaps,
139                         totalSize-numBytesIndexesAndValueMaps);
140         }
141 
142         // We need not swap anything else:
143         //
144         // The ByteTries are already byte-serialized, and are fixed on ASCII.
145         // (On an EBCDIC machine, the input string is converted to lowercase ASCII
146         // while matching.)
147         //
148         // The name groups are mostly invariant characters, but since we only
149         // generate, and keep in subversion, ASCII versions of pnames.icu,
150         // and since only ICU4J uses the pnames.icu data file
151         // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
152         // we just copy those bytes too.
153     }
154 
155     return headerSize+totalSize;
156 }
157 
158 /* Unicode properties data swapping ----------------------------------------- */
159 
160 static int32_t U_CALLCONV
uprops_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)161 uprops_swap(const UDataSwapper *ds,
162             const void *inData, int32_t length, void *outData,
163             UErrorCode *pErrorCode) {
164     const UDataInfo *pInfo;
165     int32_t headerSize, i;
166 
167     int32_t dataIndexes[UPROPS_INDEX_COUNT];
168     const int32_t *inData32;
169 
170     /* udata_swapDataHeader checks the arguments */
171     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
172     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
173         return 0;
174     }
175 
176     /* check data format and format version */
177     pInfo=(const UDataInfo *)((const char *)inData+4);
178     if(!(
179         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
180         pInfo->dataFormat[1]==0x50 &&
181         pInfo->dataFormat[2]==0x72 &&
182         pInfo->dataFormat[3]==0x6f &&
183         (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
184         (pInfo->formatVersion[0]>=7 ||
185             (pInfo->formatVersion[2]==UTRIE_SHIFT &&
186              pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
187     )) {
188         udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
189                          pInfo->dataFormat[0], pInfo->dataFormat[1],
190                          pInfo->dataFormat[2], pInfo->dataFormat[3],
191                          pInfo->formatVersion[0]);
192         *pErrorCode=U_UNSUPPORTED_ERROR;
193         return 0;
194     }
195 
196     /* the properties file must contain at least the indexes array */
197     if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
198         udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
199                          length-headerSize);
200         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
201         return 0;
202     }
203 
204     /* read the indexes */
205     inData32=(const int32_t *)((const char *)inData+headerSize);
206     for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
207         dataIndexes[i]=udata_readInt32(ds, inData32[i]);
208     }
209 
210     /*
211      * comments are copied from the data format description in genprops/store.c
212      * indexes[] constants are in uprops.h
213      */
214     int32_t dataTop;
215     if(length>=0) {
216         int32_t *outData32;
217 
218         /*
219          * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
220          * In earlier formatVersions, it is 0 and a lower dataIndexes entry
221          * has the top of the last item.
222          */
223         for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
224 
225         if((length-headerSize)<(4*dataTop)) {
226             udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
227                              length-headerSize);
228             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
229             return 0;
230         }
231 
232         outData32=(int32_t *)((char *)outData+headerSize);
233 
234         /* copy everything for inaccessible data (padding) */
235         if(inData32!=outData32) {
236             uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
237         }
238 
239         /* swap the indexes[16] */
240         ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
241 
242         /*
243          * swap the main properties UTrie
244          * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
245          */
246         utrie_swapAnyVersion(ds,
247             inData32+UPROPS_INDEX_COUNT,
248             4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
249             outData32+UPROPS_INDEX_COUNT,
250             pErrorCode);
251 
252         /*
253          * swap the properties and exceptions words
254          * P  const uint32_t props32[i1-i0];
255          * E  const uint32_t exceptions[i2-i1];
256          */
257         ds->swapArray32(ds,
258             inData32+dataIndexes[UPROPS_PROPS32_INDEX],
259             4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
260             outData32+dataIndexes[UPROPS_PROPS32_INDEX],
261             pErrorCode);
262 
263         /*
264          * swap the UChars
265          * U  const UChar uchars[2*(i3-i2)];
266          */
267         ds->swapArray16(ds,
268             inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
269             4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
270             outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
271             pErrorCode);
272 
273         /*
274          * swap the additional UTrie
275          * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
276          */
277         utrie_swapAnyVersion(ds,
278             inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
279             4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
280             outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
281             pErrorCode);
282 
283         /*
284          * swap the properties vectors
285          * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
286          */
287         ds->swapArray32(ds,
288             inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
289             4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
290             outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
291             pErrorCode);
292 
293         // swap the Script_Extensions data
294         // SCX const uint16_t scriptExtensions[2*(i7-i6)];
295         ds->swapArray16(ds,
296             inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
297             4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
298             outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
299             pErrorCode);
300     }
301 
302     /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
303     return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
304 }
305 
306 /* Unicode case mapping data swapping --------------------------------------- */
307 
308 static int32_t U_CALLCONV
ucase_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)309 ucase_swap(const UDataSwapper *ds,
310            const void *inData, int32_t length, void *outData,
311            UErrorCode *pErrorCode) {
312     const UDataInfo *pInfo;
313     int32_t headerSize;
314 
315     const uint8_t *inBytes;
316     uint8_t *outBytes;
317 
318     const int32_t *inIndexes;
319     int32_t indexes[16];
320 
321     int32_t i, offset, count, size;
322 
323     /* udata_swapDataHeader checks the arguments */
324     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
325     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
326         return 0;
327     }
328 
329     /* check data format and format version */
330     pInfo=(const UDataInfo *)((const char *)inData+4);
331     if(!(
332         pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
333         pInfo->dataFormat[1]==UCASE_FMT_1 &&
334         pInfo->dataFormat[2]==UCASE_FMT_2 &&
335         pInfo->dataFormat[3]==UCASE_FMT_3 &&
336         ((pInfo->formatVersion[0]==1 &&
337           pInfo->formatVersion[2]==UTRIE_SHIFT &&
338           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
339          (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4))
340     )) {
341         udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
342                          pInfo->dataFormat[0], pInfo->dataFormat[1],
343                          pInfo->dataFormat[2], pInfo->dataFormat[3],
344                          pInfo->formatVersion[0]);
345         *pErrorCode=U_UNSUPPORTED_ERROR;
346         return 0;
347     }
348 
349     inBytes=(const uint8_t *)inData+headerSize;
350     outBytes=(uint8_t *)outData+headerSize;
351 
352     inIndexes=(const int32_t *)inBytes;
353 
354     if(length>=0) {
355         length-=headerSize;
356         if(length<16*4) {
357             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
358                              length);
359             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
360             return 0;
361         }
362     }
363 
364     /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
365     for(i=0; i<16; ++i) {
366         indexes[i]=udata_readInt32(ds, inIndexes[i]);
367     }
368 
369     /* get the total length of the data */
370     size=indexes[UCASE_IX_LENGTH];
371 
372     if(length>=0) {
373         if(length<size) {
374             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
375                              length);
376             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
377             return 0;
378         }
379 
380         /* copy the data for inaccessible bytes */
381         if(inBytes!=outBytes) {
382             uprv_memcpy(outBytes, inBytes, size);
383         }
384 
385         offset=0;
386 
387         /* swap the int32_t indexes[] */
388         count=indexes[UCASE_IX_INDEX_TOP]*4;
389         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
390         offset+=count;
391 
392         /* swap the UTrie */
393         count=indexes[UCASE_IX_TRIE_SIZE];
394         utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
395         offset+=count;
396 
397         /* swap the uint16_t exceptions[] and unfold[] */
398         count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
399         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
400         offset+=count;
401 
402         U_ASSERT(offset==size);
403     }
404 
405     return headerSize+size;
406 }
407 
408 /* Unicode bidi/shaping data swapping --------------------------------------- */
409 
410 static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)411 ubidi_swap(const UDataSwapper *ds,
412            const void *inData, int32_t length, void *outData,
413            UErrorCode *pErrorCode) {
414     const UDataInfo *pInfo;
415     int32_t headerSize;
416 
417     const uint8_t *inBytes;
418     uint8_t *outBytes;
419 
420     const int32_t *inIndexes;
421     int32_t indexes[16];
422 
423     int32_t i, offset, count, size;
424 
425     /* udata_swapDataHeader checks the arguments */
426     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
427     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
428         return 0;
429     }
430 
431     /* check data format and format version */
432     pInfo=(const UDataInfo *)((const char *)inData+4);
433     if(!(
434         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
435         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
436         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
437         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
438         ((pInfo->formatVersion[0]==1 &&
439           pInfo->formatVersion[2]==UTRIE_SHIFT &&
440           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
441          pInfo->formatVersion[0]==2)
442     )) {
443         udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
444                          pInfo->dataFormat[0], pInfo->dataFormat[1],
445                          pInfo->dataFormat[2], pInfo->dataFormat[3],
446                          pInfo->formatVersion[0]);
447         *pErrorCode=U_UNSUPPORTED_ERROR;
448         return 0;
449     }
450 
451     inBytes=(const uint8_t *)inData+headerSize;
452     outBytes=(uint8_t *)outData+headerSize;
453 
454     inIndexes=(const int32_t *)inBytes;
455 
456     if(length>=0) {
457         length-=headerSize;
458         if(length<16*4) {
459             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
460                              length);
461             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
462             return 0;
463         }
464     }
465 
466     /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
467     for(i=0; i<16; ++i) {
468         indexes[i]=udata_readInt32(ds, inIndexes[i]);
469     }
470 
471     /* get the total length of the data */
472     size=indexes[UBIDI_IX_LENGTH];
473 
474     if(length>=0) {
475         if(length<size) {
476             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
477                              length);
478             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
479             return 0;
480         }
481 
482         /* copy the data for inaccessible bytes */
483         if(inBytes!=outBytes) {
484             uprv_memcpy(outBytes, inBytes, size);
485         }
486 
487         offset=0;
488 
489         /* swap the int32_t indexes[] */
490         count=indexes[UBIDI_IX_INDEX_TOP]*4;
491         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
492         offset+=count;
493 
494         /* swap the UTrie */
495         count=indexes[UBIDI_IX_TRIE_SIZE];
496         utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
497         offset+=count;
498 
499         /* swap the uint32_t mirrors[] */
500         count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
501         ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
502         offset+=count;
503 
504         /* just skip the uint8_t jgArray[] and jgArray2[] */
505         count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
506         offset+=count;
507         count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
508         offset+=count;
509 
510         U_ASSERT(offset==size);
511     }
512 
513     return headerSize+size;
514 }
515 
516 /* Unicode normalization data swapping -------------------------------------- */
517 
518 #if !UCONFIG_NO_NORMALIZATION
519 
520 static int32_t U_CALLCONV
unorm_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)521 unorm_swap(const UDataSwapper *ds,
522            const void *inData, int32_t length, void *outData,
523            UErrorCode *pErrorCode) {
524     const UDataInfo *pInfo;
525     int32_t headerSize;
526 
527     const uint8_t *inBytes;
528     uint8_t *outBytes;
529 
530     const int32_t *inIndexes;
531     int32_t indexes[32];
532 
533     int32_t i, offset, count, size;
534 
535     /* udata_swapDataHeader checks the arguments */
536     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
537     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
538         return 0;
539     }
540 
541     /* check data format and format version */
542     pInfo=(const UDataInfo *)((const char *)inData+4);
543     if(!(
544         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
545         pInfo->dataFormat[1]==0x6f &&
546         pInfo->dataFormat[2]==0x72 &&
547         pInfo->dataFormat[3]==0x6d &&
548         pInfo->formatVersion[0]==2
549     )) {
550         udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
551                          pInfo->dataFormat[0], pInfo->dataFormat[1],
552                          pInfo->dataFormat[2], pInfo->dataFormat[3],
553                          pInfo->formatVersion[0]);
554         *pErrorCode=U_UNSUPPORTED_ERROR;
555         return 0;
556     }
557 
558     inBytes=(const uint8_t *)inData+headerSize;
559     outBytes=(uint8_t *)outData+headerSize;
560 
561     inIndexes=(const int32_t *)inBytes;
562 
563     if(length>=0) {
564         length-=headerSize;
565         if(length<32*4) {
566             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
567                              length);
568             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
569             return 0;
570         }
571     }
572 
573     /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
574     for(i=0; i<32; ++i) {
575         indexes[i]=udata_readInt32(ds, inIndexes[i]);
576     }
577 
578     /* calculate the total length of the data */
579     size=
580         32*4+ /* size of indexes[] */
581         indexes[_NORM_INDEX_TRIE_SIZE]+
582         indexes[_NORM_INDEX_UCHAR_COUNT]*2+
583         indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
584         indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
585         indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
586         indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
587 
588     if(length>=0) {
589         if(length<size) {
590             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
591                              length);
592             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
593             return 0;
594         }
595 
596         /* copy the data for inaccessible bytes */
597         if(inBytes!=outBytes) {
598             uprv_memcpy(outBytes, inBytes, size);
599         }
600 
601         offset=0;
602 
603         /* swap the indexes[] */
604         count=32*4;
605         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
606         offset+=count;
607 
608         /* swap the main UTrie */
609         count=indexes[_NORM_INDEX_TRIE_SIZE];
610         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
611         offset+=count;
612 
613         /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
614         count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
615         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
616         offset+=count;
617 
618         /* swap the FCD UTrie */
619         count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
620         if(count!=0) {
621             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
622             offset+=count;
623         }
624 
625         /* swap the aux UTrie */
626         count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
627         if(count!=0) {
628             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
629             offset+=count;
630         }
631 
632         /* swap the uint16_t combiningTable[] */
633         count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
634         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
635         offset+=count;
636     }
637 
638     return headerSize+size;
639 }
640 
641 #endif
642 
643 /* Swap 'Test' data from gentest */
644 static int32_t U_CALLCONV
test_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)645 test_swap(const UDataSwapper *ds,
646            const void *inData, int32_t length, void *outData,
647            UErrorCode *pErrorCode) {
648     const UDataInfo *pInfo;
649     int32_t headerSize;
650 
651     const uint8_t *inBytes;
652     uint8_t *outBytes;
653 
654     int32_t offset;
655 
656     /* udata_swapDataHeader checks the arguments */
657     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
658     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
659         udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
660         return 0;
661     }
662 
663     /* check data format and format version */
664     pInfo=(const UDataInfo *)((const char *)inData+4);
665     if(!(
666         pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
667         pInfo->dataFormat[1]==0x65 &&
668         pInfo->dataFormat[2]==0x73 &&
669         pInfo->dataFormat[3]==0x74 &&
670         pInfo->formatVersion[0]==1
671     )) {
672         udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
673                          pInfo->dataFormat[0], pInfo->dataFormat[1],
674                          pInfo->dataFormat[2], pInfo->dataFormat[3],
675                          pInfo->formatVersion[0]);
676         *pErrorCode=U_UNSUPPORTED_ERROR;
677         return 0;
678     }
679 
680     inBytes=(const uint8_t *)inData+headerSize;
681     outBytes=(uint8_t *)outData+headerSize;
682 
683     int32_t size16 = 2; // 16bit plus padding
684     int32_t sizeStr = 5; // 4 char inv-str plus null
685     int32_t size = size16 + sizeStr;
686 
687     if(length>=0) {
688         if(length<size) {
689             udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
690                              length, size);
691             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
692             return 0;
693         }
694 
695 	offset =0;
696 	/* swap a 1 entry array */
697         ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
698 	offset+=size16;
699 	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
700     }
701 
702     return headerSize+size;
703 }
704 
705 /* swap any data (except a .dat package) ------------------------------------ */
706 
707 static const struct {
708     uint8_t dataFormat[4];
709     UDataSwapFn *swapFn;
710 } swapFns[]={
711     { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
712 #if !UCONFIG_NO_LEGACY_CONVERSION
713     { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
714 #endif
715 #if !UCONFIG_NO_CONVERSION
716     { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
717 #endif
718 #if !UCONFIG_NO_IDNA
719     { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
720 #endif
721     /* insert data formats here, descending by expected frequency of occurrence */
722     { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
723 
724     { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
725                                   ucase_swap },         /* dataFormat="cAsE" */
726 
727     { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
728                                   ubidi_swap },         /* dataFormat="BiDi" */
729 
730 #if !UCONFIG_NO_NORMALIZATION
731     { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
732     { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
733 #endif
734 #if !UCONFIG_NO_COLLATION
735     { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
736     { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
737 #endif
738 #if !UCONFIG_NO_BREAK_ITERATION
739     { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
740     { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
741 #endif
742     { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
743     { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
744 #if !UCONFIG_NO_NORMALIZATION
745     { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
746 #endif
747     { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
748 };
749 
750 U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)751 udata_swap(const UDataSwapper *ds,
752            const void *inData, int32_t length, void *outData,
753            UErrorCode *pErrorCode) {
754     char dataFormatChars[4];
755     const UDataInfo *pInfo;
756     int32_t i, swappedLength;
757 
758     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
759         return 0;
760     }
761 
762     /*
763      * Preflight the header first; checks for illegal arguments, too.
764      * Do not swap the header right away because the format-specific swapper
765      * will swap it, get the headerSize again, and also use the header
766      * information. Otherwise we would have to pass some of the information
767      * and not be able to use the UDataSwapFn signature.
768      */
769     udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
770 
771     /*
772      * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
773      * then we could check here for further known magic values and structures.
774      */
775     if(U_FAILURE(*pErrorCode)) {
776         return 0; /* the data format was not recognized */
777     }
778 
779     pInfo=(const UDataInfo *)((const char *)inData+4);
780 
781     {
782         /* convert the data format from ASCII to Unicode to the system charset */
783         UChar u[4]={
784              pInfo->dataFormat[0], pInfo->dataFormat[1],
785              pInfo->dataFormat[2], pInfo->dataFormat[3]
786         };
787 
788         if(uprv_isInvariantUString(u, 4)) {
789             u_UCharsToChars(u, dataFormatChars, 4);
790         } else {
791             dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
792         }
793     }
794 
795     /* dispatch to the swap function for the dataFormat */
796     for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
797         if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
798             swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
799 
800             if(U_FAILURE(*pErrorCode)) {
801                 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
802                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
803                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
804                                  dataFormatChars[0], dataFormatChars[1],
805                                  dataFormatChars[2], dataFormatChars[3],
806                                  u_errorName(*pErrorCode));
807             } else if(swappedLength<(length-15)) {
808                 /* swapped less than expected */
809                 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
810                                  swappedLength, length,
811                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
812                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
813                                  dataFormatChars[0], dataFormatChars[1],
814                                  dataFormatChars[2], dataFormatChars[3],
815                                  u_errorName(*pErrorCode));
816             }
817 
818             return swappedLength;
819         }
820     }
821 
822     /* the dataFormat was not recognized */
823     udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
824                      pInfo->dataFormat[0], pInfo->dataFormat[1],
825                      pInfo->dataFormat[2], pInfo->dataFormat[3],
826                      dataFormatChars[0], dataFormatChars[1],
827                      dataFormatChars[2], dataFormatChars[3]);
828 
829     *pErrorCode=U_UNSUPPORTED_ERROR;
830     return 0;
831 }
832