1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2000-2011, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  ucol_data.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2011jul02
14 *   created by: Markus Scherer
15 *
16 * Private implementation header for C/C++ collation.
17 * Some file data structure definitions were moved here from i18n/ucol_imp.h
18 * so that the common library (via ucol_swp.cpp) need not depend on the i18n library at all.
19 *
20 * We do not want to move the collation swapper to the i18n library because
21 * a) the resource bundle swapper depends on it and would have to move too, and
22 * b) we might want to eventually implement runtime data swapping,
23 *    which might (or might not) be easier if all swappers are in the common library.
24 */
25 
26 #ifndef __UCOL_DATA_H__
27 #define __UCOL_DATA_H__
28 
29 #include "unicode/utypes.h"
30 
31 #if !UCONFIG_NO_COLLATION
32 
/*
 * Magic number for collation data headers.
 * It lets a reader tell whether the reserved fields of a header were reset
 * to zero or were left holding junk.
 * NOTE(review): presumably this is the value stored in UCATableHeader.magic
 * (that field carries the same description) -- confirm against the code
 * that writes the header.
 */
#define UCOL_HEADER_MAGIC 0x20030618
35 
36 typedef struct {
37       int32_t size;
38       /* all the offsets are in bytes */
39       /* to get the address add to the header address and cast properly */
40       uint32_t options; /* these are the default options for the collator */
41       uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */
42       uint32_t contractionUCACombos;        /* this one is needed only for UCA, to copy the appropriate contractions */
43       uint32_t magic;            /* magic number - lets us know whether reserved data is reset or junked */
44       uint32_t mappingPosition;  /* const uint8_t *mappingPosition; */
45       uint32_t expansion;        /* uint32_t *expansion;            */
46       uint32_t contractionIndex; /* UChar *contractionIndex;        */
47       uint32_t contractionCEs;   /* uint32_t *contractionCEs;       */
48       uint32_t contractionSize;  /* needed for various closures */
49       /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars      */
50 
51       uint32_t endExpansionCE;      /* array of last collation element in
52                                        expansion */
53       uint32_t expansionCESize;     /* array of maximum expansion size
54                                        corresponding to the expansion
55                                        collation elements with last element
56                                        in endExpansionCE*/
57       int32_t  endExpansionCECount; /* size of endExpansionCE */
58       uint32_t unsafeCP;            /* hash table of unsafe code points */
59       uint32_t contrEndCP;          /* hash table of final code points  */
60                                     /*   in contractions.               */
61 
62       int32_t contractionUCACombosSize;     /* number of UCA contraction items. */
63                                             /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */
64       UBool jamoSpecial;                    /* is jamoSpecial */
65       UBool isBigEndian;                    /* is this data big endian? from the UDataInfo header*/
66       uint8_t charSetFamily;                /* what is the charset family of this data from the UDataInfo header*/
67       uint8_t contractionUCACombosWidth;    /* width of UCA combos field */
68       UVersionInfo version;
69       UVersionInfo UCAVersion;              /* version of the UCA, read from file */
70       UVersionInfo UCDVersion;              /* UCD version, obtained by u_getUnicodeVersion */
71       UVersionInfo formatVersion;           /* format version from the UDataInfo header */
72       uint32_t scriptToLeadByte;            /* offset to script to lead collation byte mapping data */
73       uint32_t leadByteToScript;            /* offset to lead collation byte to script mapping data */
74       uint8_t reserved[76];                 /* for future use */
75 } UCATableHeader;
76 
77 typedef struct {
78   uint32_t byteSize;
79   uint32_t tableSize;
80   uint32_t contsSize;
81   uint32_t table;
82   uint32_t conts;
83   UVersionInfo UCAVersion;              /* version of the UCA, read from file */
84   uint8_t padding[8];
85 } InverseUCATableHeader;
86 
87 #endif  /* !UCONFIG_NO_COLLATION */
88 
89 #endif  /* __UCOL_DATA_H__ */
90