1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2002-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  propsvec.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2002feb22
14 *   created by: Markus W. Scherer
15 *
16 *   Store bits (Unicode character properties) in bit set vectors.
17 */
18 
19 #ifndef __UPROPSVEC_H__
20 #define __UPROPSVEC_H__
21 
22 #include "unicode/utypes.h"
23 #include "utrie.h"
24 #include "utrie2.h"
25 
26 U_CDECL_BEGIN
27 
28 /**
29  * Unicode Properties Vectors associated with code point ranges.
30  *
31  * Rows of uint32_t integers in a contiguous array store
32  * the range limits and the properties vectors.
33  *
34  * Logically, each row has a certain number of uint32_t values,
35  * which is set via the upvec_open() "columns" parameter.
36  *
37  * Internally, two additional columns are stored.
38  * In each internal row,
39  * row[0] contains the start code point (inclusive) and
40  * row[1] contains the limit code point (exclusive),
41  * which is the start of the next range.
42  *
43  * Initially, there is only one "normal" row for the half-open
44  * range [0..0x110000[ (that is, code points 0..0x10ffff) with values 0.
45  * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
46  *
47  * It would be possible to store only one range boundary per row,
48  * but self-contained rows allow them to be sorted by contents later.
49  * The type is opaque in this header: only a forward declaration is visible. */
50 struct UPropsVectors;
51 typedef struct UPropsVectors UPropsVectors;
52 
53 /*
54  * Special pseudo code points (starting at 0x110000, the limit of the normal range)
55  * for storing the initialValue and the errorValue, which are used to initialize a UTrie2 or similar.
56  */
57 #define UPVEC_FIRST_SPECIAL_CP 0x110000 /* lowest pseudo code point; same value as UPVEC_INITIAL_VALUE_CP */
58 #define UPVEC_INITIAL_VALUE_CP 0x110000 /* pseudo code point for the initialValue */
59 #define UPVEC_ERROR_VALUE_CP 0x110001 /* pseudo code point for the errorValue */
60 #define UPVEC_MAX_CP 0x110001 /* highest pseudo code point; currently equals UPVEC_ERROR_VALUE_CP */
61 
62 /*
63  * Special pseudo code point that upvec_compact() passes to the handler (as both start and end)
64  * to signal the end of delivering special values and the beginning of delivering real ones.
65  * Stable value, unlike UPVEC_MAX_CP, which might grow over time.
66  */
67 #define UPVEC_START_REAL_VALUES_CP 0x200000
68 
69 /*
70  * Open a UPropsVectors object.
71  * @param columns Number of value integers (uint32_t) per row, not counting the two internal start/limit columns.
72  * @param pErrorCode ICU error code pointer. NOTE(review): failure conditions are not documented in this header -- confirm in the implementation. */
73 U_CAPI UPropsVectors * U_EXPORT2
74 upvec_open(int32_t columns, UErrorCode *pErrorCode);
75 
76 U_CAPI void U_EXPORT2
77 upvec_close(UPropsVectors *pv); /* NOTE(review): presumably the counterpart of upvec_open() that releases pv -- confirm in the implementation */
78 
79 /*
80  * In rows for code points [start..end], select the column,
81  * reset the mask bits and set the value bits (ANDed with the mask),
82  * that is, the column value becomes (old & ~mask) | (value & mask).
83  * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
84  * NOTE(review): the closed-bracket notation suggests that end is inclusive -- confirm in the implementation. */
85 U_CAPI void U_EXPORT2
86 upvec_setValue(UPropsVectors *pv,
87                UChar32 start, UChar32 end,
88                int32_t column,
89                uint32_t value, uint32_t mask,
90                UErrorCode *pErrorCode);
91 
92 /*
93  * Get the uint32_t value for code point c in the given column.
94  * Logically const but must not be used on the same pv concurrently! Always returns 0 if called after upvec_compact().
95  * NOTE(review): the concurrency warning suggests internal state is updated despite the const pv -- confirm in the implementation. */
96 U_CAPI uint32_t U_EXPORT2
97 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
98 
99 /*
100  * Get the row at rowIndex. pRangeStart and pRangeEnd can be NULL (presumably they receive the row's code point range -- confirm).
101  * @return NULL if rowIndex is out of range, for illegal arguments,
102  *         or if called after upvec_compact()
103  */
104 U_CAPI uint32_t * U_EXPORT2
105 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
106              UChar32 *pRangeStart, UChar32 *pRangeEnd);
107 
108 /*
109  * Handler callback type for upvec_compact(), which compacts the vectors:
110  * - modify the memory
111  * - keep only unique vectors
112  * - store them contiguously from the beginning of the memory
113  * - for each (non-unique) row, call the handler function
114  *
115  * The handler's rowIndex is the index of the row in the compacted
116  * memory block.
117  * (Therefore, it starts at 0 and increases in increments of the columns value.)
118  *
119  * In a first phase, only special values are delivered (each exactly once),
120  * with start==end both equalling a special pseudo code point (see UPVEC_INITIAL_VALUE_CP and UPVEC_ERROR_VALUE_CP).
121  * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
122  * where rowIndex is the length of the compacted array,
123  * and the row is arbitrary (but not NULL).
124  * Then, in the second phase, the handler is called for each row of real values.
125  */
126 typedef void U_CALLCONV
127 UPVecCompactHandler(void *context,
128                     UChar32 start, UChar32 end,
129                     int32_t rowIndex, uint32_t *row, int32_t columns,
130                     UErrorCode *pErrorCode);
131 
132 U_CAPI void U_EXPORT2 /* Compact pv, calling handler with context. Afterwards upvec_setValue() sets U_NO_WRITE_PERMISSION, */
133 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); /* upvec_getValue() returns 0 and upvec_getRow() returns NULL. */
134 
135 /*
136  * Get the vectors array after calling upvec_compact().
137  * The caller must neither modify nor release the returned array.
138  * Returns NULL if called before upvec_compact().
139  * NOTE(review): pRows and pColumns presumably receive the array dimensions; whether they may be NULL is not documented here -- confirm. */
140 U_CAPI const uint32_t * U_EXPORT2
141 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
142 
143 /*
144  * Get a clone of the vectors array after calling upvec_compact().
145  * The caller owns the returned array and must uprv_free() it.
146  * Returns NULL if called before upvec_compact().
147  * NOTE(review): pRows and pColumns presumably receive the array dimensions; pErrorCode failure codes are not documented here -- confirm. */
148 U_CAPI uint32_t * U_EXPORT2
149 upvec_cloneArray(const UPropsVectors *pv,
150                  int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
151 
152 /*
153  * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
154  * vectors array, and freeze the trie.
155  * @return the UTrie2. NOTE(review): ownership of the returned trie is not documented here -- presumably the caller's; confirm. */
156 U_CAPI UTrie2 * U_EXPORT2
157 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
158 
159 struct UPVecToUTrie2Context { /* context object expected by upvec_compactToUTrie2Handler() */
160     UTrie2 *trie; /* the trie that the handler creates and fills with rowIndex values */
161     int32_t initialValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP -- confirm */
162     int32_t errorValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP -- confirm */
163     int32_t maxValue; /* NOTE(review): meaning is not visible in this header -- confirm in the implementation */
164 };
165 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
166 
167 /* Handler with the UPVecCompactHandler signature: context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
168 U_CAPI void U_CALLCONV
169 upvec_compactToUTrie2Handler(void *context,
170                              UChar32 start, UChar32 end,
171                              int32_t rowIndex, uint32_t *row, int32_t columns,
172                              UErrorCode *pErrorCode);
173 
174 U_CDECL_END
175 
176 #endif
177