1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * scriptset.cpp
10 *
11 * created on: 2013 Jan 7
12 * created by: Andy Heninger
13 */
14 
15 #include "unicode/utypes.h"
16 
17 #include "unicode/uchar.h"
18 #include "unicode/unistr.h"
19 
20 #include "scriptset.h"
21 #include "uassert.h"
22 #include "cmemory.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 //----------------------------------------------------------------------------
27 //
28 //  ScriptSet implementation
29 //
30 //----------------------------------------------------------------------------
31 ScriptSet::ScriptSet() {
32     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
33         bits[i] = 0;
34     }
35 }
36 
37 ScriptSet::~ScriptSet() {
38 }
39 
40 ScriptSet::ScriptSet(const ScriptSet &other) {
41     *this = other;
42 }
43 
44 
45 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
46     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
47         bits[i] = other.bits[i];
48     }
49     return *this;
50 }
51 
52 
53 UBool ScriptSet::operator == (const ScriptSet &other) const {
54     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
55         if (bits[i] != other.bits[i]) {
56             return FALSE;
57         }
58     }
59     return TRUE;
60 }
61 
62 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
63     if (U_FAILURE(status)) {
64         return FALSE;
65     }
66     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
67         status = U_ILLEGAL_ARGUMENT_ERROR;
68         return FALSE;
69     }
70     uint32_t index = script / 32;
71     uint32_t bit   = 1 << (script & 31);
72     return ((bits[index] & bit) != 0);
73 }
74 
75 
76 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
77     if (U_FAILURE(status)) {
78         return *this;
79     }
80     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
81         status = U_ILLEGAL_ARGUMENT_ERROR;
82         return *this;
83     }
84     uint32_t index = script / 32;
85     uint32_t bit   = 1 << (script & 31);
86     bits[index] |= bit;
87     return *this;
88 }
89 
90 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
91     if (U_FAILURE(status)) {
92         return *this;
93     }
94     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
95         status = U_ILLEGAL_ARGUMENT_ERROR;
96         return *this;
97     }
98     uint32_t index = script / 32;
99     uint32_t bit   = 1 << (script & 31);
100     bits[index] &= ~bit;
101     return *this;
102 }
103 
104 
105 
106 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
107     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
108         bits[i] |= other.bits[i];
109     }
110     return *this;
111 }
112 
113 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
114     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
115         bits[i] &= other.bits[i];
116     }
117     return *this;
118 }
119 
120 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
121     ScriptSet t;
122     t.set(script, status);
123     if (U_SUCCESS(status)) {
124         this->intersect(t);
125     }
126     return *this;
127 }
128 
129 UBool ScriptSet::intersects(const ScriptSet &other) const {
130     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
131         if ((bits[i] & other.bits[i]) != 0) {
132             return true;
133         }
134     }
135     return false;
136 }
137 
138 UBool ScriptSet::contains(const ScriptSet &other) const {
139     ScriptSet t(*this);
140     t.intersect(other);
141     return (t == other);
142 }
143 
144 
145 ScriptSet &ScriptSet::setAll() {
146     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
147         bits[i] = 0xffffffffu;
148     }
149     return *this;
150 }
151 
152 
153 ScriptSet &ScriptSet::resetAll() {
154     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
155         bits[i] = 0;
156     }
157     return *this;
158 }
159 
160 int32_t ScriptSet::countMembers() const {
161     // This bit counter is good for sparse numbers of '1's, which is
162     //  very much the case that we will usually have.
163     int32_t count = 0;
164     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
165         uint32_t x = bits[i];
166         while (x > 0) {
167             count++;
168             x &= (x - 1);    // and off the least significant one bit.
169         }
170     }
171     return count;
172 }
173 
174 int32_t ScriptSet::hashCode() const {
175     int32_t hash = 0;
176     for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
177         hash ^= bits[i];
178     }
179     return hash;
180 }
181 
182 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
183     // TODO: Wants a better implementation.
184     if (fromIndex < 0) {
185         return -1;
186     }
187     UErrorCode status = U_ZERO_ERROR;
188     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
189         if (test((UScriptCode)scriptIndex, status)) {
190             return scriptIndex;
191         }
192     }
193     return -1;
194 }
195 
196 UBool ScriptSet::isEmpty() const {
197     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
198         if (bits[i] != 0) {
199             return FALSE;
200         }
201     }
202     return TRUE;
203 }
204 
205 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
206     UBool firstTime = TRUE;
207     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
208         if (!firstTime) {
209             dest.append((UChar)0x20);
210         }
211         firstTime = FALSE;
212         const char *scriptName = uscript_getShortName((UScriptCode(i)));
213         dest.append(UnicodeString(scriptName, -1, US_INV));
214     }
215     return dest;
216 }
217 
218 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
219     resetAll();
220     if (U_FAILURE(status)) {
221         return *this;
222     }
223     UnicodeString oneScriptName;
224     for (int32_t i=0; i<scriptString.length();) {
225         UChar32 c = scriptString.char32At(i);
226         i = scriptString.moveIndex32(i, 1);
227         if (!u_isUWhiteSpace(c)) {
228             oneScriptName.append(c);
229             if (i < scriptString.length()) {
230                 continue;
231             }
232         }
233         if (oneScriptName.length() > 0) {
234             char buf[40];
235             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
236             buf[sizeof(buf)-1] = 0;
237             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
238             if (sc == UCHAR_INVALID_CODE) {
239                 status = U_ILLEGAL_ARGUMENT_ERROR;
240             } else {
241                 this->set((UScriptCode)sc, status);
242             }
243             if (U_FAILURE(status)) {
244                 return *this;
245             }
246             oneScriptName.remove();
247         }
248     }
249     return *this;
250 }
251 
252 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
253     if (U_FAILURE(status)) { return; }
254     static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
255     MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
256     UErrorCode internalStatus = U_ZERO_ERROR;
257     int32_t script_count = -1;
258 
259     while (TRUE) {
260         script_count = uscript_getScriptExtensions(
261             codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
262         if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
263             // Need to allocate more space
264             if (scripts.resize(script_count) == NULL) {
265                 status = U_MEMORY_ALLOCATION_ERROR;
266                 return;
267             }
268             internalStatus = U_ZERO_ERROR;
269         } else {
270             break;
271         }
272     }
273 
274     // Check if we failed for some reason other than buffer overflow
275     if (U_FAILURE(internalStatus)) {
276         status = internalStatus;
277         return;
278     }
279 
280     // Load the scripts into the ScriptSet and return
281     for (int32_t i = 0; i < script_count; i++) {
282         this->set(scripts[i], status);
283         if (U_FAILURE(status)) { return; }
284     }
285 }
286 
287 U_NAMESPACE_END
288 
289 U_CAPI UBool U_EXPORT2
290 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
291     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
292     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
293     return (*s1 == *s2);
294 }
295 
296 U_CAPI int8_t U_EXPORT2
297 uhash_compareScriptSet(UElement key0, UElement key1) {
298     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
299     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
300     int32_t diff = s0->countMembers() - s1->countMembers();
301     if (diff != 0) return diff;
302     int32_t i0 = s0->nextSetBit(0);
303     int32_t i1 = s1->nextSetBit(0);
304     while ((diff = i0-i1) == 0 && i0 > 0) {
305         i0 = s0->nextSetBit(i0+1);
306         i1 = s1->nextSetBit(i1+1);
307     }
308     return (int8_t)diff;
309 }
310 
311 U_CAPI int32_t U_EXPORT2
312 uhash_hashScriptSet(const UElement key) {
313     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
314     return s->hashCode();
315 }
316 
317 U_CAPI void U_EXPORT2
318 uhash_deleteScriptSet(void *obj) {
319     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
320     delete s;
321 }
322