1 /*
2 **********************************************************************
3 *   Copyright (C) 2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *
7 * scriptset.cpp
8 *
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
17 
18 #include "scriptset.h"
19 #include "uassert.h"
20 #include "cmemory.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 //----------------------------------------------------------------------------
25 //
26 //  ScriptSet implementation
27 //
28 //----------------------------------------------------------------------------
ScriptSet()29 ScriptSet::ScriptSet() {
30     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
31         bits[i] = 0;
32     }
33 }
34 
~ScriptSet()35 ScriptSet::~ScriptSet() {
36 }
37 
// Copy constructor: the new set receives a word-for-word copy of
// the bit array held by 'other'.
ScriptSet::ScriptSet(const ScriptSet &other) {
    for (uint32_t wordIdx = 0; wordIdx < UPRV_LENGTHOF(bits); ++wordIdx) {
        bits[wordIdx] = other.bits[wordIdx];
    }
}
41 
42 
// Assignment: overwrites this set's bit array with the contents of 'other'
// and returns a reference to this set.
ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    uint32_t wordIdx = 0;
    while (wordIdx < UPRV_LENGTHOF(bits)) {
        bits[wordIdx] = other.bits[wordIdx];
        ++wordIdx;
    }
    return *this;
}
49 
50 
operator ==(const ScriptSet & other) const51 UBool ScriptSet::operator == (const ScriptSet &other) const {
52     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
53         if (bits[i] != other.bits[i]) {
54             return FALSE;
55         }
56     }
57     return TRUE;
58 }
59 
test(UScriptCode script,UErrorCode & status) const60 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
61     if (U_FAILURE(status)) {
62         return FALSE;
63     }
64     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
65         status = U_ILLEGAL_ARGUMENT_ERROR;
66         return FALSE;
67     }
68     uint32_t index = script / 32;
69     uint32_t bit   = 1 << (script & 31);
70     return ((bits[index] & bit) != 0);
71 }
72 
73 
set(UScriptCode script,UErrorCode & status)74 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
75     if (U_FAILURE(status)) {
76         return *this;
77     }
78     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
79         status = U_ILLEGAL_ARGUMENT_ERROR;
80         return *this;
81     }
82     uint32_t index = script / 32;
83     uint32_t bit   = 1 << (script & 31);
84     bits[index] |= bit;
85     return *this;
86 }
87 
reset(UScriptCode script,UErrorCode & status)88 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
89     if (U_FAILURE(status)) {
90         return *this;
91     }
92     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
93         status = U_ILLEGAL_ARGUMENT_ERROR;
94         return *this;
95     }
96     uint32_t index = script / 32;
97     uint32_t bit   = 1 << (script & 31);
98     bits[index] &= ~bit;
99     return *this;
100 }
101 
102 
103 
// In-place union: after the call this set also holds every member of 'other'.
// Returns a reference to this set.
ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    uint32_t wordIdx = 0;
    while (wordIdx < UPRV_LENGTHOF(bits)) {
        bits[wordIdx] = bits[wordIdx] | other.bits[wordIdx];
        ++wordIdx;
    }
    return *this;
}
110 
// In-place intersection: keeps only the members that are also in 'other'.
// Returns a reference to this set.
ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    uint32_t wordIdx = 0;
    while (wordIdx < UPRV_LENGTHOF(bits)) {
        bits[wordIdx] = bits[wordIdx] & other.bits[wordIdx];
        ++wordIdx;
    }
    return *this;
}
117 
intersect(UScriptCode script,UErrorCode & status)118 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
119     ScriptSet t;
120     t.set(script, status);
121     if (U_SUCCESS(status)) {
122         this->intersect(t);
123     }
124     return *this;
125 }
126 
intersects(const ScriptSet & other) const127 UBool ScriptSet::intersects(const ScriptSet &other) const {
128     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
129         if ((bits[i] & other.bits[i]) != 0) {
130             return true;
131         }
132     }
133     return false;
134 }
135 
contains(const ScriptSet & other) const136 UBool ScriptSet::contains(const ScriptSet &other) const {
137     ScriptSet t(*this);
138     t.intersect(other);
139     return (t == other);
140 }
141 
142 
setAll()143 ScriptSet &ScriptSet::setAll() {
144     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
145         bits[i] = 0xffffffffu;
146     }
147     return *this;
148 }
149 
150 
resetAll()151 ScriptSet &ScriptSet::resetAll() {
152     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
153         bits[i] = 0;
154     }
155     return *this;
156 }
157 
countMembers() const158 int32_t ScriptSet::countMembers() const {
159     // This bit counter is good for sparse numbers of '1's, which is
160     //  very much the case that we will usually have.
161     int32_t count = 0;
162     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
163         uint32_t x = bits[i];
164         while (x > 0) {
165             count++;
166             x &= (x - 1);    // and off the least significant one bit.
167         }
168     }
169     return count;
170 }
171 
hashCode() const172 int32_t ScriptSet::hashCode() const {
173     int32_t hash = 0;
174     for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
175         hash ^= bits[i];
176     }
177     return hash;
178 }
179 
nextSetBit(int32_t fromIndex) const180 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
181     // TODO: Wants a better implementation.
182     if (fromIndex < 0) {
183         return -1;
184     }
185     UErrorCode status = U_ZERO_ERROR;
186     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
187         if (test((UScriptCode)scriptIndex, status)) {
188             return scriptIndex;
189         }
190     }
191     return -1;
192 }
193 
displayScripts(UnicodeString & dest) const194 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
195     UBool firstTime = TRUE;
196     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
197         if (!firstTime) {
198             dest.append((UChar)0x20);
199         }
200         firstTime = FALSE;
201         const char *scriptName = uscript_getShortName((UScriptCode(i)));
202         dest.append(UnicodeString(scriptName, -1, US_INV));
203     }
204     return dest;
205 }
206 
parseScripts(const UnicodeString & scriptString,UErrorCode & status)207 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
208     resetAll();
209     if (U_FAILURE(status)) {
210         return *this;
211     }
212     UnicodeString oneScriptName;
213     for (int32_t i=0; i<scriptString.length();) {
214         UChar32 c = scriptString.char32At(i);
215         i = scriptString.moveIndex32(i, 1);
216         if (!u_isUWhiteSpace(c)) {
217             oneScriptName.append(c);
218             if (i < scriptString.length()) {
219                 continue;
220             }
221         }
222         if (oneScriptName.length() > 0) {
223             char buf[40];
224             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
225             buf[sizeof(buf)-1] = 0;
226             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
227             if (sc == UCHAR_INVALID_CODE) {
228                 status = U_ILLEGAL_ARGUMENT_ERROR;
229             } else {
230                 this->set((UScriptCode)sc, status);
231             }
232             if (U_FAILURE(status)) {
233                 return *this;
234             }
235             oneScriptName.remove();
236         }
237     }
238     return *this;
239 }
240 
241 U_NAMESPACE_END
242 
243 U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1,const UElement key2)244 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
245     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
246     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
247     return (*s1 == *s2);
248 }
249 
250 U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0,UElement key1)251 uhash_compareScriptSet(UElement key0, UElement key1) {
252     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
253     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
254     int32_t diff = s0->countMembers() - s1->countMembers();
255     if (diff != 0) return diff;
256     int32_t i0 = s0->nextSetBit(0);
257     int32_t i1 = s1->nextSetBit(0);
258     while ((diff = i0-i1) == 0 && i0 > 0) {
259         i0 = s0->nextSetBit(i0+1);
260         i1 = s1->nextSetBit(i1+1);
261     }
262     return (int8_t)diff;
263 }
264 
265 U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key)266 uhash_hashScriptSet(const UElement key) {
267     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
268     return s->hashCode();
269 }
270 
271 U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void * obj)272 uhash_deleteScriptSet(void *obj) {
273     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
274     delete s;
275 }
276