1 /*
2 **********************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * scriptset.cpp
8 *
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
11 */
12
13 #include "unicode/utypes.h"
14
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
17
18 #include "scriptset.h"
19 #include "uassert.h"
20 #include "cmemory.h"
21
22 U_NAMESPACE_BEGIN
23
24 //----------------------------------------------------------------------------
25 //
26 // ScriptSet implementation
27 //
28 //----------------------------------------------------------------------------
ScriptSet()29 ScriptSet::ScriptSet() {
30 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
31 bits[i] = 0;
32 }
33 }
34
~ScriptSet()35 ScriptSet::~ScriptSet() {
36 }
37
// Copy constructor: delegates to the assignment operator, which copies
// every word of other's bit array into this object.
ScriptSet::ScriptSet(const ScriptSet &other) {
    operator=(other);
}
41
42
// Assignment: copies the bit array of 'other' word by word.
// Returns a reference to this set.
ScriptSet &ScriptSet::operator=(const ScriptSet &other) {
    // The words are independent, so the copy order does not matter.
    uint32_t wordIndex = UPRV_LENGTHOF(bits);
    while (wordIndex > 0) {
        --wordIndex;
        bits[wordIndex] = other.bits[wordIndex];
    }
    return *this;
}
49
50
operator ==(const ScriptSet & other) const51 UBool ScriptSet::operator == (const ScriptSet &other) const {
52 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
53 if (bits[i] != other.bits[i]) {
54 return FALSE;
55 }
56 }
57 return TRUE;
58 }
59
test(UScriptCode script,UErrorCode & status) const60 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
61 if (U_FAILURE(status)) {
62 return FALSE;
63 }
64 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
65 status = U_ILLEGAL_ARGUMENT_ERROR;
66 return FALSE;
67 }
68 uint32_t index = script / 32;
69 uint32_t bit = 1 << (script & 31);
70 return ((bits[index] & bit) != 0);
71 }
72
73
set(UScriptCode script,UErrorCode & status)74 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
75 if (U_FAILURE(status)) {
76 return *this;
77 }
78 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
79 status = U_ILLEGAL_ARGUMENT_ERROR;
80 return *this;
81 }
82 uint32_t index = script / 32;
83 uint32_t bit = 1 << (script & 31);
84 bits[index] |= bit;
85 return *this;
86 }
87
reset(UScriptCode script,UErrorCode & status)88 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
89 if (U_FAILURE(status)) {
90 return *this;
91 }
92 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
93 status = U_ILLEGAL_ARGUMENT_ERROR;
94 return *this;
95 }
96 uint32_t index = script / 32;
97 uint32_t bit = 1 << (script & 31);
98 bits[index] &= ~bit;
99 return *this;
100 }
101
102
103
// In-place union: ORs each word of 'other' into this set.
// Returns a reference to this set.
ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    uint32_t wordIndex = 0;
    while (wordIndex < UPRV_LENGTHOF(bits)) {
        bits[wordIndex] = bits[wordIndex] | other.bits[wordIndex];
        ++wordIndex;
    }
    return *this;
}
110
// In-place intersection: ANDs each word of this set with the matching
// word of 'other'. Returns a reference to this set.
ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    uint32_t wordIndex = 0;
    while (wordIndex < UPRV_LENGTHOF(bits)) {
        bits[wordIndex] = bits[wordIndex] & other.bits[wordIndex];
        ++wordIndex;
    }
    return *this;
}
117
intersect(UScriptCode script,UErrorCode & status)118 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
119 ScriptSet t;
120 t.set(script, status);
121 if (U_SUCCESS(status)) {
122 this->intersect(t);
123 }
124 return *this;
125 }
126
intersects(const ScriptSet & other) const127 UBool ScriptSet::intersects(const ScriptSet &other) const {
128 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
129 if ((bits[i] & other.bits[i]) != 0) {
130 return true;
131 }
132 }
133 return false;
134 }
135
contains(const ScriptSet & other) const136 UBool ScriptSet::contains(const ScriptSet &other) const {
137 ScriptSet t(*this);
138 t.intersect(other);
139 return (t == other);
140 }
141
142
setAll()143 ScriptSet &ScriptSet::setAll() {
144 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
145 bits[i] = 0xffffffffu;
146 }
147 return *this;
148 }
149
150
resetAll()151 ScriptSet &ScriptSet::resetAll() {
152 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
153 bits[i] = 0;
154 }
155 return *this;
156 }
157
countMembers() const158 int32_t ScriptSet::countMembers() const {
159 // This bit counter is good for sparse numbers of '1's, which is
160 // very much the case that we will usually have.
161 int32_t count = 0;
162 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
163 uint32_t x = bits[i];
164 while (x > 0) {
165 count++;
166 x &= (x - 1); // and off the least significant one bit.
167 }
168 }
169 return count;
170 }
171
hashCode() const172 int32_t ScriptSet::hashCode() const {
173 int32_t hash = 0;
174 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
175 hash ^= bits[i];
176 }
177 return hash;
178 }
179
nextSetBit(int32_t fromIndex) const180 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
181 // TODO: Wants a better implementation.
182 if (fromIndex < 0) {
183 return -1;
184 }
185 UErrorCode status = U_ZERO_ERROR;
186 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
187 if (test((UScriptCode)scriptIndex, status)) {
188 return scriptIndex;
189 }
190 }
191 return -1;
192 }
193
displayScripts(UnicodeString & dest) const194 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
195 UBool firstTime = TRUE;
196 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
197 if (!firstTime) {
198 dest.append((UChar)0x20);
199 }
200 firstTime = FALSE;
201 const char *scriptName = uscript_getShortName((UScriptCode(i)));
202 dest.append(UnicodeString(scriptName, -1, US_INV));
203 }
204 return dest;
205 }
206
parseScripts(const UnicodeString & scriptString,UErrorCode & status)207 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
208 resetAll();
209 if (U_FAILURE(status)) {
210 return *this;
211 }
212 UnicodeString oneScriptName;
213 for (int32_t i=0; i<scriptString.length();) {
214 UChar32 c = scriptString.char32At(i);
215 i = scriptString.moveIndex32(i, 1);
216 if (!u_isUWhiteSpace(c)) {
217 oneScriptName.append(c);
218 if (i < scriptString.length()) {
219 continue;
220 }
221 }
222 if (oneScriptName.length() > 0) {
223 char buf[40];
224 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
225 buf[sizeof(buf)-1] = 0;
226 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
227 if (sc == UCHAR_INVALID_CODE) {
228 status = U_ILLEGAL_ARGUMENT_ERROR;
229 } else {
230 this->set((UScriptCode)sc, status);
231 }
232 if (U_FAILURE(status)) {
233 return *this;
234 }
235 oneScriptName.remove();
236 }
237 }
238 return *this;
239 }
240
241 U_NAMESPACE_END
242
243 U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1,const UElement key2)244 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
245 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
246 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
247 return (*s1 == *s2);
248 }
249
250 U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0,UElement key1)251 uhash_compareScriptSet(UElement key0, UElement key1) {
252 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
253 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
254 int32_t diff = s0->countMembers() - s1->countMembers();
255 if (diff != 0) return diff;
256 int32_t i0 = s0->nextSetBit(0);
257 int32_t i1 = s1->nextSetBit(0);
258 while ((diff = i0-i1) == 0 && i0 > 0) {
259 i0 = s0->nextSetBit(i0+1);
260 i1 = s1->nextSetBit(i1+1);
261 }
262 return (int8_t)diff;
263 }
264
265 U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key)266 uhash_hashScriptSet(const UElement key) {
267 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
268 return s->hashCode();
269 }
270
271 U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void * obj)272 uhash_deleteScriptSet(void *obj) {
273 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
274 delete s;
275 }
276