1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  *   Copyright (C) 2002-2010, International Business Machines
6  *   Corporation and others.  All Rights Reserved.
7  *******************************************************************************
8 */
9 
10 package com.ibm.icu.impl;
11 /**
12  * @version     1.1
13  * @author     Markus W. Scherer
14  * Ram: Add documentation, remove unwanted methods, improve coverage.
15  */
16 
/**
 * Simple class for handling serialized USet/UnicodeSet structures
 * without object creation. See ICU4C icu/source/common/uset.c.
 *
 * <p>Layout of the serialized form, as read by {@link #getSet(char[], int)}:
 * <ul>
 *   <li>One length unit. If bit 15 ({@code 0x8000}) is set, the low 15 bits are
 *       the total number of data units and one more unit follows that holds the
 *       number of BMP units; otherwise all data units are BMP units.</li>
 *   <li>The data units: first the BMP range boundaries (one unit each), then the
 *       supplementary range boundaries (two units each: high 16 bits, low 16 bits).</li>
 * </ul>
 * Boundaries alternate between an inclusive range start and an exclusive range
 * limit; a missing final limit means the range extends to U+10FFFF.
 *
 * @internal
 */
public final class USerializedSet {
    /** Highest valid Unicode code point. */
    private static final int MAX_CODE_POINT = 0x10ffff;

    /** Bit in the leading length unit that signals supplementary data. */
    private static final int SUPPLEMENTARY_FLAG = 0x8000;

    /** Largest number of units {@link #setToOne(int)} ever writes. */
    private static final int SINGLE_CODE_POINT_CAPACITY = 4;

    /** Range boundaries; only the first {@code length} units are meaningful. May be null after a failed getSet. */
    private char[] array = new char[8];

    /** Number of leading single-unit (BMP) boundaries in {@code array}. */
    private int bmpLength;

    /** Total number of used units in {@code array} (BMP units plus supplementary unit pairs). */
    private int length;

    /**
     * Fill in this serialized set object from the given array.
     * The data is copied, so later changes to {@code src} do not affect this set.
     * If the call fails with an exception, this object is left as an empty set.
     *
     * @param src array containing the serialized set
     * @param srcStart index in {@code src} of the start of the serialized data (the length unit)
     * @return true (invalid input is reported with an exception, not with false)
     * @throws IndexOutOfBoundsException if {@code srcStart} is outside {@code src} or the
     *         declared length runs past the end of {@code src}
     */
    public final boolean getSet(char src[], int srcStart) {
        // Reset first so that any exception below leaves a consistent, empty set.
        array = null;
        bmpLength = length = 0;

        // leave most argument checking up to Java exceptions
        int declaredLength = src[srcStart++];
        int declaredBmpLength;

        if ((declaredLength & SUPPLEMENTARY_FLAG) != 0) {
            /* there are supplementary values: one extra unit holds the BMP length */
            declaredLength &= 0x7fff;
            if (src.length < (srcStart + 1 + declaredLength)) {
                throw new IndexOutOfBoundsException(
                        "serialized set data exceeds the source array");
            }
            declaredBmpLength = src[srcStart++];
        } else {
            /* only BMP values */
            if (src.length < (srcStart + declaredLength)) {
                throw new IndexOutOfBoundsException(
                        "serialized set data exceeds the source array");
            }
            declaredBmpLength = declaredLength;
        }

        char[] copy = new char[declaredLength];
        System.arraycopy(src, srcStart, copy, 0, declaredLength);

        // Commit only after everything has been validated and copied.
        array = copy;
        length = declaredLength;
        bmpLength = declaredBmpLength;
        return true;
    }

    /**
     * Set the USerializedSet to contain the given character (and nothing
     * else). If {@code c} is not a valid code point (negative or greater
     * than U+10FFFF), the set is left unchanged.
     *
     * @param c the code point that becomes the sole member of the set
     */
    public final void setToOne(int c) {
        if (c < 0 || MAX_CODE_POINT < c) {
            return;
        }

        // The backing array may be missing (failed getSet) or shorter than the
        // units written below (getSet of a tiny set); the old contents are
        // fully replaced, so a fresh array is sufficient.
        if (array == null || array.length < SINGLE_CODE_POINT_CAPACITY) {
            array = new char[SINGLE_CODE_POINT_CAPACITY];
        }

        if (c < 0xffff) {
            // start and limit are both BMP units
            bmpLength = length = 2;
            array[0] = (char) c;
            array[1] = (char) (c + 1);
        } else if (c == 0xffff) {
            // BMP start, but the limit U+10000 needs a supplementary pair
            bmpLength = 1;
            length = 3;
            array[0] = 0xffff;
            array[1] = 1;
            array[2] = 0;
        } else if (c < MAX_CODE_POINT) {
            // start and limit are both supplementary pairs
            bmpLength = 0;
            length = 4;
            array[0] = (char) (c >> 16);
            array[1] = (char) c;
            int limit = c + 1;
            array[2] = (char) (limit >> 16);
            array[3] = (char) limit;
        } else /* c==0x10ffff */ {
            // no limit is stored: the range implicitly ends at U+10FFFF
            bmpLength = 0;
            length = 2;
            array[0] = 0x10;
            array[1] = 0xffff;
        }
    }

    /**
     * Returns a range of characters contained in the given serialized
     * set.
     *
     * @param rangeIndex a non-negative integer in the range <code>0..
     * countRanges()-1</code>
     * @param range array of at least two elements; on success {@code range[0]}
     * receives the first and {@code range[1]} the last code point of the range
     * @return true if rangeIndex is valid, otherwise false
     * @throws IllegalArgumentException if {@code rangeIndex} is non-negative and
     * {@code range} is null or has fewer than two elements
     */
    public final boolean getRange(int rangeIndex, int[] range) {
        if (rangeIndex < 0) {
            return false;
        }
        if (range == null || range.length < 2) {
            throw new IllegalArgumentException("range must have at least 2 elements");
        }
        // Every range occupies at least one unit, so a valid index is always
        // below length. This also keeps the multiplications below from
        // overflowing and avoids touching a missing array for an empty set.
        if (rangeIndex >= length) {
            return false;
        }

        int boundary = rangeIndex * 2; /* address start/limit pairs */
        if (boundary < bmpLength) {
            range[0] = array[boundary++];
            if (boundary < bmpLength) {
                range[1] = array[boundary] - 1;
            } else if (boundary < length) {
                // BMP start whose limit is the first supplementary boundary
                range[1] = supplementaryAt(boundary) - 1;
            } else {
                range[1] = MAX_CODE_POINT;
            }
            return true;
        }

        /* address pairs of pairs of units */
        int unit = (boundary - bmpLength) * 2;
        int suppLength = length - bmpLength;
        if (unit >= suppLength) {
            return false;
        }
        range[0] = supplementaryAt(bmpLength + unit);
        unit += 2;
        if (unit < suppLength) {
            range[1] = supplementaryAt(bmpLength + unit) - 1;
        } else {
            range[1] = MAX_CODE_POINT;
        }
        return true;
    }

    /**
     * Returns true if the given USerializedSet contains the given
     * character.
     *
     * @param c the character to test for
     * @return true if set contains c; always false for values outside 0..0x10ffff
     */
    public final boolean contains(int c) {
        if (c < 0 || c > MAX_CODE_POINT) {
            return false;
        }

        if (c <= 0xffff) {
            /* find c in the BMP part */
            int i = 0;
            while (i < bmpLength && (char) c >= array[i]) {
                ++i;
            }
            // an odd number of boundaries at or below c means c is inside a range
            return (i & 1) != 0;
        }

        /* find c in the supplementary part */
        char high = (char) (c >> 16);
        char low = (char) c;
        int i = bmpLength;
        while (i < length
                && (high > array[i] || (high == array[i] && low >= array[i + 1]))) {
            i += 2;
        }

        /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
        return ((i + bmpLength) & 2) != 0;
    }

    /**
     * Returns the number of disjoint ranges of characters contained in
     * the given serialized set.  Ignores any strings contained in the
     * set.
     *
     * @return a non-negative integer counting the character ranges
     * contained in set
     */
    public final int countRanges() {
        // boundaries = BMP units + supplementary pairs; a trailing unpaired
        // start (range open to U+10FFFF) still counts as one range.
        return (bmpLength + (length - bmpLength) / 2 + 1) / 2;
    }

    /** Combines the two units at {@code index} and {@code index+1} into one code point value. */
    private int supplementaryAt(int index) {
        return (((int) array[index]) << 16) | array[index + 1];
    }
}
187