1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2002-2010, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.impl; 11 /** 12 * @version 1.1 13 * @author Markus W. Scherer 14 * Ram: Add documentation, remove unwanted methods, improve coverage. 15 */ 16 17 /** 18 * Simple class for handling serialized USet/UnicodeSet structures 19 * without object creation. See ICU4C icu/source/common/uset.c. 20 * 21 * @internal 22 */ 23 public final class USerializedSet { 24 /** 25 * Fill in the given serialized set object. 26 * @param src pointer to start of array 27 * @param srcStart pointer to start of serialized data (length value) 28 * @return true if the given array is valid, otherwise false 29 */ getSet(char src[], int srcStart)30 public final boolean getSet(char src[], int srcStart) { 31 // leave most argument checking up to Java exceptions 32 array=null; 33 arrayOffset=bmpLength=length=0; 34 35 length=src[srcStart++]; 36 37 if ((length&0x8000) != 0) { 38 /* there are supplementary values */ 39 length&=0x7fff; 40 if(src.length<(srcStart+1+length)) { 41 length=0; 42 throw new IndexOutOfBoundsException(); 43 } 44 bmpLength=src[srcStart++]; 45 } else { 46 /* only BMP values */ 47 if(src.length<(srcStart+length)) { 48 length=0; 49 throw new IndexOutOfBoundsException(); 50 } 51 bmpLength=length; 52 } 53 array = new char[length]; 54 System.arraycopy(src,srcStart,array,0,length); 55 //arrayOffset=srcStart; 56 return true; 57 } 58 59 /** 60 * Set the USerializedSet to contain the given character (and nothing 61 * else). 62 */ setToOne(int c)63 public final void setToOne(int c) { 64 if( 0x10ffff<c) { 65 return; 66 } 67 68 if(c<0xffff) { 69 bmpLength=length=2; 70 array[0]=(char)c; 71 array[1]=(char)(c+1); 72 } else if(c==0xffff) { 73 bmpLength=1; 74 length=3; 75 array[0]=0xffff; 76 array[1]=1; 77 array[2]=0; 78 } else if(c<0x10ffff) { 79 bmpLength=0; 80 length=4; 81 array[0]=(char)(c>>16); 82 array[1]=(char)c; 83 ++c; 84 array[2]=(char)(c>>16); 85 array[3]=(char)c; 86 } else /* c==0x10ffff */ { 87 bmpLength=0; 88 length=2; 89 array[0]=0x10; 90 array[1]=0xffff; 91 } 92 } 93 94 /** 95 * Returns a range of characters contained in the given serialized 96 * set. 97 * @param rangeIndex a non-negative integer in the range <code>0.. 98 * getSerializedRangeCount()-1</code> 99 * @param range variable to receive the data in the range 100 * @return true if rangeIndex is valid, otherwise false 101 */ getRange(int rangeIndex, int[] range)102 public final boolean getRange(int rangeIndex, int[] range) { 103 if( rangeIndex<0) { 104 return false; 105 } 106 if(array==null){ 107 array = new char[8]; 108 } 109 if(range==null || range.length <2){ 110 throw new IllegalArgumentException(); 111 } 112 rangeIndex*=2; /* address start/limit pairs */ 113 if(rangeIndex<bmpLength) { 114 range[0]=array[rangeIndex++]; 115 if(rangeIndex<bmpLength) { 116 range[1]=array[rangeIndex]-1; 117 } else if(rangeIndex<length) { 118 range[1]=((((int)array[rangeIndex])<<16)|array[rangeIndex+1])-1; 119 } else { 120 range[1]=0x10ffff; 121 } 122 return true; 123 } else { 124 rangeIndex-=bmpLength; 125 rangeIndex*=2; /* address pairs of pairs of units */ 126 int suppLength=length-bmpLength; 127 if(rangeIndex<suppLength) { 128 int offset=arrayOffset+bmpLength; 129 range[0]=(((int)array[offset+rangeIndex])<<16)|array[offset+rangeIndex+1]; 130 rangeIndex+=2; 131 if(rangeIndex<suppLength) { 132 range[1]=((((int)array[offset+rangeIndex])<<16)|array[offset+rangeIndex+1])-1; 133 } else { 134 range[1]=0x10ffff; 135 } 136 return true; 137 } else { 138 return false; 139 } 140 } 141 } 142 143 /** 144 * Returns true if the given USerializedSet contains the given 145 * character. 146 * @param c the character to test for 147 * @return true if set contains c 148 */ contains(int c)149 public final boolean contains(int c) { 150 151 if(c>0x10ffff) { 152 return false; 153 } 154 155 if(c<=0xffff) { 156 int i; 157 /* find c in the BMP part */ 158 for(i=0; i<bmpLength && (char)c>=array[i]; ++i) {} 159 return ((i&1) != 0); 160 } else { 161 int i; 162 /* find c in the supplementary part */ 163 char high=(char)(c>>16), low=(char)c; 164 for(i=bmpLength; 165 i<length && (high>array[i] || (high==array[i] && low>=array[i+1])); 166 i+=2) {} 167 168 /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ 169 return (((i+bmpLength)&2)!=0); 170 } 171 } 172 173 /** 174 * Returns the number of disjoint ranges of characters contained in 175 * the given serialized set. Ignores any strings contained in the 176 * set. 177 * @return a non-negative integer counting the character ranges 178 * contained in set 179 */ countRanges()180 public final int countRanges() { 181 return (bmpLength+(length-bmpLength)/2+1)/2; 182 } 183 184 private char array[] = new char[8]; 185 private int arrayOffset, bmpLength, length; 186 } 187