1 package org.unicode.cldr.icu;
2 
3 /**
4  * This Class is the Java representation of the ICU4C structure UDataInfo which can
5  * be found in <I>$icu4c_root</I>/source/common/unicode/udata.h
6  *
7  * <p>
8  * This class is used by LDML2ICUBinaryWriter to store information that must be written in the ICU Binary format.
9  *
10  * Note that if this data structure ever grows, the getSize() method must be updated.
11  *
12  * @author Brian Rower - June 2008
13  *
14  */
public class UDataInfo {

    /**
     * Use to signify that this data is in Big Endian form.
     * Currently the only mode supported in Java is Big Endian.
     */
    public static final byte BIGENDIAN = 1;

    /**
     * charsetFamily is equal to this value when the platform is an ASCII based platform.
     * Currently the only mode supported in Java is ASCII.
     * This mirrors the ICU4C version in <I>$icu4c_root</I>/source/common/unicode/utypes.h
     */
    public static final byte ASCII_FAMILY = 0;

    /**
     * This is the value for setting sizeofUChar. Currently it is 16 bits (2 bytes).
     * UChar is currently defined in <I>$icu4c_root</I>/source/common/unicode/umachine.h
     */
    public static final byte SIZE_OF_UCHAR = 2;

    /**
     * Required length of each of the three byte-array fields
     * ({@link #dataFormat}, {@link #formatVersion}, {@link #dataVersion}).
     */
    private static final int FIELD_ARRAY_LENGTH = 4;

    /**
     * Number of bytes produced by {@link #getByteArray()} and reported by {@link #getSize()}:
     * 2 shorts (2*2) + 4 bytes (4*1) + 3 byte arrays of length 4 (3*4) = 20.
     * Must be updated if the structure ever grows.
     */
    private static final short SERIALIZED_SIZE = 20;

    /**
     * This field stores the size of this data structure in memory.
     * Add up the size of each part of it.
     */
    public short size;

    /**
     * This field is currently unused, set it to zero.
     */
    public short reservedWord;

    /**
     * This field is used to signify the Endian mode of a system.
     * Choose from the static final byte constants provided in this class.
     * In Java, there is only one possibility: Big Endian.
     */
    public byte isBigEndian;

    /**
     * This field stores the character set which is being used.
     */
    public byte charsetFamily;

    /**
     * Size of the UChar structure in C.
     */
    public byte sizeofUChar;

    /**
     * This field is currently unused, set it to zero.
     */
    public byte reservedByte;

    /**
     * This field stores an identifier for the data format.
     * Array should be of length 4.
     */
    public byte[] dataFormat;

    /**
     * This field stores the Format version. Array should be of length 4.<br>
     * [0] = major<br>
     * [1] = minor<br>
     * [2] = milli<br>
     * [3] = micro<br>
     */
    public byte[] formatVersion;

    /**
     * This field stores the data version. Array should be of length 4.<br>
     * [0] = major<br>
     * [1] = minor<br>
     * [2] = milli<br>
     * [3] = micro<br>
     */
    public byte[] dataVersion;

    /**
     * Thrown by the constructor when one of the byte-array arguments does not have exactly
     * four elements.
     *
     * <p>
     * Declared {@code static} so that an instance does not carry a hidden reference to an
     * enclosing {@code UDataInfo} (which is not serializable) and so that it can be created
     * from static helper code.
     */
    static class IncorrectArrayLengthException extends Exception {
        private static final long serialVersionUID = -3238261375903639881L;

        /**
         * @param message
         *            description of which array had the wrong length
         */
        IncorrectArrayLengthException(String message) {
            super(message);
        }
    }

    /**
     * Creates a UDataInfo from its individual parts. The three arrays are stored by reference,
     * not copied.
     *
     * @param size
     *            value for the {@link #size} field
     * @param reservedWord
     *            value for the {@link #reservedWord} field
     * @param isBigEndian
     *            value for the {@link #isBigEndian} field
     * @param charsetFamily
     *            value for the {@link #charsetFamily} field
     * @param sizeofUChar
     *            value for the {@link #sizeofUChar} field
     * @param reservedByte
     *            value for the {@link #reservedByte} field
     * @param dataFormat
     *            data format identifier, must be of length 4
     * @param formatVersion
     *            format version, must be of length 4
     * @param dataVersion
     *            data version, must be of length 4
     * @throws IncorrectArrayLengthException
     *             if any of the three arrays is not of length 4
     * @throws NullPointerException
     *             if any of the three arrays is null
     */
    public UDataInfo(short size, short reservedWord, byte isBigEndian, byte charsetFamily, byte sizeofUChar,
        byte reservedByte, byte[] dataFormat, byte[] formatVersion, byte[] dataVersion)
        throws IncorrectArrayLengthException {
        // Validate everything before assigning any field; the order of the checks decides which
        // problem is reported when several arrays are wrong.
        requireFieldArray(dataFormat, "dataFormat");
        requireFieldArray(formatVersion, "formatVersion");
        requireFieldArray(dataVersion, "dataVersion");

        this.size = size;
        this.reservedWord = reservedWord;
        this.isBigEndian = isBigEndian;
        this.charsetFamily = charsetFamily;
        this.sizeofUChar = sizeofUChar;
        this.reservedByte = reservedByte;
        this.dataFormat = dataFormat;
        this.formatVersion = formatVersion;
        this.dataVersion = dataVersion;
    }

    /**
     * Checks that a constructor array argument is present and has exactly four elements.
     *
     * @param array
     *            the array to check
     * @param name
     *            the argument name, used in the exception message
     * @throws IncorrectArrayLengthException
     *             if the array is not of length 4
     * @throws NullPointerException
     *             if the array is null
     */
    private static void requireFieldArray(byte[] array, String name) throws IncorrectArrayLengthException {
        if (array == null) {
            throw new NullPointerException("The byte array '" + name + "' must not be null.");
        }
        if (array.length != FIELD_ARRAY_LENGTH) {
            throw new IncorrectArrayLengthException("The byte array '" + name + "' must be of length 4.");
        }
    }

    /**
     * This method returns the size that this structure will occupy when written to binary file.
     * byte = 1 byte <br>
     * short = 2 bytes<br>
     * int = 4 bytes<br>
     * long = 8 bytes<br>
     * float = 4 bytes<br>
     * double = 8 bytes<br>
     * char = 2 bytes<br>
     *
     * @return The number of bytes that UDataInfo occupies
     */
    public static short getSize() {
        return SERIALIZED_SIZE;
    }

    /**
     * Returns a byte array representing this UDataInfo so that it can be written byte by byte.
     *
     * <p>
     * Layout (offset: content):<br>
     * 0-1: size, upper byte first<br>
     * 2-3: reserved word, always written as zero<br>
     * 4: isBigEndian<br>
     * 5: charsetFamily<br>
     * 6: sizeofUChar<br>
     * 7: reserved byte, always written as zero<br>
     * 8-11: dataFormat<br>
     * 12-15: formatVersion<br>
     * 16-19: dataVersion<br>
     *
     * @return a new byte array of length {@link #getSize()} with the contents of this UDataInfo.
     */
    public byte[] getByteArray() {
        byte[] out = new byte[SERIALIZED_SIZE];

        byte[] sizeBytes = shortToBytes(size);
        out[0] = sizeBytes[0];
        out[1] = sizeBytes[1];

        // out[2], out[3] (reserved word) and out[7] (reserved byte) keep the zero that a new
        // array starts with; the reservedWord / reservedByte field values are not consulted.

        out[4] = isBigEndian;
        out[5] = charsetFamily;
        out[6] = sizeofUChar;

        System.arraycopy(dataFormat, 0, out, 8, FIELD_ARRAY_LENGTH);
        System.arraycopy(formatVersion, 0, out, 12, FIELD_ARRAY_LENGTH);
        System.arraycopy(dataVersion, 0, out, 16, FIELD_ARRAY_LENGTH);

        return out;
    }

    /**
     * Takes a 16 bit number and returns a two byte array in big-endian order: 0th element is the
     * upper byte, 1st element is the lower byte.
     * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return:
     * [0] = 0110 1101 or 0x6D
     * [1] = 0110 0000 or 0x60
     */
    private static byte[] shortToBytes(short x) {
        // The narrowing cast to byte keeps only the low eight bits, so no explicit mask is
        // needed and sign extension of negative values does not matter.
        return new byte[] { (byte) (x >> 8), (byte) x };
    }
}
215