1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.util.Arrays;
15 
16 /**
* <p>Internal reader class for the ICU data file unames.icu containing
* Unicode code point name data.</p>
* <p>This class simply reads unames.icu, authenticates that it is a valid
* ICU data file and splits its contents up into blocks of data for use in
* <a href=UCharacterName.html>com.ibm.icu.impl.UCharacterName</a>.
* </p>
* <p>unames.icu, which is in big-endian format, is packaged in the jar
* together with this package.</p>
25 * @author Syn Wee Quek
26 * @since release 2.1, February 1st 2002
27 */
28 
29 final class UCharacterNameReader implements ICUBinary.Authenticate
30 {
31     // public methods ----------------------------------------------------
32 
33     @Override
isDataVersionAcceptable(byte version[])34     public boolean isDataVersionAcceptable(byte version[])
35     {
36         return version[0] == 1;
37     }
38 
39     // protected constructor ---------------------------------------------
40 
41     /**
42     * <p>Protected constructor.</p>
43     * @param bytes ICU uprop.dat file buffer
44     * @exception IOException throw if data file fails authentication
45     */
UCharacterNameReader(ByteBuffer bytes)46     protected UCharacterNameReader(ByteBuffer bytes) throws IOException
47     {
48         ICUBinary.readHeader(bytes, DATA_FORMAT_ID_, this);
49         m_byteBuffer_ = bytes;
50     }
51 
52     // protected methods -------------------------------------------------
53 
54     /**
55     * Read and break up the stream of data passed in as arguments
56     * and fills up UCharacterName.
57     * If unsuccessful false will be returned.
58     * @param data instance of datablock
59     * @exception IOException thrown when there's a data error.
60     */
read(UCharacterName data)61     protected void read(UCharacterName data) throws IOException
62     {
63         // reading index
64         m_tokenstringindex_ = m_byteBuffer_.getInt();
65         m_groupindex_       = m_byteBuffer_.getInt();
66         m_groupstringindex_ = m_byteBuffer_.getInt();
67         m_algnamesindex_    = m_byteBuffer_.getInt();
68 
69         // reading tokens
70         int count = m_byteBuffer_.getChar();
71         char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
72         int size = m_groupindex_ - m_tokenstringindex_;
73         byte tokenstr[] = new byte[size];
74         m_byteBuffer_.get(tokenstr);
75         data.setToken(token, tokenstr);
76 
77         // reading the group information records
78         count = m_byteBuffer_.getChar();
79         data.setGroupCountSize(count, GROUP_INFO_SIZE_);
80         count *= GROUP_INFO_SIZE_;
81         char group[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
82 
83         size = m_algnamesindex_ - m_groupstringindex_;
84         byte groupstring[] = new byte[size];
85         m_byteBuffer_.get(groupstring);
86 
87         data.setGroup(group, groupstring);
88 
89         count = m_byteBuffer_.getInt();
90         UCharacterName.AlgorithmName alg[] =
91                                  new UCharacterName.AlgorithmName[count];
92 
93         for (int i = 0; i < count; i ++)
94         {
95             UCharacterName.AlgorithmName an = readAlg();
96             if (an == null) {
97                 throw new IOException("unames.icu read error: Algorithmic names creation error");
98             }
99             alg[i] = an;
100         }
101         data.setAlgorithm(alg);
102     }
103 
104     /**
105     * <p>Checking the file for the correct format.</p>
106     * @param dataformatid
107     * @param dataformatversion
108     * @return true if the file format version is correct
109     */
110     ///CLOVER:OFF
authenticate(byte dataformatid[], byte dataformatversion[])111     protected boolean authenticate(byte dataformatid[],
112                                    byte dataformatversion[])
113     {
114         return Arrays.equals(
115                 ICUBinary.getVersionByteArrayFromCompactInt(DATA_FORMAT_ID_),
116                 dataformatid) &&
117                isDataVersionAcceptable(dataformatversion);
118     }
119     ///CLOVER:ON
120 
121     // private variables -------------------------------------------------
122 
123     /**
124     * Byte buffer for names
125     */
126     private ByteBuffer m_byteBuffer_;
127     /**
128     * Size of the group information block in number of char
129     */
130     private static final int GROUP_INFO_SIZE_ = 3;
131 
132     /**
133     * Index of the offset information
134     */
135     private int m_tokenstringindex_;
136     private int m_groupindex_;
137     private int m_groupstringindex_;
138     private int m_algnamesindex_;
139 
140     /**
141     * Size of an algorithmic name information group
142     * start code point size + end code point size + type size + variant size +
143     * size of data size
144     */
145     private static final int ALG_INFO_SIZE_ = 12;
146 
147     /**
148     * File format id that this class understands.
149     */
150     private static final int DATA_FORMAT_ID_ = 0x756E616D;
151 
152     // private methods ---------------------------------------------------
153 
154     /**
155     * Reads an individual record of AlgorithmNames
156     * @return an instance of AlgorithNames if read is successful otherwise null
157     * @exception IOException thrown when file read error occurs or data is corrupted
158     */
readAlg()159     private UCharacterName.AlgorithmName readAlg() throws IOException
160     {
161         UCharacterName.AlgorithmName result =
162                                        new UCharacterName.AlgorithmName();
163         int rangestart = m_byteBuffer_.getInt();
164         int rangeend   = m_byteBuffer_.getInt();
165         byte type      = m_byteBuffer_.get();
166         byte variant   = m_byteBuffer_.get();
167         if (!result.setInfo(rangestart, rangeend, type, variant)) {
168             return null;
169         }
170 
171         int size = m_byteBuffer_.getChar();
172         if (type == UCharacterName.AlgorithmName.TYPE_1_)
173         {
174             char factor[] = ICUBinary.getChars(m_byteBuffer_, variant, 0);
175 
176             result.setFactor(factor);
177             size -= (variant << 1);
178         }
179 
180         StringBuilder prefix = new StringBuilder();
181         char c = (char)(m_byteBuffer_.get() & 0x00FF);
182         while (c != 0)
183         {
184             prefix.append(c);
185             c = (char)(m_byteBuffer_.get() & 0x00FF);
186         }
187 
188         result.setPrefix(prefix.toString());
189 
190         size -= (ALG_INFO_SIZE_ + prefix.length() + 1);
191 
192         if (size > 0)
193         {
194             byte string[] = new byte[size];
195             m_byteBuffer_.get(string);
196             result.setFactorString(string);
197         }
198         return result;
199     }
200 }
201