1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2005, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.tool.compression;
10 
11 
12 /**
13  * Utility class to generate the tables used by the SCSU interface and
14  * the UnicodeCompressor class.
15  *
16  * @author Stephen F. Booth
17  * @version 1.0 08 Mar 99
18  */
class CompressionTableGenerator
{
    // duplicate of constants in SCSU

    //==========================
    // Offset table indices that receive fixed, hand-assigned offsets
    // (see buildOffsetTable)
    //==========================
    static final int LATININDEX                  = 0xF9;
    static final int IPAEXTENSIONINDEX           = 0xFA;
    static final int GREEKINDEX                  = 0xFB;
    static final int ARMENIANINDEX               = 0xFC;
    static final int HIRAGANAINDEX               = 0xFD;
    static final int KATAKANAINDEX               = 0xFE;
    static final int HALFWIDTHKATAKANAINDEX      = 0xFF;

    //==========================
    // Single-byte mode tags
    //==========================
    static final int SDEFINEX                    = 0x0B;
    static final int SRESERVED                   = 0x0C;  // reserved value
    static final int SQUOTEU                     = 0x0E;
    static final int SCHANGEU                    = 0x0F;

    static final int SQUOTE0                     = 0x01;
    static final int SQUOTE1                     = 0x02;
    static final int SQUOTE2                     = 0x03;
    static final int SQUOTE3                     = 0x04;
    static final int SQUOTE4                     = 0x05;
    static final int SQUOTE5                     = 0x06;
    static final int SQUOTE6                     = 0x07;
    static final int SQUOTE7                     = 0x08;

    static final int SCHANGE0                    = 0x10;
    static final int SCHANGE1                    = 0x11;
    static final int SCHANGE2                    = 0x12;
    static final int SCHANGE3                    = 0x13;
    static final int SCHANGE4                    = 0x14;
    static final int SCHANGE5                    = 0x15;
    static final int SCHANGE6                    = 0x16;
    static final int SCHANGE7                    = 0x17;

    static final int SDEFINE0                    = 0x18;
    static final int SDEFINE1                    = 0x19;
    static final int SDEFINE2                    = 0x1A;
    static final int SDEFINE3                    = 0x1B;
    static final int SDEFINE4                    = 0x1C;
    static final int SDEFINE5                    = 0x1D;
    static final int SDEFINE6                    = 0x1E;
    static final int SDEFINE7                    = 0x1F;

    //==========================
    // Unicode mode tags
    //==========================
    static final int UCHANGE0                    = 0xE0;
    static final int UCHANGE1                    = 0xE1;
    static final int UCHANGE2                    = 0xE2;
    static final int UCHANGE3                    = 0xE3;
    static final int UCHANGE4                    = 0xE4;
    static final int UCHANGE5                    = 0xE5;
    static final int UCHANGE6                    = 0xE6;
    static final int UCHANGE7                    = 0xE7;

    static final int UDEFINE0                    = 0xE8;
    static final int UDEFINE1                    = 0xE9;
    static final int UDEFINE2                    = 0xEA;
    static final int UDEFINE3                    = 0xEB;
    static final int UDEFINE4                    = 0xEC;
    static final int UDEFINE5                    = 0xED;
    static final int UDEFINE6                    = 0xEE;
    static final int UDEFINE7                    = 0xEF;

    static final int UQUOTEU                     = 0xF0;
    static final int UDEFINEX                    = 0xF1;
    static final int URESERVED                   = 0xF2;  // reserved value

    // Highest table index; every generated table has BLOCKSIZE + 1 entries,
    // one per possible byte value.
    static final int BLOCKSIZE = 0xFF;

    /**
     * Generate the table used as sOffsetTable in SCSU.
     * This table contains preformed indices so we can do array lookups
     * instead of calculations for speed during decompression.
     * The table is written to System.out as a Java array declaration,
     * with every entry in "0x" + lower-case hex form on a single line.
     */
    static void printOffsetTable()
    {
        int []    offsetTable = buildOffsetTable();
        String [] cells       = new String [ offsetTable.length ];

        for( int i = 0; i < offsetTable.length; i++ ) {
            cells[i] = "0x" + Integer.toHexString(offsetTable[i]);
        }

        printTable("static int [] sOffsetTable = {", cells);
    }

    /**
     * Generate the table used as sSingleTagTable in UnicodeCompressor.
     * This table contains boolean values indicating if a byte is a
     * single-byte mode tag.
     * The table is written to System.out as a Java array declaration.
     */
    static void printSingleTagTable()
    {
        final int [] singleByteTags = {
            SQUOTEU,  SCHANGEU,
            SDEFINEX, SRESERVED,
            SQUOTE0,  SQUOTE1,  SQUOTE2,  SQUOTE3,
            SQUOTE4,  SQUOTE5,  SQUOTE6,  SQUOTE7,
            SCHANGE0, SCHANGE1, SCHANGE2, SCHANGE3,
            SCHANGE4, SCHANGE5, SCHANGE6, SCHANGE7,
            SDEFINE0, SDEFINE1, SDEFINE2, SDEFINE3,
            SDEFINE4, SDEFINE5, SDEFINE6, SDEFINE7
        };

        printTable("private static boolean [] sSingleTagTable = {",
                   toCells(flagTable(singleByteTags)));
    }


    /**
     * Generate the table used as sUnicodeTagTable (presumably in
     * UnicodeCompressor, alongside sSingleTagTable -- the original
     * comment was truncated here; confirm).
     * This table contains boolean values indicating if a byte is a
     * unicode mode tag.
     * The table is written to System.out as a Java array declaration.
     */
    static void printUnicodeTagTable()
    {
        final int [] unicodeTags = {
            UQUOTEU,  UDEFINEX,
            URESERVED,
            UCHANGE0, UCHANGE1, UCHANGE2, UCHANGE3,
            UCHANGE4, UCHANGE5, UCHANGE6, UCHANGE7,
            UDEFINE0, UDEFINE1, UDEFINE2, UDEFINE3,
            UDEFINE4, UDEFINE5, UDEFINE6, UDEFINE7
        };

        printTable("private static boolean [] sUnicodeTagTable = {",
                   toCells(flagTable(unicodeTags)));
    }

    /**
     * Print all three generated tables to System.out, in the order
     * offset table, single-byte tag table, unicode tag table.
     *
     * @param argv ignored
     */
    public static void main(String[] argv)
    {
        printOffsetTable();
        printSingleTagTable();
        printUnicodeTagTable();
    }

    /**
     * Compute the offset table: one offset per index 0x00..BLOCKSIZE.
     * Entries that are never assigned keep the array default of 0.
     */
    private static int [] buildOffsetTable()
    {
        int [] offsetTable = new int [ BLOCKSIZE + 1 ];

        // 0x00 is reserved

        // half blocks from U+0080 to U+3380
        for( int i = 0x01; i < 0x68; i++ ) {
            offsetTable[i] = i * 0x80;
        }

        // half blocks from U+E000 to U+FF80
        for( int i = 0x68; i < 0xA8; i++ ) {
            offsetTable[i] = (i * 0x80) + 0xAC00;
        }

        // 0xA8..0xF8 is reserved

        offsetTable[ LATININDEX ]             = 0x00C0;
        offsetTable[ IPAEXTENSIONINDEX ]      = 0x0250;
        offsetTable[ GREEKINDEX ]             = 0x0370;
        offsetTable[ ARMENIANINDEX ]          = 0x0530;
        offsetTable[ HIRAGANAINDEX ]          = 0x3040;
        offsetTable[ KATAKANAINDEX ]          = 0x30A0;
        offsetTable[ HALFWIDTHKATAKANAINDEX ] = 0xFF60;

        return offsetTable;
    }

    /**
     * Build a (BLOCKSIZE + 1)-entry table that is true exactly at the
     * indices listed in tags and false everywhere else.
     */
    private static boolean [] flagTable(int [] tags)
    {
        // new boolean arrays start out all false, so only the tags are set
        boolean [] flags = new boolean [ BLOCKSIZE + 1 ];

        for( int i = 0; i < tags.length; i++ ) {
            flags[ tags[i] ] = true;
        }
        return flags;
    }

    /** Convert each flag to its "true" / "false" text form. */
    private static String [] toCells(boolean [] flags)
    {
        String [] cells = new String [ flags.length ];

        for( int i = 0; i < flags.length; i++ ) {
            cells[i] = String.valueOf(flags[i]);
        }
        return cells;
    }

    /**
     * Dump one generated table to System.out as three lines: the
     * declaration line, all cells joined by ", " (no trailing separator),
     * and the closing "};".
     */
    private static void printTable(String declaration, String [] cells)
    {
        StringBuilder row = new StringBuilder();

        for( int i = 0; i < cells.length; i++ ) {
            if( i > 0 ) {
                row.append(", ");
            }
            row.append(cells[i]);
        }

        System.out.println(declaration);
        System.out.println(row.toString());
        System.out.println("};");
    }
}
226