1 /**
2 *******************************************************************************
3 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 package com.ibm.icu.dev.tool.translit;
8 
9 import java.io.FileOutputStream;
10 import java.io.OutputStreamWriter;
11 import java.util.Hashtable;
12 
13 import com.ibm.icu.impl.Utility;
14 import com.ibm.icu.lang.UCharacter;
15 import com.ibm.icu.text.Transliterator;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18 import com.ibm.icu.text.UnicodeSetIterator;
19 /**
20  * @author ram
21  *
22  * To change this generated comment edit the template variable "typecomment":
23  * Window>Preferences>Java>Templates.
24  * To enable and disable the creation of type comments go to
25  * Window>Preferences>Java>Code Generation.7F
26  */
27 public class WriteIndicCharts {
28 
main(String[] args)29     public static void main(String[] args){
30         writeIICharts();
31     }
32 
33 
34     static String header =  "<html>\n" +
35                             "    <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+
36                             "           Inter-Indic Transliteration Comparison chart"+
37                             "    </head>\n"+
38                             "    <body bgcolor=#FFFFFF>\n"+
39                             "         <table border=1 width=100% >\n"+
40                             "            <tr>\n"+
41                             "            <th width=9%>Inter-Indic</th>\n"+
42                             "            <th width=9%>Latin</th>\n"+
43                             "            <th width=9%>Devanagari</th>\n"+
44                             "            <th width=9%>Bengali</th>\n"+
45                             "            <th width=9%>Gurmukhi</th>\n"+
46                             "            <th width=9%>Gujarati</th>\n"+
47                             "            <th width=9%>Oriya</th>\n"+
48                             "            <th width=9%>Tamil</th>\n"+
49                             "            <th width=9%>Telugu</th>\n"+
50                             "            <th width=9%>Kannada</th>\n"+
51                             "            <th width=9%>Malayalam</th>\n"+
52                             "            </tr>\n";
53     static String footer =  "          </table>\n"+
54                             "    </body>\n" +
55                             "</html>\n";
56 
57     static UnicodeSet deva = new UnicodeSet("[:deva:]");
58     static UnicodeSet beng = new UnicodeSet("[:beng:]");
59     static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
60     static UnicodeSet guru = new UnicodeSet("[:guru:]");
61     static UnicodeSet orya = new UnicodeSet("[:orya:]");
62     static UnicodeSet taml = new UnicodeSet("[:taml:]");
63     static UnicodeSet telu = new UnicodeSet("[:telu:]");
64     static UnicodeSet knda = new UnicodeSet("[:knda:]");
65     static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
66     static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]");
67 
writeIICharts()68     public static void writeIICharts(){
69         try{
70             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
71             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
72             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
73             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
74             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
75             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
76             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
77             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
78             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
79             Transliterator t10 = Transliterator.getInstance("InterIndic-Latin");
80             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
81 
82             for(int i=0x00;i<=0x80;i++){
83                String[] arr =  new String[10];
84                arr[0]=UTF16.valueOf(i+ 0xE000);
85                table.put(UTF16.valueOf(i),arr);
86             }
87 
88             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
89 
90             os.write(header);
91 
92             writeIICharts(t9,0x0900,1);
93             writeIICharts(t1,0x0980,2);
94             writeIICharts(t2,0x0A00,3);
95             writeIICharts(t3,0x0A80,4);
96             writeIICharts(t4,0x0B00,5);
97             writeIICharts(t5,0x0B80,6);
98             writeIICharts(t6,0x0c00,7);
99             writeIICharts(t7,0x0C80,8);
100             writeIICharts(t8,0x0D00,9);
101 
102             for(int i=0x00;i<=0x80;i++){
103                 String[] temp = (String[])table.get(UTF16.valueOf(i));
104                 boolean write = false;
105                 for(int k=1;k<temp.length && temp[k]!=null;k++){
106                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0 ||
107                        temp[k].indexOf(":UNASSIGNED")<0){
108                         write = true;
109                     }
110                 }
111                 if(write){
112                     os.write("        <tr>\n");
113                     for(int j=0; j<temp.length;j++){
114                         if(temp[j]!=null){
115                             boolean fallback=false;
116                             boolean unassigned=false;
117                             boolean unmapped = false;
118                             boolean consumed =false;
119                             String str = temp[j];
120 
121                             if(temp[j].indexOf(":FALLBACK")>=0){
122                                 str = temp[j].substring(0,temp[j].indexOf(":"));
123                                 fallback=true;
124                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
125                             }
126                             if(temp[j].indexOf(":UNASSIGNED")>=0){
127                                 str = temp[j].substring(0,temp[j].indexOf(":"));
128                                 unassigned=true;
129                             }
130 
131                             if(temp[j].indexOf(":UNMAPPED")>=0){
132                                 str = temp[j].substring(0,temp[j].indexOf(":"));
133                                 unmapped=true;
134                             }
135                             if(temp[j].indexOf(":CONSUMED")>=0){
136                                 str = temp[j].substring(0,temp[j].indexOf(":"));
137                                 consumed=true;
138                             }
139 
140                             String name;
141                             StringBuffer nameBuf=new StringBuffer();
142                             for(int f=0; f<str.length();f++){
143                                 if(f>0){ nameBuf.append("+");}
144                                 nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f)));
145                             }
146                             name = nameBuf.toString();
147                             if(fallback){
148 
149                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
150                                     os.write("            <td  width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
151                                 }else{
152                                     os.write("            <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
153                                 }
154                             }else if(unmapped){
155                                 os.write("            <td bgcolor=#FF9999 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
156                             }else if(unassigned){
157                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
158                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
159                                 }else{
160                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
161                                 }
162                             }else if(consumed){
163                                  if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
164                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
165                                 }else{
166                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+""+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
167                                 }
168                             }else if(name.indexOf("private")!=-1){
169                                 String s = t10.transliterate(str);
170                                 os.write("            <td width=9% bgcolor=#FFBBBB  align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
171                                 if(!s.equals(str)){
172                                     os.write("            <td width=9%  bgcolor=#CCEEDD align=center>"+s +"</td>");
173                                 }else{
174                                     os.write("            <td width=9% bgcolor=#CCEEDD align=center>&nbsp;</td>");
175                                 }
176                             }else{
177                                os.write("            <td width=9% align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
178                             }
179                         }else{
180                              os.write("           <td width=9% >&nbsp</td>\n");
181                         }
182                     }
183                     os.write("        </tr>\n");
184                 }
185             }
186             os.write(footer);
187             os.close();
188         }catch( Exception e){
189             e.printStackTrace();
190         }
191     }
writeCharts()192     public static void writeCharts(){
193         try{
194             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
195             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
196             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
197             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
198             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
199             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
200             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
201             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
202             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
203 
204             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
205 
206             for(int i=0x0900;i<=0x097F;i++){
207                String[] arr =  new String[10];
208                arr[0]=UTF16.valueOf((i&0xFF) + 0xE000);
209                table.put(UTF16.valueOf(i),arr);
210             }
211 
212             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
213 
214             os.write(header);
215             /*
216             writeCharts(t1,beng,1);
217             writeCharts(t2,guru,2);
218             writeCharts(t3,gujr,3);
219             writeCharts(t4,orya,4);
220             writeCharts(t5,taml,5);
221             writeCharts(t6,telu,6);
222             writeCharts(t7,knda,7);
223             writeCharts(t8,mlym,8);
224             */
225             /*
226             writeCharts(t9,0x0900,1);
227             writeCharts(t1,0x0980,2);
228             writeCharts(t2,0x0A00,3);
229             writeCharts(t3,0x0A80,4);
230             writeCharts(t4,0x0B00,5);
231             writeCharts(t5,0x0B80,6);
232             writeCharts(t6,0x0c00,7);
233             writeCharts(t7,0x0C80,8);
234             writeCharts(t8,0x0D00,9);
235             */
236             writeIICharts(t9,0x0900,1);
237             writeIICharts(t1,0x0980,2);
238             writeIICharts(t2,0x0A00,3);
239             writeIICharts(t3,0x0A80,4);
240             writeIICharts(t4,0x0B00,5);
241             writeIICharts(t5,0x0B80,6);
242             writeIICharts(t6,0x0c00,7);
243             writeIICharts(t7,0x0C80,8);
244             writeIICharts(t8,0x0D00,9);
245             for(int i=0x0900;i<=0x097F;i++){
246                 String[] temp = (String[])table.get(UTF16.valueOf(i));
247                 boolean write = false;
248                 for(int k=1;k<temp.length;k++){
249                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){
250                         write = true;
251                     }
252                 }
253                 if(write){
254                     os.write("        <tr>\n");
255                     for(int j=0; j<temp.length;j++){
256                         if(temp[j]!=null){
257                             boolean fallback=false;
258                             String str = temp[j];
259 
260                             if(temp[j].indexOf(":FALLBACK")>=0){
261                                 str = temp[j].substring(0,temp[j].indexOf(":"));
262                                 fallback=true;
263                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
264                             }
265                             String name = UCharacter.getExtendedName(UTF16.charAt(str,0));
266                             if(fallback){
267                                 os.write("            <td bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
268                             }else if(name.indexOf("unassigned")!=-1){
269                                 os.write("            <td bgcolor=#CCCCCC align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
270                             }else if(name.indexOf("private")!=-1){
271 
272 
273                                 os.write("            <td bgcolor=#FFBBBB align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
274 
275                             }else{
276                                os.write("            <td align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
277                             }
278                         }else{
279                              os.write("           <td>&nbsp</td>\n");
280                         }
281                     }
282                     os.write("        </tr>\n");
283                 }
284             }
285             os.write(footer);
286             os.close();
287         }catch( Exception e){
288             e.printStackTrace();
289         }
290     }
291     static Hashtable table = new Hashtable();
getKey(int cp)292     static String getKey(int cp){
293         int delta = cp & 0xFF;
294         delta-= (delta>0x7f)? 0x80 : 0;
295         //delta+=0x0900;
296         return UTF16.valueOf(delta);
297     }
298 
writeCharts(Transliterator trans, int start, int index)299     public static void writeCharts(Transliterator trans, int start, int index){
300 
301         Transliterator inverse = trans.getInverse();
302         for(int i=0;i<=0x7f;i++){
303             String cp = UTF16.valueOf(start+i);
304             String s1 = inverse.transliterate(cp);
305             String s2 = trans.transliterate(s1);
306 
307             String[] arr = (String[])table.get(getKey(start+i));
308             if(cp.equals(s2)){
309                 arr[index] = s1;
310             }else{
311                 arr[index] = s1 + ":FALLBACK";
312             }
313         }
314     }
315 
writeIICharts(Transliterator trans,int start, int index)316     public static void writeIICharts(Transliterator trans,int start, int index){
317 
318         Transliterator inverse = trans.getInverse();
319         UnicodeSetIterator iter = new UnicodeSetIterator(inter);
320 
321         while(iter.next()){
322             String cp =UTF16.valueOf(iter.codepoint);
323             String s1 = trans.transliterate(cp);
324             String s2 = inverse.transliterate(s1);
325             String[] arr = (String[])table.get(UTF16.valueOf(iter.codepoint&0xFF));
326             if(cp.equals(s1)){
327                 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":UNASSIGNED";
328             }else if(cp.equals(s2)){
329                 arr[index] = s1;
330             }else if(s1.equals(s2)){
331                 if(s1.equals("")){
332                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
333                 }else{
334                     arr[index] = s1+ ":FALLBACK";
335                 }
336             } else{
337                 if(s2.equals("")){
338                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
339                 }else{
340                     arr[index] = s1+ ":FALLBACK";
341                 }
342             }
343         }
344     }
writeCharts(Transliterator trans, UnicodeSet target, int index)345     public static void writeCharts(Transliterator trans, UnicodeSet target, int index){
346         UnicodeSetIterator tIter = new UnicodeSetIterator(target);
347         Transliterator inverse = trans.getInverse();
348         while(tIter.next()){
349             String cp = UTF16.valueOf(tIter.codepoint);
350             String s1 = inverse.transliterate(cp);
351             String s2 = trans.transliterate(s1);
352 
353             String[] arr = (String[])table.get(getKey(tIter.codepoint));
354             if(cp.equals(s2)){
355                 arr[index] = cp;
356             }else{
357                 arr[index] = cp + ":FALLBACK";
358             }
359         }
360     }
361 }
362 
363