1 /* 2 ********************************************************************** 3 * Copyright (c) 2005-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: John Emmons 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.posix; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 import java.nio.ByteBuffer; 15 import java.nio.charset.Charset; 16 import java.util.ArrayList; 17 import java.util.Collections; 18 import java.util.List; 19 import java.util.ListIterator; 20 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.icu.SimpleConverter; 23 24 import com.ibm.icu.dev.tool.UOption; 25 import com.ibm.icu.impl.Utility; 26 import com.ibm.icu.text.UnicodeSet; 27 import com.ibm.icu.text.UnicodeSetIterator; 28 29 /** 30 * Class to generate POSIX format charmap 31 * 32 * @author John C. Emmons 33 */ 34 35 public class GenerateCharmap { 36 37 private static final int DESTDIR = 2, 38 UNICODESET = 3, 39 CHARSET = 4; 40 41 private static final UOption[] options = { 42 UOption.HELP_H(), 43 UOption.HELP_QUESTION_MARK(), 44 UOption.create("destdir", 'd', UOption.REQUIRES_ARG).setDefault("."), 45 UOption.create("unicodeset", 'u', UOption.REQUIRES_ARG).setDefault("[\\u0000-\\U0010FFFF]"), 46 UOption.create("charset", 'c', UOption.REQUIRES_ARG).setDefault("UTF-8"), 47 }; 48 main(String[] args)49 public static void main(String[] args) throws IOException { 50 UOption.parseArgs(args, options); 51 String codeset = options[CHARSET].value; 52 GenerateCharmap gp = new GenerateCharmap(new UnicodeSet(options[UNICODESET].value), 53 Charset.forName(codeset), codeset); 54 PrintWriter out = FileUtilities.openUTF8Writer(options[DESTDIR].value + File.separator, codeset + ".cm"); 55 gp.write(out); 56 out.close(); 57 } 58 59 public class CharmapLine implements Comparable<Object> { 60 public String CharacterValue; 61 public String CharacterName; 62 public String CharacterAltName; 63 CharmapLine(String Name, String AltName, String Value)64 public CharmapLine(String Name, String AltName, String Value) { 65 CharacterName = Name; 66 CharacterAltName = AltName; 67 CharacterValue = Value; 68 if (Name.equals(AltName)) 69 CharacterAltName = ""; 70 } 71 compareTo(Object o)72 public int compareTo(Object o) { 73 CharmapLine c = (CharmapLine) o; 74 return (CharacterValue.compareTo(c.CharacterValue)); 75 } 76 } 77 78 UnicodeSet chars; 79 Charset cs; 80 String codeset; 81 GenerateCharmap(UnicodeSet chars, Charset cs, String codeset)82 public GenerateCharmap(UnicodeSet chars, Charset cs, String codeset) { 83 this.cs = cs; 84 if (cs != null && !cs.name().equals("UTF-8")) { 85 UnicodeSet csset = new SimpleConverter(cs).getCharset(); 86 chars = new UnicodeSet(chars).retainAll(csset); 87 } 88 this.chars = chars; 89 this.codeset = codeset; 90 91 } 92 write(PrintWriter out)93 public void write(PrintWriter out) { 94 out.println("######################"); 95 out.println("# POSIX charmap "); 96 out.println("# Generated automatically from the Unicode Character Database and Common Locale Data Repository"); 97 out.println("# see http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html"); 98 out.println("# charset:\t" + codeset); 99 out.println("######################"); 100 out.println("#################################################################################################"); 101 out.println("# Copyright 1991-2011 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in #"); 102 out.println("# http://www.unicode.org/copyright.html. #"); 103 out.println("# #"); 104 out.println("# Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode #"); 105 out.println("# data files and any associated documentation (the \"Data Files\") or Unicode software and any #"); 106 out.println("# associated documentation (the \"Software\") to deal in the Data Files or Software without #"); 107 out.println("# restriction, including without limitation the rights to use, copy, modify, merge, publish, #"); 108 out.println("# distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom #"); 109 out.println("# the Data Files or Software are furnished to do so, provided that (a) the above copyright #"); 110 out.println("# notice(s) and this permission notice appear with all copies of the Data Files or Software, #"); 111 out.println("# (b) both the above copyright notice(s) and this permission notice appear in associated #"); 112 out.println("# documentation, and (c) there is clear notice in each modified Data File or in the Software as #"); 113 out.println("# well as in the documentation associated with the Data File(s) or Software that the data or #"); 114 out.println("# software has been modified. #"); 115 out.println("# #"); 116 out.println("# THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #"); 117 out.println("# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A #"); 118 out.println("# PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT #"); 119 out.println("# HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR #"); 120 out.println("# CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, #"); 121 out.println("# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN #"); 122 out.println("# CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. #"); 123 out.println("#################################################################################################"); 124 out.println(); 125 doCharmap(out, cs); 126 out.println("######################"); 127 out.println(); 128 } 129 130 /** 131 * @param out 132 */ doCharmap(PrintWriter out, Charset cs)133 private void doCharmap(PrintWriter out, Charset cs) { 134 135 // print character types, restricted to the charset 136 int LongestCharNameLength = 0; 137 int LongestCharValueLength = 0; 138 UnicodeSet us = new UnicodeSet("[^[:Noncharacter_Code_Point:][:Cn:][:Cs:]]").retainAll(chars); 139 List<CharmapLine> cml = new ArrayList<CharmapLine>(); 140 CharmapLine current; 141 for (UnicodeSetIterator it = new UnicodeSetIterator(us); it.next();) { 142 String Name = POSIXUtilities.POSIXCharFullName(it.getString()); 143 String AltName = POSIXUtilities.POSIXCharName(it.getString()); 144 String Value = getCodepointValue(it.getString(), cs); 145 current = new CharmapLine(Name, AltName, Value); 146 cml.add(current); 147 if (current.CharacterName.length() > LongestCharNameLength) 148 LongestCharNameLength = current.CharacterName.length(); 149 if (current.CharacterValue.length() > LongestCharValueLength) 150 LongestCharValueLength = current.CharacterValue.length(); 151 } 152 153 Collections.sort(cml); 154 155 out.print("<code_set_name> \""); 156 out.print(codeset); 157 out.println("\""); 158 out.println("<mb_cur_min> 1"); 159 out.print("<mb_cur_max> "); 160 out.print(LongestCharValueLength / 4); 161 out.println(); 162 out.println(); 163 out.println("CHARMAP"); 164 165 for (ListIterator<CharmapLine> li = cml.listIterator(); li.hasNext();) { 166 current = li.next(); 167 168 out.print(current.CharacterName); 169 for (int i = LongestCharNameLength + 1; i > current.CharacterName.length(); i--) 170 out.print(" "); 171 out.println(current.CharacterValue); 172 if (current.CharacterAltName.length() > 0) { 173 out.print(current.CharacterAltName); 174 for (int i = LongestCharNameLength + 1; i > current.CharacterAltName.length(); i--) 175 out.print(" "); 176 out.println(current.CharacterValue); 177 } 178 } 179 180 out.println(); 181 out.println("END CHARMAP"); 182 out.println(); 183 184 } 185 getCodepointValue(String cp, Charset cs)186 private String getCodepointValue(String cp, Charset cs) { 187 StringBuffer result = new StringBuffer(); 188 ByteBuffer bb = cs.encode(cp); 189 int i; 190 while (bb.hasRemaining()) { 191 result.append("\\x"); 192 byte b = bb.get(); 193 if (b < 0) 194 i = b + 256; 195 else 196 i = b; 197 198 result.append(Utility.hex(i, 2)); 199 } 200 return result.toString(); 201 } 202 } 203