/*
 * Copyright (c) 1996, 2000, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.lang.Character;

/**
 * Utility class for normalizing and merging patterns for collation.
 * This is to be used with MergeCollation for adding patterns to an
 * existing rule table.
 *
 * <p>An entry consists of a strength (one of the {@link Collator} strength
 * constants, {@link #RESET} or {@link #UNSET}), the core characters and an
 * optional extension (the part following {@code '/'} in a rule).
 *
 * @see MergeCollation
 * @author Mark Davis, Helena Shih
 */

class PatternEntry {
    /**
     * Appends the current extension, quoted if necessary, to the buffer.
     * Nothing is appended when the extension is empty.
     *
     * @param toAddTo the buffer to append to
     */
    public void appendQuotedExtension(StringBuffer toAddTo) {
        appendQuoted(extension, toAddTo);
    }

    /**
     * Appends the current chars, quoted if necessary, to the buffer.
     *
     * @param toAddTo the buffer to append to
     */
    public void appendQuotedChars(StringBuffer toAddTo) {
        appendQuoted(chars, toAddTo);
    }

    /**
     * WARNING this is used for searching in a Vector.
     * Because Vector.indexOf doesn't take a comparator,
     * this method is ill-defined and ignores strength.
     * Only the core characters are compared; the extension is ignored too.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code PatternEntry} with
     *         equal chars; {@code false} otherwise (including for
     *         {@code null} and for objects of any other type)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof also rejects null, and avoids the ClassCastException a
        // blind cast would raise for foreign types.
        if (!(obj instanceof PatternEntry)) {
            return false;
        }
        return chars.equals(((PatternEntry) obj).chars);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: derived from the
     * core characters only.
     */
    @Override
    public int hashCode() {
        return chars.hashCode();
    }

    /**
     * For debugging. Renders the strength symbol followed by the chars and,
     * if present, {@code '/'} and the extension, without white space.
     */
    @Override
    public String toString() {
        StringBuffer result = new StringBuffer();
        addToBuffer(result, true, false, null);
        return result.toString();
    }

    /**
     * Gets the strength of the entry.
     */
    final int getStrength() {
        return strength;
    }

    /**
     * Gets the expanding characters of the entry.
     */
    final String getExtension() {
        return extension;
    }

    /**
     * Gets the core characters of the entry.
     */
    final String getChars() {
        return chars;
    }

    // ===== privates =====

    /**
     * Appends the textual rule form of this entry to a buffer.
     *
     * @param toAddTo        the buffer to append to
     * @param showExtension  if {@code true}, a non-empty extension is
     *                       appended after the chars, preceded by {@code '/'}
     * @param showWhiteSpace if {@code true}, separating white space is
     *                       emitted between the parts
     * @param lastEntry      if non-null, a reset of the form
     *                       {@code &<lastEntry chars><this extension>} is
     *                       emitted before this entry's own relation
     */
    void addToBuffer(StringBuffer toAddTo,
                     boolean showExtension,
                     boolean showWhiteSpace,
                     PatternEntry lastEntry)
    {
        if (showWhiteSpace && toAddTo.length() > 0) {
            // Primary differences and resets start a new line; everything
            // else continues on the current one.
            if (strength == Collator.PRIMARY || lastEntry != null) {
                toAddTo.append('\n');
            } else {
                toAddTo.append(' ');
            }
        }
        if (lastEntry != null) {
            toAddTo.append('&');
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
            lastEntry.appendQuotedChars(toAddTo);
            appendQuotedExtension(toAddTo);
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
        }
        switch (strength) {
        case Collator.IDENTICAL: toAddTo.append('='); break;
        case Collator.TERTIARY:  toAddTo.append(','); break;
        case Collator.SECONDARY: toAddTo.append(';'); break;
        case Collator.PRIMARY:   toAddTo.append('<'); break;
        case RESET:              toAddTo.append('&'); break;
        case UNSET:              toAddTo.append('?'); break;
        default:                 break; // unknown strength: no symbol
        }
        if (showWhiteSpace) {
            toAddTo.append(' ');
        }
        appendQuoted(chars, toAddTo);
        if (showExtension && !extension.isEmpty()) {
            toAddTo.append('/');
            appendQuoted(extension, toAddTo);
        }
    }

    /**
     * Appends {@code chars} to the buffer, wrapping the whole string in
     * single quotes when its first character requires quoting. Only the
     * first character is inspected. An empty string appends nothing.
     *
     * @param chars   the text to append
     * @param toAddTo the buffer to append to
     */
    static void appendQuoted(String chars, StringBuffer toAddTo) {
        if (chars.isEmpty()) {
            // Nothing to emit; also avoids charAt(0) on an empty string.
            return;
        }
        boolean inQuote = needsQuote(chars.charAt(0));
        if (inQuote) {
            toAddTo.append('\'');
        }
        toAddTo.append(chars);
        if (inQuote) {
            toAddTo.append('\'');
        }
    }

    /**
     * Tells whether a leading character forces the text to be quoted:
     * Unicode space characters, special (punctuation) characters, and a
     * fixed set of control/marker characters.
     */
    private static boolean needsQuote(char ch) {
        if (Character.isSpaceChar(ch) || isSpecialChar(ch)) {
            return true;
        }
        switch (ch) {
        case 0x0010: case '\f': case '\r':
        case '\t': case '\n': case '@':
        case '\'':
            return true;
        default:
            return false;
        }
    }

    //========================================================================
    // Parsing a pattern into a list of PatternEntries....
    //========================================================================

    /**
     * Creates an entry from parsed components. The buffers are copied into
     * strings, so they may be reused by the caller afterwards.
     *
     * @param strength  the strength of the entry
     * @param chars     the core characters
     * @param extension the expanding characters; may be empty
     */
    PatternEntry(int strength,
                 StringBuffer chars,
                 StringBuffer extension)
    {
        this.strength = strength;
        this.chars = chars.toString();
        this.extension = (extension.length() > 0) ? extension.toString()
                                                  : "";
    }

    /**
     * Incremental parser that splits a rule pattern into successive
     * {@code PatternEntry} objects, one per call to {@link #next()}.
     */
    static class Parser {
        private final String pattern;
        private int i;

        /**
         * Creates a parser positioned at the start of {@code pattern}.
         */
        public Parser(String pattern) {
            this.pattern = pattern;
            this.i = 0;
        }

        /**
         * Parses and returns the next entry of the pattern.
         *
         * @return the next entry, or {@code null} when no further relation
         *         character ({@code = , ; < &}) is found before the end of
         *         the pattern
         * @throws ParseException if text appears before any relation
         *         character, if an unquoted special character is found, if
         *         a quote is opened at the very end of the pattern, or if a
         *         relation has no characters
         */
        public PatternEntry next() throws ParseException {
            int newStrength = UNSET;

            newChars.setLength(0);
            newExtension.setLength(0);

            boolean inChars = true;
            boolean inQuote = false;
        mainLoop:
            while (i < pattern.length()) {
                char ch = pattern.charAt(i);
                if (inQuote) {
                    if (ch == '\'') {
                        inQuote = false;
                    } else {
                        appendLiteral(ch, inChars);
                    }
                } else {
                    switch (ch) {
                    // A second relation character ends the current entry; i
                    // is left pointing at it so the next call starts there.
                    case '=': if (newStrength != UNSET) break mainLoop;
                        newStrength = Collator.IDENTICAL; break;
                    case ',': if (newStrength != UNSET) break mainLoop;
                        newStrength = Collator.TERTIARY; break;
                    case ';': if (newStrength != UNSET) break mainLoop;
                        newStrength = Collator.SECONDARY; break;
                    case '<': if (newStrength != UNSET) break mainLoop;
                        newStrength = Collator.PRIMARY; break;
                    case '&': if (newStrength != UNSET) break mainLoop;
                        newStrength = RESET; break;
                    case '\t':
                    case '\n':
                    case '\f':
                    case '\r':
                    case ' ': break; // skip whitespace TODO use Character
                    case '/': inChars = false; break;
                    case '\'':
                        inQuote = true;
                        if (i + 1 >= pattern.length()) {
                            // Report a dangling quote as a parse error
                            // instead of failing inside charAt().
                            throw new ParseException
                                ("Unterminated quote at end of pattern", i);
                        }
                        // The character right after the opening quote is
                        // always taken literally, even if it is a quote.
                        ch = pattern.charAt(++i);
                        appendLiteral(ch, inChars);
                        break;
                    default:
                        if (newStrength == UNSET) {
                            throw new ParseException
                                ("missing char (=,;<&) : " +
                                 pattern.substring(i,
                                    Math.min(i + 10, pattern.length())),
                                 i);
                        }
                        if (PatternEntry.isSpecialChar(ch)) {
                            throw new ParseException
                                ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
                        }
                        if (inChars) {
                            newChars.append(ch);
                        } else {
                            newExtension.append(ch);
                        }
                        break;
                    }
                }
                i++;
            }
            if (newStrength == UNSET) {
                return null;
            }
            if (newChars.length() == 0) {
                throw new ParseException
                    ("missing chars (=,;<&): " +
                      pattern.substring(i,
                          Math.min(i + 10, pattern.length())),
                     i);
            }

            return new PatternEntry(newStrength, newChars, newExtension);
        }

        /**
         * Adds a quoted (literal) character to the entry being built. The
         * first character always goes to the chars, even after a '/'.
         */
        private void appendLiteral(char ch, boolean inChars) {
            if (inChars || newChars.length() == 0) {
                newChars.append(ch);
            } else {
                newExtension.append(ch);
            }
        }

        // We re-use these objects in order to improve performance
        private final StringBuffer newChars = new StringBuffer();
        private final StringBuffer newExtension = new StringBuffer();

    }

    /**
     * Tells whether {@code ch} is a special character that must be quoted in
     * a rule: the space character or one of the ASCII ranges
     * U+0022..U+002F, U+003A..U+003F, U+005B..U+0060, U+007B..U+007E.
     */
    static boolean isSpecialChar(char ch) {
        return ((ch == '\u0020') ||
                ((ch <= '\u002F') && (ch >= '\u0022')) ||
                ((ch <= '\u003F') && (ch >= '\u003A')) ||
                ((ch <= '\u0060') && (ch >= '\u005B')) ||
                ((ch <= '\u007E') && (ch >= '\u007B')));
    }


    /** Strength value marking a reset ({@code '&'}) entry. */
    static final int RESET = -2;
    /** Strength value meaning no relation has been assigned. */
    static final int UNSET = -1;

    // Package-private and mutable on purpose: these fields are declared
    // without access modifiers or final in the original source.
    int strength = UNSET;
    String chars = "";
    String extension = "";
}