1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 * Copyright (C) 1996-2011, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 */ 7 package com.ibm.icu.text; 8 import com.ibm.icu.impl.PatternProps; 9 import com.ibm.icu.impl.UCharacterName; 10 import com.ibm.icu.impl.Utility; 11 import com.ibm.icu.lang.UCharacter; 12 13 /** 14 * A transliterator that performs name to character mapping. 15 * @author Alan Liu 16 */ 17 class NameUnicodeTransliterator extends Transliterator { 18 19 static final String _ID = "Name-Any"; 20 21 static final String OPEN_PAT = "\\N~{~"; 22 static final char OPEN_DELIM = '\\'; // first char of OPEN_PAT 23 static final char CLOSE_DELIM = '}'; 24 static final char SPACE = ' '; 25 26 27 /** 28 * System registration hook. 29 */ register()30 static void register() { 31 Transliterator.registerFactory(_ID, new Transliterator.Factory() { 32 @Override 33 public Transliterator getInstance(String ID) { 34 return new NameUnicodeTransliterator(null); 35 } 36 }); 37 } 38 39 /** 40 * Constructs a transliterator. 41 */ NameUnicodeTransliterator(UnicodeFilter filter)42 public NameUnicodeTransliterator(UnicodeFilter filter) { 43 super(_ID, filter); 44 } 45 46 /** 47 * Implements {@link Transliterator#handleTransliterate}. 48 */ 49 @Override handleTransliterate(Replaceable text, Position offsets, boolean isIncremental)50 protected void handleTransliterate(Replaceable text, 51 Position offsets, boolean isIncremental) { 52 53 int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space 54 55 StringBuffer name = new StringBuffer(maxLen); 56 57 // Get the legal character set 58 UnicodeSet legal = new UnicodeSet(); 59 UCharacterName.INSTANCE.getCharNameCharacters(legal); 60 61 int cursor = offsets.start; 62 int limit = offsets.limit; 63 64 // Modes: 65 // 0 - looking for open delimiter 66 // 1 - after open delimiter 67 int mode = 0; 68 int openPos = -1; // open delim candidate pos 69 70 int c; 71 while (cursor < limit) { 72 c = text.char32At(cursor); 73 74 switch (mode) { 75 case 0: // looking for open delimiter 76 if (c == OPEN_DELIM) { // quick check first 77 openPos = cursor; 78 int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit); 79 if (i >= 0 && i < limit) { 80 mode = 1; 81 name.setLength(0); 82 cursor = i; 83 continue; // *** reprocess char32At(cursor) 84 } 85 } 86 break; 87 88 case 1: // after open delimiter 89 // Look for legal chars. If \s+ is found, convert it 90 // to a single space. If closeDelimiter is found, exit 91 // the loop. If any other character is found, exit the 92 // loop. If the limit is reached, exit the loop. 93 94 // Convert \s+ => SPACE. This assumes there are no 95 // runs of >1 space characters in names. 96 if (PatternProps.isWhiteSpace(c)) { 97 // Ignore leading whitespace 98 if (name.length() > 0 && 99 name.charAt(name.length()-1) != SPACE) { 100 name.append(SPACE); 101 // If we are too long then abort. maxLen includes 102 // temporary trailing space, so use '>'. 103 if (name.length() > maxLen) { 104 mode = 0; 105 } 106 } 107 break; 108 } 109 110 if (c == CLOSE_DELIM) { 111 112 int len = name.length(); 113 114 // Delete trailing space, if any 115 if (len > 0 && 116 name.charAt(len-1) == SPACE) { 117 name.setLength(--len); 118 } 119 120 c = UCharacter.getCharFromExtendedName(name.toString()); 121 if (c != -1) { 122 // Lookup succeeded 123 124 // assert(UTF16.getCharCount(CLOSE_DELIM) == 1); 125 cursor++; // advance over CLOSE_DELIM 126 127 String str = UTF16.valueOf(c); 128 text.replace(openPos, cursor, str); 129 130 // Adjust indices for the change in the length of 131 // the string. Do not assume that str.length() == 132 // 1, in case of surrogates. 133 int delta = cursor - openPos - str.length(); 134 cursor -= delta; 135 limit -= delta; 136 // assert(cursor == openPos + str.length()); 137 } 138 // If the lookup failed, we leave things as-is and 139 // still switch to mode 0 and continue. 140 mode = 0; 141 openPos = -1; // close off candidate 142 continue; // *** reprocess char32At(cursor) 143 } 144 145 if (legal.contains(c)) { 146 UTF16.append(name, c); 147 // If we go past the longest possible name then abort. 148 // maxLen includes temporary trailing space, so use '>='. 149 if (name.length() >= maxLen) { 150 mode = 0; 151 } 152 } 153 154 // Invalid character 155 else { 156 --cursor; // Backup and reprocess this character 157 mode = 0; 158 } 159 160 break; 161 } 162 163 cursor += UTF16.getCharCount(c); 164 } 165 166 offsets.contextLimit += limit - offsets.limit; 167 offsets.limit = limit; 168 // In incremental mode, only advance the cursor up to the last 169 // open delimiter candidate. 170 offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor; 171 } 172 173 /* (non-Javadoc) 174 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 175 */ 176 @Override addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)177 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 178 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 179 if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) { 180 return; // we have to contain both prefix and suffix 181 } 182 UnicodeSet items = new UnicodeSet() 183 .addAll('0', '9') 184 .addAll('A', 'F') 185 .addAll('a', 'z') // for controls 186 .add('<').add('>') // for controls 187 .add('(').add(')') // for controls 188 .add('-') 189 .add(' ') 190 .addAll(UnicodeNameTransliterator.OPEN_DELIM) 191 .add(CLOSE_DELIM); 192 items.retainAll(myFilter); 193 if (items.size() > 0) { 194 sourceSet.addAll(items); 195 // could produce any character 196 targetSet.addAll(0, 0x10FFFF); 197 } 198 } 199 } 200