1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.event;
18 
19 import android.text.TextUtils;
20 import android.util.SparseIntArray;
21 
22 import com.android.inputmethod.latin.common.Constants;
23 
24 import java.text.Normalizer;
25 import java.util.ArrayList;
26 
27 import javax.annotation.Nonnull;
28 
29 /**
30  * A combiner that handles dead keys.
31  */
32 public class DeadKeyCombiner implements Combiner {
33 
34     private static class Data {
35         // This class data taken from KeyCharacterMap.java.
36 
37         /* Characters used to display placeholders for dead keys. */
38         private static final int ACCENT_ACUTE = '\u00B4';
39         private static final int ACCENT_BREVE = '\u02D8';
40         private static final int ACCENT_CARON = '\u02C7';
41         private static final int ACCENT_CEDILLA = '\u00B8';
42         private static final int ACCENT_CIRCUMFLEX = '\u02C6';
43         private static final int ACCENT_COMMA_ABOVE = '\u1FBD';
44         private static final int ACCENT_COMMA_ABOVE_RIGHT = '\u02BC';
45         private static final int ACCENT_DOT_ABOVE = '\u02D9';
46         private static final int ACCENT_DOT_BELOW = Constants.CODE_PERIOD; // approximate
47         private static final int ACCENT_DOUBLE_ACUTE = '\u02DD';
48         private static final int ACCENT_GRAVE = '\u02CB';
49         private static final int ACCENT_HOOK_ABOVE = '\u02C0';
50         private static final int ACCENT_HORN = Constants.CODE_SINGLE_QUOTE; // approximate
51         private static final int ACCENT_MACRON = '\u00AF';
52         private static final int ACCENT_MACRON_BELOW = '\u02CD';
53         private static final int ACCENT_OGONEK = '\u02DB';
54         private static final int ACCENT_REVERSED_COMMA_ABOVE = '\u02BD';
55         private static final int ACCENT_RING_ABOVE = '\u02DA';
56         private static final int ACCENT_STROKE = Constants.CODE_DASH; // approximate
57         private static final int ACCENT_TILDE = '\u02DC';
58         private static final int ACCENT_TURNED_COMMA_ABOVE = '\u02BB';
59         private static final int ACCENT_UMLAUT = '\u00A8';
60         private static final int ACCENT_VERTICAL_LINE_ABOVE = '\u02C8';
61         private static final int ACCENT_VERTICAL_LINE_BELOW = '\u02CC';
62 
63         /* Legacy dead key display characters used in previous versions of the API (before L)
64          * We still support these characters by mapping them to their non-legacy version. */
65         private static final int ACCENT_GRAVE_LEGACY = Constants.CODE_GRAVE_ACCENT;
66         private static final int ACCENT_CIRCUMFLEX_LEGACY = Constants.CODE_CIRCUMFLEX_ACCENT;
67         private static final int ACCENT_TILDE_LEGACY = Constants.CODE_TILDE;
68 
69         /**
70          * Maps Unicode combining diacritical to display-form dead key.
71          */
72         static final SparseIntArray sCombiningToAccent = new SparseIntArray();
73         static final SparseIntArray sAccentToCombining = new SparseIntArray();
74         static {
75             // U+0300: COMBINING GRAVE ACCENT
76             addCombining('\u0300', ACCENT_GRAVE);
77             // U+0301: COMBINING ACUTE ACCENT
78             addCombining('\u0301', ACCENT_ACUTE);
79             // U+0302: COMBINING CIRCUMFLEX ACCENT
80             addCombining('\u0302', ACCENT_CIRCUMFLEX);
81             // U+0303: COMBINING TILDE
82             addCombining('\u0303', ACCENT_TILDE);
83             // U+0304: COMBINING MACRON
84             addCombining('\u0304', ACCENT_MACRON);
85             // U+0306: COMBINING BREVE
86             addCombining('\u0306', ACCENT_BREVE);
87             // U+0307: COMBINING DOT ABOVE
88             addCombining('\u0307', ACCENT_DOT_ABOVE);
89             // U+0308: COMBINING DIAERESIS
90             addCombining('\u0308', ACCENT_UMLAUT);
91             // U+0309: COMBINING HOOK ABOVE
92             addCombining('\u0309', ACCENT_HOOK_ABOVE);
93             // U+030A: COMBINING RING ABOVE
94             addCombining('\u030A', ACCENT_RING_ABOVE);
95             // U+030B: COMBINING DOUBLE ACUTE ACCENT
96             addCombining('\u030B', ACCENT_DOUBLE_ACUTE);
97             // U+030C: COMBINING CARON
98             addCombining('\u030C', ACCENT_CARON);
99             // U+030D: COMBINING VERTICAL LINE ABOVE
100             addCombining('\u030D', ACCENT_VERTICAL_LINE_ABOVE);
101             // U+030E: COMBINING DOUBLE VERTICAL LINE ABOVE
102             //addCombining('\u030E', ACCENT_DOUBLE_VERTICAL_LINE_ABOVE);
103             // U+030F: COMBINING DOUBLE GRAVE ACCENT
104             //addCombining('\u030F', ACCENT_DOUBLE_GRAVE);
105             // U+0310: COMBINING CANDRABINDU
106             //addCombining('\u0310', ACCENT_CANDRABINDU);
107             // U+0311: COMBINING INVERTED BREVE
108             //addCombining('\u0311', ACCENT_INVERTED_BREVE);
109             // U+0312: COMBINING TURNED COMMA ABOVE
110             addCombining('\u0312', ACCENT_TURNED_COMMA_ABOVE);
111             // U+0313: COMBINING COMMA ABOVE
112             addCombining('\u0313', ACCENT_COMMA_ABOVE);
113             // U+0314: COMBINING REVERSED COMMA ABOVE
114             addCombining('\u0314', ACCENT_REVERSED_COMMA_ABOVE);
115             // U+0315: COMBINING COMMA ABOVE RIGHT
116             addCombining('\u0315', ACCENT_COMMA_ABOVE_RIGHT);
117             // U+031B: COMBINING HORN
118             addCombining('\u031B', ACCENT_HORN);
119             // U+0323: COMBINING DOT BELOW
120             addCombining('\u0323', ACCENT_DOT_BELOW);
121             // U+0326: COMBINING COMMA BELOW
122             //addCombining('\u0326', ACCENT_COMMA_BELOW);
123             // U+0327: COMBINING CEDILLA
124             addCombining('\u0327', ACCENT_CEDILLA);
125             // U+0328: COMBINING OGONEK
126             addCombining('\u0328', ACCENT_OGONEK);
127             // U+0329: COMBINING VERTICAL LINE BELOW
128             addCombining('\u0329', ACCENT_VERTICAL_LINE_BELOW);
129             // U+0331: COMBINING MACRON BELOW
130             addCombining('\u0331', ACCENT_MACRON_BELOW);
131             // U+0335: COMBINING SHORT STROKE OVERLAY
132             addCombining('\u0335', ACCENT_STROKE);
133             // U+0342: COMBINING GREEK PERISPOMENI
134             //addCombining('\u0342', ACCENT_PERISPOMENI);
135             // U+0344: COMBINING GREEK DIALYTIKA TONOS
136             //addCombining('\u0344', ACCENT_DIALYTIKA_TONOS);
137             // U+0345: COMBINING GREEK YPOGEGRAMMENI
138             //addCombining('\u0345', ACCENT_YPOGEGRAMMENI);
139 
140             // One-way mappings to equivalent preferred accents.
141             // U+0340: COMBINING GRAVE TONE MARK
142             sCombiningToAccent.append('\u0340', ACCENT_GRAVE);
143             // U+0341: COMBINING ACUTE TONE MARK
144             sCombiningToAccent.append('\u0341', ACCENT_ACUTE);
145             // U+0343: COMBINING GREEK KORONIS
146             sCombiningToAccent.append('\u0343', ACCENT_COMMA_ABOVE);
147 
148             // One-way legacy mappings to preserve compatibility with older applications.
149             // U+0300: COMBINING GRAVE ACCENT
sAccentToCombining.append(ACCENT_GRAVE_LEGACY, '\\u0300')150             sAccentToCombining.append(ACCENT_GRAVE_LEGACY, '\u0300');
151             // U+0302: COMBINING CIRCUMFLEX ACCENT
sAccentToCombining.append(ACCENT_CIRCUMFLEX_LEGACY, '\\u0302')152             sAccentToCombining.append(ACCENT_CIRCUMFLEX_LEGACY, '\u0302');
153             // U+0303: COMBINING TILDE
sAccentToCombining.append(ACCENT_TILDE_LEGACY, '\\u0303')154             sAccentToCombining.append(ACCENT_TILDE_LEGACY, '\u0303');
155         }
156 
addCombining(int combining, int accent)157         private static void addCombining(int combining, int accent) {
158             sCombiningToAccent.append(combining, accent);
159             sAccentToCombining.append(accent, combining);
160         }
161 
162         // Caution! This may only contain chars, not supplementary code points. It's unlikely
163         // it will ever need to, but if it does we'll have to change this
164         private static final SparseIntArray sNonstandardDeadCombinations = new SparseIntArray();
165         static {
166             // Non-standard decompositions.
167             // Stroke modifier for Finnish multilingual keyboard and others.
168             // U+0110: LATIN CAPITAL LETTER D WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'D', '\\u0110')169             addNonStandardDeadCombination(ACCENT_STROKE, 'D', '\u0110');
170             // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'G', '\\u01e4')171             addNonStandardDeadCombination(ACCENT_STROKE, 'G', '\u01e4');
172             // U+0126: LATIN CAPITAL LETTER H WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'H', '\\u0126')173             addNonStandardDeadCombination(ACCENT_STROKE, 'H', '\u0126');
174             // U+0197: LATIN CAPITAL LETTER I WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'I', '\\u0197')175             addNonStandardDeadCombination(ACCENT_STROKE, 'I', '\u0197');
176             // U+0141: LATIN CAPITAL LETTER L WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'L', '\\u0141')177             addNonStandardDeadCombination(ACCENT_STROKE, 'L', '\u0141');
178             // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'O', '\\u00d8')179             addNonStandardDeadCombination(ACCENT_STROKE, 'O', '\u00d8');
180             // U+0166: LATIN CAPITAL LETTER T WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'T', '\\u0166')181             addNonStandardDeadCombination(ACCENT_STROKE, 'T', '\u0166');
182             // U+0111: LATIN SMALL LETTER D WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'd', '\\u0111')183             addNonStandardDeadCombination(ACCENT_STROKE, 'd', '\u0111');
184             // U+01E5: LATIN SMALL LETTER G WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'g', '\\u01e5')185             addNonStandardDeadCombination(ACCENT_STROKE, 'g', '\u01e5');
186             // U+0127: LATIN SMALL LETTER H WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'h', '\\u0127')187             addNonStandardDeadCombination(ACCENT_STROKE, 'h', '\u0127');
188             // U+0268: LATIN SMALL LETTER I WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'i', '\\u0268')189             addNonStandardDeadCombination(ACCENT_STROKE, 'i', '\u0268');
190             // U+0142: LATIN SMALL LETTER L WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'l', '\\u0142')191             addNonStandardDeadCombination(ACCENT_STROKE, 'l', '\u0142');
192             // U+00F8: LATIN SMALL LETTER O WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 'o', '\\u00f8')193             addNonStandardDeadCombination(ACCENT_STROKE, 'o', '\u00f8');
194             // U+0167: LATIN SMALL LETTER T WITH STROKE
addNonStandardDeadCombination(ACCENT_STROKE, 't', '\\u0167')195             addNonStandardDeadCombination(ACCENT_STROKE, 't', '\u0167');
196         }
197 
addNonStandardDeadCombination(final int deadCodePoint, final int spacingCodePoint, final int result)198         private static void addNonStandardDeadCombination(final int deadCodePoint,
199                 final int spacingCodePoint, final int result) {
200             final int combination = (deadCodePoint << 16) | spacingCodePoint;
201             sNonstandardDeadCombinations.put(combination, result);
202         }
203 
204         public static final int NOT_A_CHAR = 0;
205         public static final int BITS_TO_SHIFT_DEAD_CODE_POINT_FOR_NON_STANDARD_COMBINATION = 16;
206         // Get a non-standard combination
getNonstandardCombination(final int deadCodePoint, final int spacingCodePoint)207         public static char getNonstandardCombination(final int deadCodePoint,
208                 final int spacingCodePoint) {
209             final int combination = spacingCodePoint |
210                     (deadCodePoint << BITS_TO_SHIFT_DEAD_CODE_POINT_FOR_NON_STANDARD_COMBINATION);
211             return (char)sNonstandardDeadCombinations.get(combination, NOT_A_CHAR);
212         }
213     }
214 
215     // TODO: make this a list of events instead
216     final StringBuilder mDeadSequence = new StringBuilder();
217 
218     @Nonnull
createEventChainFromSequence(final @Nonnull CharSequence text, @Nonnull final Event originalEvent)219     private static Event createEventChainFromSequence(final @Nonnull CharSequence text,
220             @Nonnull final Event originalEvent) {
221         int index = text.length();
222         if (index <= 0) {
223             return originalEvent;
224         }
225         Event lastEvent = null;
226         do {
227             final int codePoint = Character.codePointBefore(text, index);
228             lastEvent = Event.createHardwareKeypressEvent(codePoint,
229                     originalEvent.mKeyCode, lastEvent, false /* isKeyRepeat */);
230             index -= Character.charCount(codePoint);
231         } while (index > 0);
232         return lastEvent;
233     }
234 
235     @Override
236     @Nonnull
processEvent(final ArrayList<Event> previousEvents, final Event event)237     public Event processEvent(final ArrayList<Event> previousEvents, final Event event) {
238         if (TextUtils.isEmpty(mDeadSequence)) {
239             // No dead char is currently being tracked: this is the most common case.
240             if (event.isDead()) {
241                 // The event was a dead key. Start tracking it.
242                 mDeadSequence.appendCodePoint(event.mCodePoint);
243                 return Event.createConsumedEvent(event);
244             }
245             // Regular keystroke when not keeping track of a dead key. Simply said, there are
246             // no dead keys at all in the current input, so this combiner has nothing to do and
247             // simply returns the event as is. The majority of events will go through this path.
248             return event;
249         }
250         if (Character.isWhitespace(event.mCodePoint)
251                 || event.mCodePoint == mDeadSequence.codePointBefore(mDeadSequence.length())) {
252             // When whitespace or twice the same dead key, we should output the dead sequence as is.
253             final Event resultEvent = createEventChainFromSequence(mDeadSequence.toString(),
254                     event);
255             mDeadSequence.setLength(0);
256             return resultEvent;
257         }
258         if (event.isFunctionalKeyEvent()) {
259             if (Constants.CODE_DELETE == event.mKeyCode) {
260                 // Remove the last code point
261                 final int trimIndex = mDeadSequence.length() - Character.charCount(
262                         mDeadSequence.codePointBefore(mDeadSequence.length()));
263                 mDeadSequence.setLength(trimIndex);
264                 return Event.createConsumedEvent(event);
265             }
266             return event;
267         }
268         if (event.isDead()) {
269             mDeadSequence.appendCodePoint(event.mCodePoint);
270             return Event.createConsumedEvent(event);
271         }
272         // Combine normally.
273         final StringBuilder sb = new StringBuilder();
274         sb.appendCodePoint(event.mCodePoint);
275         int codePointIndex = 0;
276         while (codePointIndex < mDeadSequence.length()) {
277             final int deadCodePoint = mDeadSequence.codePointAt(codePointIndex);
278             final char replacementSpacingChar =
279                     Data.getNonstandardCombination(deadCodePoint, event.mCodePoint);
280             if (Data.NOT_A_CHAR != replacementSpacingChar) {
281                 sb.setCharAt(0, replacementSpacingChar);
282             } else {
283                 final int combining = Data.sAccentToCombining.get(deadCodePoint);
284                 sb.appendCodePoint(0 == combining ? deadCodePoint : combining);
285             }
286             codePointIndex += Character.isSupplementaryCodePoint(deadCodePoint) ? 2 : 1;
287         }
288         final String normalizedString = Normalizer.normalize(sb, Normalizer.Form.NFC);
289         final Event resultEvent = createEventChainFromSequence(normalizedString, event);
290         mDeadSequence.setLength(0);
291         return resultEvent;
292     }
293 
294     @Override
reset()295     public void reset() {
296         mDeadSequence.setLength(0);
297     }
298 
299     @Override
getCombiningStateFeedback()300     public CharSequence getCombiningStateFeedback() {
301         return mDeadSequence;
302     }
303 }
304