1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                  *
5 *******************************************************************************
6 *
7 *******************************************************************************
8 */
9 /**
10  * A JNI interface for ICU converters.
11  *
12  *
13  * @author Ram Viswanadha, IBM
14  */
15 package java.nio.charset;
16 
17 import java.nio.ByteBuffer;
18 import java.nio.CharBuffer;
19 import java.util.HashMap;
20 import java.util.Map;
21 import libcore.icu.ICU;
22 import libcore.icu.NativeConverter;
23 import libcore.util.EmptyArray;
24 import libcore.util.NativeAllocationRegistry;
25 
26 final class CharsetEncoderICU extends CharsetEncoder {
27     private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
28     static {
29         // ICU has different default replacements to the RI in some cases. There are many
30         // additional cases, but this covers all the charsets that Java guarantees will be
31         // available, which is where compatibility seems most important. (The RI even uses
32         // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that
33         // byte corresponds to an entirely different character.)
34         // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it
35         // can represent it, but this is what the RI does...
36         byte[] questionMark = new byte[] { (byte) '?' };
37         DEFAULT_REPLACEMENTS.put("UTF-8",      questionMark);
38         DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
39         DEFAULT_REPLACEMENTS.put("US-ASCII",   questionMark);
40     }
41 
42     private static final int INPUT_OFFSET = 0;
43     private static final int OUTPUT_OFFSET = 1;
44     private static final int INVALID_CHAR_COUNT = 2;
45     /*
46      * data[INPUT_OFFSET]   = on input contains the start of input and on output the number of input chars consumed
47      * data[OUTPUT_OFFSET]  = on input contains the start of output and on output the number of output bytes written
48      * data[INVALID_CHARS]  = number of invalid chars
49      */
50     private int[] data = new int[3];
51 
52     /* handle to the ICU converter that is opened */
53     private final long converterHandle;
54 
55     private char[] input = null;
56     private byte[] output = null;
57 
58     private char[] allocatedInput = null;
59     private byte[] allocatedOutput = null;
60 
61     // These instance variables are always assigned in the methods before being used. This class
62     // is inherently thread-unsafe so we don't have to worry about synchronization.
63     private int inEnd;
64     private int outEnd;
65 
newInstance(Charset cs, String icuCanonicalName)66     public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) {
67         // This complexity is necessary to ensure that even if the constructor, superclass
68         // constructor, or call to updateCallback throw, we still free the native peer.
69         long address = 0;
70         try {
71             address = NativeConverter.openConverter(icuCanonicalName);
72             float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address);
73             float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address);
74             byte[] replacement = makeReplacement(icuCanonicalName, address);
75             CharsetEncoderICU result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address);
76             address = 0; // CharsetEncoderICU has taken ownership; its finalizer will do the free.
77             return result;
78         } finally {
79             if (address != 0) {
80                 NativeConverter.closeConverter(address);
81             }
82         }
83     }
84 
makeReplacement(String icuCanonicalName, long address)85     private static byte[] makeReplacement(String icuCanonicalName, long address) {
86         // We have our own map of RI-compatible default replacements (where ICU disagrees)...
87         byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
88         if (replacement != null) {
89             return replacement.clone();
90         }
91         // ...but fall back to asking ICU.
92         return NativeConverter.getSubstitutionBytes(address);
93     }
94 
CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address)95     private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) {
96         super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true);
97         // Our native peer needs to know what just happened...
98         this.converterHandle = address;
99         NativeConverter.registerConverter(this, converterHandle);
100         updateCallback();
101     }
102 
implReplaceWith(byte[] newReplacement)103     @Override protected void implReplaceWith(byte[] newReplacement) {
104         updateCallback();
105     }
106 
implOnMalformedInput(CodingErrorAction newAction)107     @Override protected void implOnMalformedInput(CodingErrorAction newAction) {
108         updateCallback();
109     }
110 
implOnUnmappableCharacter(CodingErrorAction newAction)111     @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
112         updateCallback();
113     }
114 
updateCallback()115     private void updateCallback() {
116         NativeConverter.setCallbackEncode(converterHandle, this);
117     }
118 
implReset()119     @Override protected void implReset() {
120         NativeConverter.resetCharToByte(converterHandle);
121         data[INPUT_OFFSET] = 0;
122         data[OUTPUT_OFFSET] = 0;
123         data[INVALID_CHAR_COUNT] = 0;
124         output = null;
125         input = null;
126         allocatedInput = null;
127         allocatedOutput = null;
128         inEnd = 0;
129         outEnd = 0;
130     }
131 
implFlush(ByteBuffer out)132     @Override protected CoderResult implFlush(ByteBuffer out) {
133         try {
134             // ICU needs to see an empty input.
135             input = EmptyArray.CHAR;
136             inEnd = 0;
137             data[INPUT_OFFSET] = 0;
138 
139             data[OUTPUT_OFFSET] = getArray(out);
140             data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
141 
142             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true);
143             if (ICU.U_FAILURE(error)) {
144                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
145                     return CoderResult.OVERFLOW;
146                 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) {
147                     if (data[INVALID_CHAR_COUNT] > 0) {
148                         return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
149                     }
150                 }
151             }
152             return CoderResult.UNDERFLOW;
153         } finally {
154             setPosition(out);
155             implReset();
156         }
157     }
158 
encodeLoop(CharBuffer in, ByteBuffer out)159     @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
160         if (!in.hasRemaining()) {
161             return CoderResult.UNDERFLOW;
162         }
163 
164         data[INPUT_OFFSET] = getArray(in);
165         data[OUTPUT_OFFSET]= getArray(out);
166         data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
167 
168         try {
169             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false);
170             if (ICU.U_FAILURE(error)) {
171                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
172                     return CoderResult.OVERFLOW;
173                 } else if (error == ICU.U_INVALID_CHAR_FOUND) {
174                     return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]);
175                 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) {
176                     return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
177                 } else {
178                     throw new AssertionError(error);
179                 }
180             }
181             // Decoding succeeded: give us more data.
182             return CoderResult.UNDERFLOW;
183         } finally {
184             setPosition(in);
185             setPosition(out);
186         }
187     }
188 
getArray(ByteBuffer out)189     private int getArray(ByteBuffer out) {
190         if (out.hasArray()) {
191             output = out.array();
192             outEnd = out.arrayOffset() + out.limit();
193             return out.arrayOffset() + out.position();
194         } else {
195             outEnd = out.remaining();
196             if (allocatedOutput == null || outEnd > allocatedOutput.length) {
197                 allocatedOutput = new byte[outEnd];
198             }
199             // The array's start position is 0
200             output = allocatedOutput;
201             return 0;
202         }
203     }
204 
getArray(CharBuffer in)205     private int getArray(CharBuffer in) {
206         if (in.hasArray()) {
207             input = in.array();
208             inEnd = in.arrayOffset() + in.limit();
209             return in.arrayOffset() + in.position();
210         } else {
211             inEnd = in.remaining();
212             if (allocatedInput == null || inEnd > allocatedInput.length) {
213                 allocatedInput = new char[inEnd];
214             }
215             // Copy the input buffer into the allocated array.
216             int pos = in.position();
217             in.get(allocatedInput, 0, inEnd);
218             in.position(pos);
219             // The array's start position is 0
220             input = allocatedInput;
221             return 0;
222         }
223     }
224 
setPosition(ByteBuffer out)225     private void setPosition(ByteBuffer out) {
226         if (out.hasArray()) {
227             out.position(data[OUTPUT_OFFSET] - out.arrayOffset());
228         } else {
229             out.put(output, 0, data[OUTPUT_OFFSET]);
230         }
231         // release reference to output array, which may not be ours
232         output = null;
233     }
234 
setPosition(CharBuffer in)235     private void setPosition(CharBuffer in) {
236         int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT];
237         if (position < 0) {
238             // The calculated position might be negative if we encountered an
239             // invalid char that spanned input buffers. We adjust it to 0 in this case.
240             //
241             // NOTE: The API doesn't allow us to adjust the position of the previous
242             // input buffer. (Doing that wouldn't serve any useful purpose anyway.)
243             position = 0;
244         }
245 
246         in.position(position);
247         // release reference to input array, which may not be ours
248         input = null;
249     }
250 }
251