1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 // -- This file was mechanically generated: Do not edit! -- //
28 
29 package java.nio.charset;
30 
31 import java.nio.Buffer;
32 import java.nio.ByteBuffer;
33 import java.nio.CharBuffer;
34 import java.nio.BufferOverflowException;
35 import java.nio.BufferUnderflowException;
36 import java.lang.ref.WeakReference;
37 import java.nio.charset.CoderMalfunctionError;                  // javadoc
38 import java.util.Arrays;
39 
40 
41 /**
42  * An engine that can transform a sequence of sixteen-bit Unicode characters into a sequence of
43  * bytes in a specific charset.
44  *
45  * <a name="steps"></a>
46  *
47  * <p> The input character sequence is provided in a character buffer or a series
48  * of such buffers.  The output byte sequence is written to a byte buffer
49  * or a series of such buffers.  An encoder should always be used by making
50  * the following sequence of method invocations, hereinafter referred to as an
51  * <i>encoding operation</i>:
52  *
53  * <ol>
54  *
55  *   <li><p> Reset the encoder via the {@link #reset reset} method, unless it
56  *   has not been used before; </p></li>
57  *
58  *   <li><p> Invoke the {@link #encode encode} method zero or more times, as
59  *   long as additional input may be available, passing <tt>false</tt> for the
60  *   <tt>endOfInput</tt> argument and filling the input buffer and flushing the
61  *   output buffer between invocations; </p></li>
62  *
63  *   <li><p> Invoke the {@link #encode encode} method one final time, passing
64  *   <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li>
65  *
66  *   <li><p> Invoke the {@link #flush flush} method so that the encoder can
67  *   flush any internal state to the output buffer. </p></li>
68  *
69  * </ol>
70  *
71  * Each invocation of the {@link #encode encode} method will encode as many
72  * characters as possible from the input buffer, writing the resulting bytes
73  * to the output buffer.  The {@link #encode encode} method returns when more
74  * input is required, when there is not enough room in the output buffer, or
75  * when an encoding error has occurred.  In each case a {@link CoderResult}
76  * object is returned to describe the reason for termination.  An invoker can
77  * examine this object and fill the input buffer, flush the output buffer, or
78  * attempt to recover from an encoding error, as appropriate, and try again.
79  *
80  * <a name="ce"></a>
81  *
82  * <p> There are two general types of encoding errors.  If the input character
83  * sequence is not a legal sixteen-bit Unicode sequence then the input is considered <i>malformed</i>.  If
84  * the input character sequence is legal but cannot be mapped to a valid
85  * byte sequence in the given charset then an <i>unmappable character</i> has been encountered.
86  *
87  * <a name="cae"></a>
88  *
89  * <p> How an encoding error is handled depends upon the action requested for
90  * that type of error, which is described by an instance of the {@linkplain
91  * CodingErrorAction} class.  The possible error actions are to {@linkplain
92  * CodingErrorAction#IGNORE ignore} the erroneous input, {@link
93  * CodingErrorAction#REPORT report} the error to the invoker via
94  * the returned {@link CoderResult} object, or {@linkplain CodingErrorAction#REPLACE
95  * replace} the erroneous input with the current value of the
 * replacement byte array.  The replacement
 * is initially set to the encoder's default replacement, which often
 * (but not always) has the initial value&nbsp;<tt>{</tt>&nbsp;<tt>(byte)'?'</tt>&nbsp;<tt>}</tt>;
 * its value may be changed via the {@link #replaceWith(byte[])
 * replaceWith} method.
108  *
109  * <p> The default action for malformed-input and unmappable-character errors
110  * is to {@linkplain CodingErrorAction#REPORT report} them.  The
111  * malformed-input error action may be changed via the {@link
112  * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the
113  * unmappable-character action may be changed via the {@link
114  * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.
115  *
116  * <p> This class is designed to handle many of the details of the encoding
117  * process, including the implementation of error actions.  An encoder for a
118  * specific charset, which is a concrete subclass of this class, need only
119  * implement the abstract {@link #encodeLoop encodeLoop} method, which
120  * encapsulates the basic encoding loop.  A subclass that maintains internal
121  * state should, additionally, override the {@link #implFlush implFlush} and
122  * {@link #implReset implReset} methods.
123  *
124  * <p> Instances of this class are not safe for use by multiple concurrent
125  * threads.  </p>
126  *
127  *
128  * @author Mark Reinhold
129  * @author JSR-51 Expert Group
130  * @since 1.4
131  *
132  * @see ByteBuffer
133  * @see CharBuffer
134  * @see Charset
135  * @see CharsetDecoder
136  */
137 
138 public abstract class CharsetEncoder {
139 
140     private final Charset charset;
141     private final float averageBytesPerChar;
142     private final float maxBytesPerChar;
143 
144     private byte[] replacement;
145     private CodingErrorAction malformedInputAction
146         = CodingErrorAction.REPORT;
147     private CodingErrorAction unmappableCharacterAction
148         = CodingErrorAction.REPORT;
149 
150     // Internal states
151     //
152     private static final int ST_RESET   = 0;
153     private static final int ST_CODING  = 1;
154     private static final int ST_END     = 2;
155     private static final int ST_FLUSHED = 3;
156 
157     private int state = ST_RESET;
158 
159     private static String stateNames[]
160         = { "RESET", "CODING", "CODING_END", "FLUSHED" };
161 
162 
163     /**
164      * Initializes a new encoder.  The new encoder will have the given
165      * bytes-per-char and replacement values.
166      *
167      * @param  cs
168      *         The charset that created this encoder
169      *
170      * @param  averageBytesPerChar
171      *         A positive float value indicating the expected number of
172      *         bytes that will be produced for each input character
173      *
174      * @param  maxBytesPerChar
175      *         A positive float value indicating the maximum number of
176      *         bytes that will be produced for each input character
177      *
178      * @param  replacement
179      *         The initial replacement; must not be <tt>null</tt>, must have
180      *         non-zero length, must not be longer than maxBytesPerChar,
181      *         and must be {@linkplain #isLegalReplacement legal}
182      *
183      * @throws  IllegalArgumentException
184      *          If the preconditions on the parameters do not hold
185      */
protected CharsetEncoder(Charset cs, float averageBytesPerChar,
                         float maxBytesPerChar, byte[] replacement)
// BEGIN Android-changed
{
    // Subclass-facing entry point: never trusted, so the replacement is
    // always passed through replaceWith() by the five-argument constructor.
    this(cs, averageBytesPerChar, maxBytesPerChar, replacement, /* trusted */ false);
}
195 
CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar,
               byte[] replacement, boolean trusted)
               // END Android-changed
{
    this.charset = cs;

    // Both ratios must be strictly positive.
    if (averageBytesPerChar <= 0.0f) {
        throw new IllegalArgumentException("Non-positive averageBytesPerChar");
    }
    if (maxBytesPerChar <= 0.0f) {
        throw new IllegalArgumentException("Non-positive maxBytesPerChar");
    }
    // The average-versus-maximum ordering check is skipped at bug level "1.4".
    if (!Charset.atBugLevel("1.4") && averageBytesPerChar > maxBytesPerChar) {
        throw new IllegalArgumentException("averageBytesPerChar exceeds maxBytesPerChar");
    }

    this.replacement = replacement;
    this.averageBytesPerChar = averageBytesPerChar;
    this.maxBytesPerChar = maxBytesPerChar;

    // BEGIN Android-changed
    // A trusted caller's replacement is stored as-is, without validation.
    if (trusted) {
        return;
    }
    // END Android-changed
    replaceWith(replacement);
}
227 
228     /**
229      * Initializes a new encoder.  The new encoder will have the given
230      * bytes-per-char values and its replacement will be the
231      * byte array <tt>{</tt>&nbsp;<tt>(byte)'?'</tt>&nbsp;<tt>}</tt>.
232      *
233      * @param  cs
234      *         The charset that created this encoder
235      *
236      * @param  averageBytesPerChar
237      *         A positive float value indicating the expected number of
238      *         bytes that will be produced for each input character
239      *
240      * @param  maxBytesPerChar
241      *         A positive float value indicating the maximum number of
242      *         bytes that will be produced for each input character
243      *
244      * @throws  IllegalArgumentException
245      *          If the preconditions on the parameters do not hold
246      */
protected CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar)
{
    // Delegate with the default one-byte replacement '?'.
    this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' });
}
255 
256     /**
257      * Returns the charset that created this encoder.
258      *
259      * @return  This encoder's charset
260      */
charset()261     public final Charset charset() {
262         return charset;
263     }
264 
265     /**
266      * Returns this encoder's replacement value.
267      *
268      * @return  This encoder's current replacement,
269      *          which is never <tt>null</tt> and is never empty
270      */
replacement()271     public final byte[] replacement() {
272         return Arrays.copyOf(replacement, replacement.length);
273     }
274 
275     /**
276      * Changes this encoder's replacement value.
277      *
278      * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
279      * method, passing the new replacement, after checking that the new
280      * replacement is acceptable.  </p>
281      *
     * @param  newReplacement
     *         The new replacement value; must not be <tt>null</tt>, must have
     *         non-zero length, must not be longer than the value returned by
     *         the {@link #maxBytesPerChar() maxBytesPerChar} method, and
     *         must be {@link #isLegalReplacement legal}
     *
295      * @return  This encoder
296      *
297      * @throws  IllegalArgumentException
298      *          If the preconditions on the parameter do not hold
299      */
replaceWith(byte[] newReplacement)300     public final CharsetEncoder replaceWith(byte[] newReplacement) {
301         if (newReplacement == null)
302             throw new IllegalArgumentException("Null replacement");
303         int len = newReplacement.length;
304         if (len == 0)
305             throw new IllegalArgumentException("Empty replacement");
306         if (len > maxBytesPerChar)
307             throw new IllegalArgumentException("Replacement too long");
308 
309 
310 
311 
312         if (!isLegalReplacement(newReplacement))
313             throw new IllegalArgumentException("Illegal replacement");
314         this.replacement = Arrays.copyOf(newReplacement, newReplacement.length);
315 
316         this.replacement = newReplacement;
317         implReplaceWith(this.replacement);
318         return this;
319     }
320 
321     /**
322      * Reports a change to this encoder's replacement value.
323      *
324      * <p> The default implementation of this method does nothing.  This method
325      * should be overridden by encoders that require notification of changes to
326      * the replacement.  </p>
327      *
328      * @param  newReplacement    The replacement value
329      */
implReplaceWith(byte[] newReplacement)330     protected void implReplaceWith(byte[] newReplacement) {
331     }
332 
333 
334 
335     private WeakReference<CharsetDecoder> cachedDecoder = null;
336 
337     /**
338      * Tells whether or not the given byte array is a legal replacement value
339      * for this encoder.
340      *
341      * <p> A replacement is legal if, and only if, it is a legal sequence of
342      * bytes in this encoder's charset; that is, it must be possible to decode
343      * the replacement into one or more sixteen-bit Unicode characters.
344      *
345      * <p> The default implementation of this method is not very efficient; it
346      * should generally be overridden to improve performance.  </p>
347      *
348      * @param  repl  The byte array to be tested
349      *
350      * @return  <tt>true</tt> if, and only if, the given byte array
351      *          is a legal replacement value for this encoder
352      */
isLegalReplacement(byte[] repl)353     public boolean isLegalReplacement(byte[] repl) {
354         WeakReference<CharsetDecoder> wr = cachedDecoder;
355         CharsetDecoder dec = null;
356         if ((wr == null) || ((dec = wr.get()) == null)) {
357             dec = charset().newDecoder();
358             dec.onMalformedInput(CodingErrorAction.REPORT);
359             dec.onUnmappableCharacter(CodingErrorAction.REPORT);
360             cachedDecoder = new WeakReference<CharsetDecoder>(dec);
361         } else {
362             dec.reset();
363         }
364         ByteBuffer bb = ByteBuffer.wrap(repl);
365         CharBuffer cb = CharBuffer.allocate((int)(bb.remaining()
366                                                   * dec.maxCharsPerByte()));
367         CoderResult cr = dec.decode(bb, cb, true);
368         return !cr.isError();
369     }
370 
371 
372 
373     /**
374      * Returns this encoder's current action for malformed-input errors.
375      *
376      * @return The current malformed-input action, which is never <tt>null</tt>
377      */
malformedInputAction()378     public CodingErrorAction malformedInputAction() {
379         return malformedInputAction;
380     }
381 
382     /**
383      * Changes this encoder's action for malformed-input errors.
384      *
385      * <p> This method invokes the {@link #implOnMalformedInput
386      * implOnMalformedInput} method, passing the new action.  </p>
387      *
388      * @param  newAction  The new action; must not be <tt>null</tt>
389      *
390      * @return  This encoder
391      *
392      * @throws IllegalArgumentException
393      *         If the precondition on the parameter does not hold
394      */
onMalformedInput(CodingErrorAction newAction)395     public final CharsetEncoder onMalformedInput(CodingErrorAction newAction) {
396         if (newAction == null)
397             throw new IllegalArgumentException("Null action");
398         malformedInputAction = newAction;
399         implOnMalformedInput(newAction);
400         return this;
401     }
402 
403     /**
404      * Reports a change to this encoder's malformed-input action.
405      *
406      * <p> The default implementation of this method does nothing.  This method
407      * should be overridden by encoders that require notification of changes to
408      * the malformed-input action.  </p>
409      *
410      * @param  newAction  The new action
411      */
implOnMalformedInput(CodingErrorAction newAction)412     protected void implOnMalformedInput(CodingErrorAction newAction) { }
413 
414     /**
415      * Returns this encoder's current action for unmappable-character errors.
416      *
417      * @return The current unmappable-character action, which is never
418      *         <tt>null</tt>
419      */
unmappableCharacterAction()420     public CodingErrorAction unmappableCharacterAction() {
421         return unmappableCharacterAction;
422     }
423 
424     /**
425      * Changes this encoder's action for unmappable-character errors.
426      *
427      * <p> This method invokes the {@link #implOnUnmappableCharacter
428      * implOnUnmappableCharacter} method, passing the new action.  </p>
429      *
430      * @param  newAction  The new action; must not be <tt>null</tt>
431      *
432      * @return  This encoder
433      *
434      * @throws IllegalArgumentException
435      *         If the precondition on the parameter does not hold
436      */
onUnmappableCharacter(CodingErrorAction newAction)437     public final CharsetEncoder onUnmappableCharacter(CodingErrorAction
438                                                       newAction)
439     {
440         if (newAction == null)
441             throw new IllegalArgumentException("Null action");
442         unmappableCharacterAction = newAction;
443         implOnUnmappableCharacter(newAction);
444         return this;
445     }
446 
447     /**
448      * Reports a change to this encoder's unmappable-character action.
449      *
450      * <p> The default implementation of this method does nothing.  This method
451      * should be overridden by encoders that require notification of changes to
452      * the unmappable-character action.  </p>
453      *
454      * @param  newAction  The new action
455      */
implOnUnmappableCharacter(CodingErrorAction newAction)456     protected void implOnUnmappableCharacter(CodingErrorAction newAction) { }
457 
458     /**
459      * Returns the average number of bytes that will be produced for each
460      * character of input.  This heuristic value may be used to estimate the size
461      * of the output buffer required for a given input sequence.
462      *
463      * @return  The average number of bytes produced
464      *          per character of input
465      */
averageBytesPerChar()466     public final float averageBytesPerChar() {
467         return averageBytesPerChar;
468     }
469 
470     /**
471      * Returns the maximum number of bytes that will be produced for each
472      * character of input.  This value may be used to compute the worst-case size
473      * of the output buffer required for a given input sequence.
474      *
475      * @return  The maximum number of bytes that will be produced per
476      *          character of input
477      */
maxBytesPerChar()478     public final float maxBytesPerChar() {
479         return maxBytesPerChar;
480     }
481 
482     /**
483      * Encodes as many characters as possible from the given input buffer,
484      * writing the results to the given output buffer.
485      *
486      * <p> The buffers are read from, and written to, starting at their current
487      * positions.  At most {@link Buffer#remaining in.remaining()} characters
488      * will be read and at most {@link Buffer#remaining out.remaining()}
489      * bytes will be written.  The buffers' positions will be advanced to
490      * reflect the characters read and the bytes written, but their marks and
491      * limits will not be modified.
492      *
493      * <p> In addition to reading characters from the input buffer and writing
494      * bytes to the output buffer, this method returns a {@link CoderResult}
495      * object to describe its reason for termination:
496      *
497      * <ul>
498      *
499      *   <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the
500      *   input buffer as possible has been encoded.  If there is no further
501      *   input then the invoker can proceed to the next step of the
502      *   <a href="#steps">encoding operation</a>.  Otherwise this method
503      *   should be invoked again with further input.  </p></li>
504      *
505      *   <li><p> {@link CoderResult#OVERFLOW} indicates that there is
506      *   insufficient space in the output buffer to encode any more characters.
507      *   This method should be invoked again with an output buffer that has
508      *   more {@linkplain Buffer#remaining remaining} bytes. This is
509      *   typically done by draining any encoded bytes from the output
510      *   buffer.  </p></li>
511      *
512      *   <li><p> A {@linkplain CoderResult#malformedForLength
513      *   malformed-input} result indicates that a malformed-input
514      *   error has been detected.  The malformed characters begin at the input
515      *   buffer's (possibly incremented) position; the number of malformed
516      *   characters may be determined by invoking the result object's {@link
517      *   CoderResult#length() length} method.  This case applies only if the
518      *   {@linkplain #onMalformedInput malformed action} of this encoder
519      *   is {@link CodingErrorAction#REPORT}; otherwise the malformed input
520      *   will be ignored or replaced, as requested.  </p></li>
521      *
522      *   <li><p> An {@linkplain CoderResult#unmappableForLength
523      *   unmappable-character} result indicates that an
524      *   unmappable-character error has been detected.  The characters that
525      *   encode the unmappable character begin at the input buffer's (possibly
526      *   incremented) position; the number of such characters may be determined
527      *   by invoking the result object's {@link CoderResult#length() length}
528      *   method.  This case applies only if the {@linkplain #onUnmappableCharacter
529      *   unmappable action} of this encoder is {@link
530      *   CodingErrorAction#REPORT}; otherwise the unmappable character will be
531      *   ignored or replaced, as requested.  </p></li>
532      *
533      * </ul>
534      *
535      * In any case, if this method is to be reinvoked in the same encoding
536      * operation then care should be taken to preserve any characters remaining
537      * in the input buffer so that they are available to the next invocation.
538      *
539      * <p> The <tt>endOfInput</tt> parameter advises this method as to whether
540      * the invoker can provide further input beyond that contained in the given
541      * input buffer.  If there is a possibility of providing additional input
542      * then the invoker should pass <tt>false</tt> for this parameter; if there
543      * is no possibility of providing further input then the invoker should
544      * pass <tt>true</tt>.  It is not erroneous, and in fact it is quite
545      * common, to pass <tt>false</tt> in one invocation and later discover that
546      * no further input was actually available.  It is critical, however, that
547      * the final invocation of this method in a sequence of invocations always
548      * pass <tt>true</tt> so that any remaining unencoded input will be treated
549      * as being malformed.
550      *
551      * <p> This method works by invoking the {@link #encodeLoop encodeLoop}
552      * method, interpreting its results, handling error conditions, and
553      * reinvoking it as necessary.  </p>
554      *
555      *
556      * @param  in
557      *         The input character buffer
558      *
559      * @param  out
560      *         The output byte buffer
561      *
562      * @param  endOfInput
563      *         <tt>true</tt> if, and only if, the invoker can provide no
564      *         additional input characters beyond those in the given buffer
565      *
566      * @return  A coder-result object describing the reason for termination
567      *
568      * @throws  IllegalStateException
569      *          If an encoding operation is already in progress and the previous
570      *          step was an invocation neither of the {@link #reset reset}
571      *          method, nor of this method with a value of <tt>false</tt> for
572      *          the <tt>endOfInput</tt> parameter, nor of this method with a
573      *          value of <tt>true</tt> for the <tt>endOfInput</tt> parameter
574      *          but a return value indicating an incomplete encoding operation
575      *
576      * @throws  CoderMalfunctionError
577      *          If an invocation of the encodeLoop method threw
578      *          an unexpected exception
579      */
encode(CharBuffer in, ByteBuffer out, boolean endOfInput)580     public final CoderResult encode(CharBuffer in, ByteBuffer out,
581                                     boolean endOfInput)
582     {
583         int newState = endOfInput ? ST_END : ST_CODING;
584         if ((state != ST_RESET) && (state != ST_CODING)
585             && !(endOfInput && (state == ST_END)))
586             throwIllegalStateException(state, newState);
587         state = newState;
588 
589         for (;;) {
590 
591             CoderResult cr;
592             try {
593                 cr = encodeLoop(in, out);
594             } catch (BufferUnderflowException x) {
595                 throw new CoderMalfunctionError(x);
596             } catch (BufferOverflowException x) {
597                 throw new CoderMalfunctionError(x);
598             }
599 
600             if (cr.isOverflow())
601                 return cr;
602 
603             if (cr.isUnderflow()) {
604                 if (endOfInput && in.hasRemaining()) {
605                     cr = CoderResult.malformedForLength(in.remaining());
606                     // Fall through to malformed-input case
607                 } else {
608                     return cr;
609                 }
610             }
611 
612             CodingErrorAction action = null;
613             if (cr.isMalformed())
614                 action = malformedInputAction;
615             else if (cr.isUnmappable())
616                 action = unmappableCharacterAction;
617             else
618                 assert false : cr.toString();
619 
620             if (action == CodingErrorAction.REPORT)
621                 return cr;
622 
623             if (action == CodingErrorAction.REPLACE) {
624                 if (out.remaining() < replacement.length)
625                     return CoderResult.OVERFLOW;
626                 out.put(replacement);
627             }
628 
629             if ((action == CodingErrorAction.IGNORE)
630                 || (action == CodingErrorAction.REPLACE)) {
631                 // Skip erroneous input either way
632                 in.position(in.position() + cr.length());
633                 continue;
634             }
635 
636             assert false;
637         }
638 
639     }
640 
641     /**
642      * Flushes this encoder.
643      *
644      * <p> Some encoders maintain internal state and may need to write some
645      * final bytes to the output buffer once the overall input sequence has
646      * been read.
647      *
648      * <p> Any additional output is written to the output buffer beginning at
649      * its current position.  At most {@link Buffer#remaining out.remaining()}
650      * bytes will be written.  The buffer's position will be advanced
651      * appropriately, but its mark and limit will not be modified.
652      *
653      * <p> If this method completes successfully then it returns {@link
654      * CoderResult#UNDERFLOW}.  If there is insufficient room in the output
655      * buffer then it returns {@link CoderResult#OVERFLOW}.  If this happens
656      * then this method must be invoked again, with an output buffer that has
657      * more room, in order to complete the current <a href="#steps">encoding
658      * operation</a>.
659      *
660      * <p> If this encoder has already been flushed then invoking this method
661      * has no effect.
662      *
663      * <p> This method invokes the {@link #implFlush implFlush} method to
664      * perform the actual flushing operation.  </p>
665      *
666      * @param  out
667      *         The output byte buffer
668      *
669      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or
670      *          {@link CoderResult#OVERFLOW}
671      *
672      * @throws  IllegalStateException
673      *          If the previous step of the current encoding operation was an
674      *          invocation neither of the {@link #flush flush} method nor of
675      *          the three-argument {@link
676      *          #encode(CharBuffer,ByteBuffer,boolean) encode} method
677      *          with a value of <tt>true</tt> for the <tt>endOfInput</tt>
678      *          parameter
679      */
flush(ByteBuffer out)680     public final CoderResult flush(ByteBuffer out) {
681         if (state == ST_END) {
682             CoderResult cr = implFlush(out);
683             if (cr.isUnderflow())
684                 state = ST_FLUSHED;
685             return cr;
686         }
687 
688         if (state != ST_FLUSHED)
689             throwIllegalStateException(state, ST_FLUSHED);
690 
691         return CoderResult.UNDERFLOW; // Already flushed
692     }
693 
694     /**
695      * Flushes this encoder.
696      *
697      * <p> The default implementation of this method does nothing, and always
698      * returns {@link CoderResult#UNDERFLOW}.  This method should be overridden
699      * by encoders that may need to write final bytes to the output buffer
700      * once the entire input sequence has been read. </p>
701      *
702      * @param  out
703      *         The output byte buffer
704      *
705      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or
706      *          {@link CoderResult#OVERFLOW}
707      */
implFlush(ByteBuffer out)708     protected CoderResult implFlush(ByteBuffer out) {
709         return CoderResult.UNDERFLOW;
710     }
711 
712     /**
713      * Resets this encoder, clearing any internal state.
714      *
715      * <p> This method resets charset-independent state and also invokes the
716      * {@link #implReset() implReset} method in order to perform any
717      * charset-specific reset actions.  </p>
718      *
719      * @return  This encoder
720      *
721      */
reset()722     public final CharsetEncoder reset() {
723         implReset();
724         state = ST_RESET;
725         return this;
726     }
727 
728     /**
729      * Resets this encoder, clearing any charset-specific internal state.
730      *
731      * <p> The default implementation of this method does nothing.  This method
732      * should be overridden by encoders that maintain internal state.  </p>
733      */
implReset()734     protected void implReset() { }
735 
736     /**
737      * Encodes one or more characters into one or more bytes.
738      *
739      * <p> This method encapsulates the basic encoding loop, encoding as many
740      * characters as possible until it either runs out of input, runs out of room
741      * in the output buffer, or encounters an encoding error.  This method is
742      * invoked by the {@link #encode encode} method, which handles result
743      * interpretation and error recovery.
744      *
745      * <p> The buffers are read from, and written to, starting at their current
746      * positions.  At most {@link Buffer#remaining in.remaining()} characters
747      * will be read, and at most {@link Buffer#remaining out.remaining()}
748      * bytes will be written.  The buffers' positions will be advanced to
749      * reflect the characters read and the bytes written, but their marks and
750      * limits will not be modified.
751      *
752      * <p> This method returns a {@link CoderResult} object to describe its
753      * reason for termination, in the same manner as the {@link #encode encode}
754      * method.  Most implementations of this method will handle encoding errors
755      * by returning an appropriate result object for interpretation by the
756      * {@link #encode encode} method.  An optimized implementation may instead
757      * examine the relevant error action and implement that action itself.
758      *
759      * <p> An implementation of this method may perform arbitrary lookahead by
760      * returning {@link CoderResult#UNDERFLOW} until it receives sufficient
761      * input.  </p>
762      *
763      * @param  in
764      *         The input character buffer
765      *
766      * @param  out
767      *         The output byte buffer
768      *
769      * @return  A coder-result object describing the reason for termination
770      */
encodeLoop(CharBuffer in, ByteBuffer out)771     protected abstract CoderResult encodeLoop(CharBuffer in,
772                                               ByteBuffer out);
773 
774     /**
775      * Convenience method that encodes the remaining content of a single input
776      * character buffer into a newly-allocated byte buffer.
777      *
778      * <p> This method implements an entire <a href="#steps">encoding
779      * operation</a>; that is, it resets this encoder, then it encodes the
780      * characters in the given character buffer, and finally it flushes this
781      * encoder.  This method should therefore not be invoked if an encoding
782      * operation is already in progress.  </p>
783      *
784      * @param  in
785      *         The input character buffer
786      *
787      * @return A newly-allocated byte buffer containing the result of the
788      *         encoding operation.  The buffer's position will be zero and its
789      *         limit will follow the last byte written.
790      *
791      * @throws  IllegalStateException
792      *          If an encoding operation is already in progress
793      *
794      * @throws  MalformedInputException
795      *          If the character sequence starting at the input buffer's current
796      *          position is not a legal sixteen-bit Unicode sequence and the current malformed-input action
797      *          is {@link CodingErrorAction#REPORT}
798      *
799      * @throws  UnmappableCharacterException
800      *          If the character sequence starting at the input buffer's current
801      *          position cannot be mapped to an equivalent byte sequence and
802      *          the current unmappable-character action is {@link
803      *          CodingErrorAction#REPORT}
804      */
encode(CharBuffer in)805     public final ByteBuffer encode(CharBuffer in)
806         throws CharacterCodingException
807     {
808         int n = (int)(in.remaining() * averageBytesPerChar());
809         ByteBuffer out = ByteBuffer.allocate(n);
810 
811         if ((n == 0) && (in.remaining() == 0))
812             return out;
813         reset();
814         for (;;) {
815             CoderResult cr = in.hasRemaining() ?
816                 encode(in, out, true) : CoderResult.UNDERFLOW;
817             if (cr.isUnderflow())
818                 cr = flush(out);
819 
820             if (cr.isUnderflow())
821                 break;
822             if (cr.isOverflow()) {
823                 n = 2*n + 1;    // Ensure progress; n might be 0!
824                 ByteBuffer o = ByteBuffer.allocate(n);
825                 out.flip();
826                 o.put(out);
827                 out = o;
828                 continue;
829             }
830             cr.throwException();
831         }
832         out.flip();
833         return out;
834     }
835 
836 
837 
838 
839 
840 
841 
842 
843 
844 
845 
846 
847 
848 
849 
850 
851 
852 
853 
854 
855 
856 
857 
858 
859 
860 
861 
862 
863 
864 
865 
866 
867 
868 
869 
870 
871 
872 
873 
874 
875 
876 
877 
878 
879 
880 
881 
882 
883 
884 
885 
886 
887 
888 
889 
890 
891 
892 
893 
894 
895 
896 
897 
898 
899 
900 
901 
902 
903 
904 
905 
906 
907 
908 
909 
910 
911 
912 
913 
canEncode(CharBuffer cb)914     private boolean canEncode(CharBuffer cb) {
915         // Empty buffers or char-sequences are always encodable by definition.
916         if (!cb.hasRemaining()) {
917             return true;
918         }
919 
920         if (state == ST_FLUSHED)
921             reset();
922         else if (state != ST_RESET)
923             throwIllegalStateException(state, ST_CODING);
924         CodingErrorAction ma = malformedInputAction();
925         CodingErrorAction ua = unmappableCharacterAction();
926         try {
927             onMalformedInput(CodingErrorAction.REPORT);
928             onUnmappableCharacter(CodingErrorAction.REPORT);
929             // Android-changed: Account for ignorable codepoints. ICU doesn't report
930             // an error, but will return an empty buffer.
931             ByteBuffer buf = encode(cb);
932             return buf.hasRemaining();
933         } catch (CharacterCodingException x) {
934             // fall through to return false.
935         } finally {
936             onMalformedInput(ma);
937             onUnmappableCharacter(ua);
938             reset();
939         }
940         return false;
941     }
942 
943     /**
944      * Tells whether or not this encoder can encode the given character.
945      *
946      * <p> This method returns <tt>false</tt> if the given character is a
947      * surrogate character; such characters can be interpreted only when they
948      * are members of a pair consisting of a high surrogate followed by a low
949      * surrogate.  The {@link #canEncode(java.lang.CharSequence)
950      * canEncode(CharSequence)} method may be used to test whether or not a
951      * character sequence can be encoded.
952      *
953      * <p> This method may modify this encoder's state; it should therefore not
954      * be invoked if an <a href="#steps">encoding operation</a> is already in
955      * progress.
956      *
957      * <p> The default implementation of this method is not very efficient; it
958      * should generally be overridden to improve performance.  </p>
959      *
960      * @param   c
961      *          The given character
962      *
963      * @return  <tt>true</tt> if, and only if, this encoder can encode
964      *          the given character
965      *
966      * @throws  IllegalStateException
967      *          If an encoding operation is already in progress
968      */
canEncode(char c)969     public boolean canEncode(char c) {
970         CharBuffer cb = CharBuffer.allocate(1);
971         cb.put(c);
972         cb.flip();
973         return canEncode(cb);
974     }
975 
976     /**
977      * Tells whether or not this encoder can encode the given character
978      * sequence.
979      *
980      * <p> If this method returns <tt>false</tt> for a particular character
981      * sequence then more information about why the sequence cannot be encoded
982      * may be obtained by performing a full <a href="#steps">encoding
983      * operation</a>.
984      *
985      * <p> This method may modify this encoder's state; it should therefore not
986      * be invoked if an encoding operation is already in progress.
987      *
988      * <p> The default implementation of this method is not very efficient; it
989      * should generally be overridden to improve performance.  </p>
990      *
991      * @param   cs
992      *          The given character sequence
993      *
994      * @return  <tt>true</tt> if, and only if, this encoder can encode
995      *          the given character without throwing any exceptions and without
996      *          performing any replacements
997      *
998      * @throws  IllegalStateException
999      *          If an encoding operation is already in progress
1000      */
canEncode(CharSequence cs)1001     public boolean canEncode(CharSequence cs) {
1002         CharBuffer cb;
1003         if (cs instanceof CharBuffer)
1004             cb = ((CharBuffer)cs).duplicate();
1005         else
1006             cb = CharBuffer.wrap(cs);
1007         return canEncode(cb);
1008     }
1009 
1010 
1011 
1012 
throwIllegalStateException(int from, int to)1013     private void throwIllegalStateException(int from, int to) {
1014         throw new IllegalStateException("Current state = " + stateNames[from]
1015                                         + ", new state = " + stateNames[to]);
1016     }
1017 
1018 }
1019