1 /*
2  * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $
3  * $Revision: 602520 $
4  * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $
5  *
6  * ====================================================================
7  * Licensed to the Apache Software Foundation (ASF) under one
8  * or more contributor license agreements.  See the NOTICE file
9  * distributed with this work for additional information
10  * regarding copyright ownership.  The ASF licenses this file
11  * to you under the Apache License, Version 2.0 (the
12  * "License"); you may not use this file except in compliance
13  * with the License.  You may obtain a copy of the License at
14  *
15  *   http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing,
18  * software distributed under the License is distributed on an
19  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20  * KIND, either express or implied.  See the License for the
21  * specific language governing permissions and limitations
22  * under the License.
23  * ====================================================================
24  *
25  * This software consists of voluntary contributions made by many
26  * individuals on behalf of the Apache Software Foundation.  For more
27  * information on the Apache Software Foundation, please see
28  * <http://www.apache.org/>.
29  *
30  */
31 
32 package org.apache.http.message;
33 
34 import java.util.NoSuchElementException;
35 
36 import org.apache.http.HeaderIterator;
37 import org.apache.http.ParseException;
38 import org.apache.http.TokenIterator;
39 
40 /**
41  * Basic implementation of a {@link TokenIterator}.
42  * This implementation parses <tt>#token<tt> sequences as
43  * defined by RFC 2616, section 2.
44  * It extends that definition somewhat beyond US-ASCII.
45  *
46  * @version $Revision: 602520 $
47  *
48  * @deprecated Please use {@link java.net.URL#openConnection} instead.
49  *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
50  *     for further details.
51  */
52 @Deprecated
53 public class BasicTokenIterator implements TokenIterator {
54 
55     /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
56     // the order of the characters here is adjusted to put the
57     // most likely candidates at the beginning of the collection
58     public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
59 
60 
61     /** The iterator from which to obtain the next header. */
62     protected final HeaderIterator headerIt;
63 
64     /**
65      * The value of the current header.
66      * This is the header value that includes {@link #currentToken}.
67      * Undefined if the iteration is over.
68      */
69     protected String currentHeader;
70 
71     /**
72      * The token to be returned by the next call to {@link #currentToken}.
73      * <code>null</code> if the iteration is over.
74      */
75     protected String currentToken;
76 
77     /**
78      * The position after {@link #currentToken} in {@link #currentHeader}.
79      * Undefined if the iteration is over.
80      */
81     protected int searchPos;
82 
83 
84     /**
85      * Creates a new instance of {@link BasicTokenIterator}.
86      *
87      * @param headerIterator    the iterator for the headers to tokenize
88      */
89     public BasicTokenIterator(final HeaderIterator headerIterator) {
90         if (headerIterator == null) {
91             throw new IllegalArgumentException
92                 ("Header iterator must not be null.");
93         }
94 
95         this.headerIt = headerIterator;
96         this.searchPos = findNext(-1);
97     }
98 
99 
100     // non-javadoc, see interface TokenIterator
101     public boolean hasNext() {
102         return (this.currentToken != null);
103     }
104 
105 
106     /**
107      * Obtains the next token from this iteration.
108      *
109      * @return  the next token in this iteration
110      *
111      * @throws NoSuchElementException   if the iteration is already over
112      * @throws ParseException   if an invalid header value is encountered
113      */
114     public String nextToken()
115         throws NoSuchElementException, ParseException {
116 
117         if (this.currentToken == null) {
118             throw new NoSuchElementException("Iteration already finished.");
119         }
120 
121         final String result = this.currentToken;
122         // updates currentToken, may trigger ParseException:
123         this.searchPos = findNext(this.searchPos);
124 
125         return result;
126     }
127 
128 
129     /**
130      * Returns the next token.
131      * Same as {@link #nextToken}, but with generic return type.
132      *
133      * @return  the next token in this iteration
134      *
135      * @throws NoSuchElementException   if there are no more tokens
136      * @throws ParseException   if an invalid header value is encountered
137      */
138     public final Object next()
139         throws NoSuchElementException, ParseException {
140         return nextToken();
141     }
142 
143 
144     /**
145      * Removing tokens is not supported.
146      *
147      * @throws UnsupportedOperationException    always
148      */
149     public final void remove()
150         throws UnsupportedOperationException {
151 
152         throw new UnsupportedOperationException
153             ("Removing tokens is not supported.");
154     }
155 
156 
157     /**
158      * Determines the next token.
159      * If found, the token is stored in {@link #currentToken}.
160      * The return value indicates the position after the token
161      * in {@link #currentHeader}. If necessary, the next header
162      * will be obtained from {@link #headerIt}.
163      * If not found, {@link #currentToken} is set to <code>null</code>.
164      *
165      * @param from      the position in the current header at which to
166      *                  start the search, -1 to search in the first header
167      *
168      * @return  the position after the found token in the current header, or
169      *          negative if there was no next token
170      *
171      * @throws ParseException   if an invalid header value is encountered
172      */
173     protected int findNext(int from)
174         throws ParseException {
175 
176         if (from < 0) {
177             // called from the constructor, initialize the first header
178             if (!this.headerIt.hasNext()) {
179                 return -1;
180             }
181             this.currentHeader = this.headerIt.nextHeader().getValue();
182             from = 0;
183         } else {
184             // called after a token, make sure there is a separator
185             from = findTokenSeparator(from);
186         }
187 
188         int start = findTokenStart(from);
189         if (start < 0) {
190             this.currentToken = null;
191             return -1; // nothing found
192         }
193 
194         int end = findTokenEnd(start);
195         this.currentToken = createToken(this.currentHeader, start, end);
196         return end;
197     }
198 
199 
200     /**
201      * Creates a new token to be returned.
202      * Called from {@link #findNext findNext} after the token is identified.
203      * The default implementation simply calls
204      * {@link java.lang.String#substring String.substring}.
205      * <br/>
206      * If header values are significantly longer than tokens, and some
207      * tokens are permanently referenced by the application, there can
208      * be problems with garbage collection. A substring will hold a
209      * reference to the full characters of the original string and
210      * therefore occupies more memory than might be expected.
211      * To avoid this, override this method and create a new string
212      * instead of a substring.
213      *
214      * @param value     the full header value from which to create a token
215      * @param start     the index of the first token character
216      * @param end       the index after the last token character
217      *
218      * @return  a string representing the token identified by the arguments
219      */
220     protected String createToken(String value, int start, int end) {
221         return value.substring(start, end);
222     }
223 
224 
225     /**
226      * Determines the starting position of the next token.
227      * This method will iterate over headers if necessary.
228      *
229      * @param from      the position in the current header at which to
230      *                  start the search
231      *
232      * @return  the position of the token start in the current header,
233      *          negative if no token start could be found
234      */
235     protected int findTokenStart(int from) {
236         if (from < 0) {
237             throw new IllegalArgumentException
238                 ("Search position must not be negative: " + from);
239         }
240 
241         boolean found = false;
242         while (!found && (this.currentHeader != null)) {
243 
244             final int to = this.currentHeader.length();
245             while (!found && (from < to)) {
246 
247                 final char ch = this.currentHeader.charAt(from);
248                 if (isTokenSeparator(ch) || isWhitespace(ch)) {
249                     // whitspace and token separators are skipped
250                     from++;
251                 } else if (isTokenChar(this.currentHeader.charAt(from))) {
252                     // found the start of a token
253                     found = true;
254                 } else {
255                     throw new ParseException
256                         ("Invalid character before token (pos " + from +
257                          "): " + this.currentHeader);
258                 }
259             }
260             if (!found) {
261                 if (this.headerIt.hasNext()) {
262                     this.currentHeader = this.headerIt.nextHeader().getValue();
263                     from = 0;
264                 } else {
265                     this.currentHeader = null;
266                 }
267             }
268         } // while headers
269 
270         return found ? from : -1;
271     }
272 
273 
274     /**
275      * Determines the position of the next token separator.
276      * Because of multi-header joining rules, the end of a
277      * header value is a token separator. This method does
278      * therefore not need to iterate over headers.
279      *
280      * @param from      the position in the current header at which to
281      *                  start the search
282      *
283      * @return  the position of a token separator in the current header,
284      *          or at the end
285      *
286      * @throws ParseException
287      *         if a new token is found before a token separator.
288      *         RFC 2616, section 2.1 explicitly requires a comma between
289      *         tokens for <tt>#</tt>.
290      */
291     protected int findTokenSeparator(int from) {
292         if (from < 0) {
293             throw new IllegalArgumentException
294                 ("Search position must not be negative: " + from);
295         }
296 
297         boolean found = false;
298         final int to = this.currentHeader.length();
299         while (!found && (from < to)) {
300             final char ch = this.currentHeader.charAt(from);
301             if (isTokenSeparator(ch)) {
302                 found = true;
303             } else if (isWhitespace(ch)) {
304                 from++;
305             } else if (isTokenChar(ch)) {
306                 throw new ParseException
307                     ("Tokens without separator (pos " + from +
308                      "): " + this.currentHeader);
309             } else {
310                 throw new ParseException
311                     ("Invalid character after token (pos " + from +
312                      "): " + this.currentHeader);
313             }
314         }
315 
316         return from;
317     }
318 
319 
320     /**
321      * Determines the ending position of the current token.
322      * This method will not leave the current header value,
323      * since the end of the header value is a token boundary.
324      *
325      * @param from      the position of the first character of the token
326      *
327      * @return  the position after the last character of the token.
328      *          The behavior is undefined if <code>from</code> does not
329      *          point to a token character in the current header value.
330      */
331     protected int findTokenEnd(int from) {
332         if (from < 0) {
333             throw new IllegalArgumentException
334                 ("Token start position must not be negative: " + from);
335         }
336 
337         final int to = this.currentHeader.length();
338         int end = from+1;
339         while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
340             end++;
341         }
342 
343         return end;
344     }
345 
346 
347     /**
348      * Checks whether a character is a token separator.
349      * RFC 2616, section 2.1 defines comma as the separator for
350      * <tt>#token</tt> sequences. The end of a header value will
351      * also separate tokens, but that is not a character check.
352      *
353      * @param ch        the character to check
354      *
355      * @return  <code>true</code> if the character is a token separator,
356      *          <code>false</code> otherwise
357      */
358     protected boolean isTokenSeparator(char ch) {
359         return (ch == ',');
360     }
361 
362 
363     /**
364      * Checks whether a character is a whitespace character.
365      * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
366      * The optional preceeding line break is irrelevant, since header
367      * continuation is handled transparently when parsing messages.
368      *
369      * @param ch        the character to check
370      *
371      * @return  <code>true</code> if the character is whitespace,
372      *          <code>false</code> otherwise
373      */
374     protected boolean isWhitespace(char ch) {
375 
376         // we do not use Character.isWhitspace(ch) here, since that allows
377         // many control characters which are not whitespace as per RFC 2616
378         return ((ch == '\t') || Character.isSpaceChar(ch));
379     }
380 
381 
382     /**
383      * Checks whether a character is a valid token character.
384      * Whitespace, control characters, and HTTP separators are not
385      * valid token characters. The HTTP specification (RFC 2616, section 2.2)
386      * defines tokens only for the US-ASCII character set, this
387      * method extends the definition to other character sets.
388      *
389      * @param ch        the character to check
390      *
391      * @return  <code>true</code> if the character is a valid token start,
392      *          <code>false</code> otherwise
393      */
394     protected boolean isTokenChar(char ch) {
395 
396         // common sense extension of ALPHA + DIGIT
397         if (Character.isLetterOrDigit(ch))
398             return true;
399 
400         // common sense extension of CTL
401         if (Character.isISOControl(ch))
402             return false;
403 
404         // no common sense extension for this
405         if (isHttpSeparator(ch))
406             return false;
407 
408         // RFC 2616, section 2.2 defines a token character as
409         // "any CHAR except CTLs or separators". The controls
410         // and separators are included in the checks above.
411         // This will yield unexpected results for Unicode format characters.
412         // If that is a problem, overwrite isHttpSeparator(char) to filter
413         // out the false positives.
414         return true;
415     }
416 
417 
418     /**
419      * Checks whether a character is an HTTP separator.
420      * The implementation in this class checks only for the HTTP separators
421      * defined in RFC 2616, section 2.2. If you need to detect other
422      * separators beyond the US-ASCII character set, override this method.
423      *
424      * @param ch        the character to check
425      *
426      * @return  <code>true</code> if the character is an HTTP separator
427      */
428     protected boolean isHttpSeparator(char ch) {
429         return (HTTP_SEPARATORS.indexOf(ch) >= 0);
430     }
431 
432 
433 } // class BasicTokenIterator
434 
435