1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.net;
19 
20 import java.io.IOException;
21 import java.io.ObjectInputStream;
22 import java.io.ObjectOutputStream;
23 import java.io.Serializable;
24 import java.util.Locale;
25 import libcore.net.UriCodec;
26 import libcore.net.url.UrlUtils;
27 
28 /**
29  * A Uniform Resource Identifier that identifies an abstract or physical
30  * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31  * 2396</a>.
32  *
33  * <h3>Parts of a URI</h3>
34  * A URI is composed of many parts. This class can both parse URI strings into
35  * parts and compose URI strings from parts. For example, consider the parts of
36  * this URI:
37  * {@code http://username:password@host:8080/directory/file?query#fragment}
38  * <table>
39  * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40  * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41  * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42  * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43  * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44  * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45  * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46  * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47  * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48  * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49  * </table>
50  *
51  * <h3>Absolute vs. Relative URIs</h3>
52  * URIs are either {@link #isAbsolute() absolute or relative}.
53  * <ul>
54  *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55  *     <li><strong>Relative:</strong> {@code robots.txt}
56  * </ul>
57  *
58  * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59  * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60  *
61  * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62  * have the absolute URI that a relative URI is relative to, you can use {@link
63  * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64  * {@link #relativize} to compute the relative URI from one URI to another.
65  * <pre>   {@code
66  *   URI absolute = new URI("http://android.com/");
67  *   URI relative = new URI("robots.txt");
68  *   URI resolved = new URI("http://android.com/robots.txt");
69  *
70  *   // print "http://android.com/robots.txt"
71  *   System.out.println(absolute.resolve(relative));
72  *
73  *   // print "robots.txt"
74  *   System.out.println(absolute.relativize(resolved));
75  * }</pre>
76  *
77  * <h3>Opaque vs. Hierarchical URIs</h3>
78  * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79  * URIs are always hierarchical.
80  * <ul>
81  *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82  *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83  * </ul>
84  *
85  * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
86  * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
90  * {@code mailto:robots@example.com}.
91  * <table>
92  * <tr><th>Component           </th><th>Example value             </th></tr>
93  * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94  * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95  * <tr><td>Fragment            </td><td>                          </td></tr>
96  * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
98  * have a non-null path, though that path may be the empty string.
99  *
100  * <h3>Encoding and Decoding URI Components</h3>
101  * Each component of a URI permits a limited set of legal characters. Other
102  * characters must first be <i>encoded</i> before they can be embedded in a URI.
103  * To recover the original characters from a URI, they may be <i>decoded</i>.
104  * <strong>Contrary to what you might expect,</strong> this class uses the
105  * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106  * return decoded strings. For example, consider how this URI is decoded:
107  * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108  * <table>
109  * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110  * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111  * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112  * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113  * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114  * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115  * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116  * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117  * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118  * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119  * </table>
120  * A URI's host, port and scheme are not eligible for encoding and must not
121  * contain illegal characters.
122  *
123  * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124  * class. These constructors accept your original strings and encode them into
125  * their raw form.
126  *
127  * <p>To decode a URI, invoke the single-string constructor, and then use the
128  * appropriate accessor methods to get the decoded components.
129  *
130  * <p>The {@link URL} class can be used to retrieve resources by their URI.
131  */
132 public final class URI implements Comparable<URI>, Serializable {
133 
134     private static final long serialVersionUID = -6052424284110960213l;
135 
136     static final String UNRESERVED = "_-!.~\'()*";
137     static final String PUNCTUATION = ",;:$&+=";
138 
139     static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140     static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141     static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142 
143     /** for java.net.URL, which foolishly combines these two parts */
144     static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145 
146     /** for query, fragment, and scheme-specific part */
147     static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148 
149     /** Retains all ASCII chars including delimiters. */
150     private static final UriCodec ASCII_ONLY = new UriCodec() {
151         @Override protected boolean isRetained(char c) {
152             return c <= 127;
153         }
154     };
155 
156     /**
157      * Encodes the unescaped characters of {@code s} that are not permitted.
158      * Permitted characters are:
159      * <ul>
160      *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161      *   <li>{@code extraOkayChars},
162      *   <li>non-ASCII, non-control, non-whitespace characters
163      * </ul>
164      */
165     private static class PartEncoder extends UriCodec {
166         private final String extraLegalCharacters;
167 
PartEncoder(String extraLegalCharacters)168         PartEncoder(String extraLegalCharacters) {
169             this.extraLegalCharacters = extraLegalCharacters;
170         }
171 
isRetained(char c)172         @Override protected boolean isRetained(char c) {
173             return UNRESERVED.indexOf(c) != -1
174                     || PUNCTUATION.indexOf(c) != -1
175                     || extraLegalCharacters.indexOf(c) != -1
176                     || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177         }
178     }
179 
    // The complete URI text as handed to parseURI. This is the only instance
    // field that is not marked transient.
    private String string;
    // Scheme; assigned by parseURI only when the URI is absolute.
    private transient String scheme;
    // Everything between "scheme:" and "#fragment".
    private transient String schemeSpecificPart;
    // Authority of a hierarchical URI; stays null when there is none.
    private transient String authority;
    // User info, host and port are filled in by parseAuthority only when the
    // authority parses as [user-info@]host[:port].
    private transient String userInfo;
    private transient String host;
    // -1 means no port.
    private transient int port = -1;
    // Path of a hierarchical URI; parseURI never assigns it for opaque URIs.
    private transient String path;
    private transient String query;
    private transient String fragment;
    // True when a scheme is followed by something that does not start with '/'.
    private transient boolean opaque;
    // True when the URI has a scheme.
    private transient boolean absolute;
    // Set to true by parseAuthority once user info, host and port are accepted.
    private transient boolean serverAuthority = false;

    // NOTE(review): -1 presumably marks the hash as not computed yet; confirm
    // against hashCode().
    private transient int hash = -1;
195 
    /**
     * Creates an instance whose components all keep their field defaults.
     * Used internally, for example by {@code duplicate()}, which fills the
     * fields in afterwards.
     */
    private URI() {}
197 
    /**
     * Creates a new URI instance by parsing {@code spec}. The authority, if
     * any, is not required to be server-based: parsing is done with
     * {@code forceServer} set to false.
     *
     * @param spec a URI whose illegal characters have all been encoded.
     * @throws URISyntaxException if {@code spec} cannot be parsed as a URI.
     */
    public URI(String spec) throws URISyntaxException {
        parseURI(spec, false);
    }
206 
207     /**
208      * Creates a new URI instance of the given unencoded component parts.
209      *
210      * @param scheme the URI scheme, or null for a non-absolute URI.
211      */
URI(String scheme, String schemeSpecificPart, String fragment)212     public URI(String scheme, String schemeSpecificPart, String fragment)
213             throws URISyntaxException {
214         StringBuilder uri = new StringBuilder();
215         if (scheme != null) {
216             uri.append(scheme);
217             uri.append(':');
218         }
219         if (schemeSpecificPart != null) {
220             ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
221         }
222         if (fragment != null) {
223             uri.append('#');
224             ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
225         }
226 
227         parseURI(uri.toString(), false);
228     }
229 
230     /**
231      * Creates a new URI instance of the given unencoded component parts.
232      *
233      * @param scheme the URI scheme, or null for a non-absolute URI.
234      */
URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)235     public URI(String scheme, String userInfo, String host, int port, String path, String query,
236             String fragment) throws URISyntaxException {
237         if (scheme == null && userInfo == null && host == null && path == null
238                 && query == null && fragment == null) {
239             this.path = "";
240             return;
241         }
242 
243         if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
244             throw new URISyntaxException(path, "Relative path");
245         }
246 
247         StringBuilder uri = new StringBuilder();
248         if (scheme != null) {
249             uri.append(scheme);
250             uri.append(':');
251         }
252 
253         if (userInfo != null || host != null || port != -1) {
254             uri.append("//");
255         }
256 
257         if (userInfo != null) {
258             USER_INFO_ENCODER.appendEncoded(uri, userInfo);
259             uri.append('@');
260         }
261 
262         if (host != null) {
263             // check for IPv6 addresses that hasn't been enclosed in square brackets
264             if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
265                 host = "[" + host + "]";
266             }
267             uri.append(host);
268         }
269 
270         if (port != -1) {
271             uri.append(':');
272             uri.append(port);
273         }
274 
275         if (path != null) {
276             PATH_ENCODER.appendEncoded(uri, path);
277         }
278 
279         if (query != null) {
280             uri.append('?');
281             ALL_LEGAL_ENCODER.appendEncoded(uri, query);
282         }
283 
284         if (fragment != null) {
285             uri.append('#');
286             ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
287         }
288 
289         parseURI(uri.toString(), true);
290     }
291 
    /**
     * Creates a new URI instance of the given unencoded component parts. This
     * is shorthand for the seven-argument constructor with no user info, no
     * port ({@code -1}) and no query.
     *
     * @param scheme the URI scheme, or null for a non-absolute URI.
     * @param host the host, or null for none.
     * @param path the unencoded path, or null for none.
     * @param fragment the unencoded fragment, or null for none.
     * @throws URISyntaxException if the seven-argument constructor rejects
     *     these parts.
     */
    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
        this(scheme, null, host, -1, path, null, fragment);
    }
300 
301     /**
302      * Creates a new URI instance of the given unencoded component parts.
303      *
304      * @param scheme the URI scheme, or null for a non-absolute URI.
305      */
URI(String scheme, String authority, String path, String query, String fragment)306     public URI(String scheme, String authority, String path, String query,
307             String fragment) throws URISyntaxException {
308         if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
309             throw new URISyntaxException(path, "Relative path");
310         }
311 
312         StringBuilder uri = new StringBuilder();
313         if (scheme != null) {
314             uri.append(scheme);
315             uri.append(':');
316         }
317         if (authority != null) {
318             uri.append("//");
319             AUTHORITY_ENCODER.appendEncoded(uri, authority);
320         }
321 
322         if (path != null) {
323             PATH_ENCODER.appendEncoded(uri, path);
324         }
325         if (query != null) {
326             uri.append('?');
327             ALL_LEGAL_ENCODER.appendEncoded(uri, query);
328         }
329         if (fragment != null) {
330             uri.append('#');
331             ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
332         }
333 
334         parseURI(uri.toString(), false);
335     }
336 
    /**
     * Breaks uri into its component parts and stores them in this instance's
     * fields. This first splits URI into scheme, scheme-specific part and
     * fragment:
     *   [scheme:][scheme-specific part][#fragment]
     *
     * Then it breaks the scheme-specific part into authority, path and query:
     *   [//authority][path][?query]
     *
     * Finally it delegates to parseAuthority to break the authority into user
     * info, host and port:
     *   [user-info@][host][:port]
     *
     * Opaque URIs (a scheme followed by something that does not start with
     * '/') stop after the first step.
     *
     * @param uri the complete URI string; it is stored as-is in {@code string}.
     * @param forceServer passed through to parseAuthority; true to insist on
     *     a server-based authority.
     * @throws URISyntaxException if a component fails validation, if a scheme
     *     is not followed by a scheme-specific part, or if the string ends
     *     right after "//".
     */
    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
        string = uri;

        // "#fragment"
        // Judging by the check below, findFirstOf yields the end bound
        // (uri.length()) when there is no '#'.
        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
        if (fragmentStart < uri.length()) {
            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
        }

        // scheme:
        // A ':' only terminates a scheme when it comes before the first '/',
        // '?' or '#'; otherwise it belongs to a later component and the URI is
        // relative.
        int start;
        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
            absolute = true;
            scheme = validateScheme(uri, colon);
            start = colon + 1;

            // "scheme:" directly followed by the fragment or the end of input.
            if (start == fragmentStart) {
                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
            }

            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
            if (!uri.regionMatches(start, "/", 0, 1)) {
                opaque = true;
                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
                        uri, start, fragmentStart, "scheme specific part");
                return;
            }
        } else {
            absolute = false;
            start = 0;
        }

        // Hierarchical from here on. The scheme-specific part is stored without
        // being validated as a whole; its authority, path and query pieces are
        // validated individually below.
        opaque = false;
        schemeSpecificPart = uri.substring(start, fragmentStart);

        // "//authority"
        int fileStart;
        if (uri.regionMatches(start, "//", 0, 2)) {
            int authorityStart = start + 2;
            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
            // Only "//" at the very end of the string is rejected here; an empty
            // authority followed by a path, query or fragment leaves authority null.
            if (authorityStart == uri.length()) {
                throw new URISyntaxException(uri, "Authority expected", uri.length());
            }
            if (authorityStart < fileStart) {
                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
            }
        } else {
            fileStart = start;
        }

        // "path"
        // The path runs up to the first '?' or, failing that, up to the fragment.
        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");

        // "?query"
        if (queryStart < fragmentStart) {
            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
        }

        parseAuthority(forceServer);
    }
411 
validateScheme(String uri, int end)412     private String validateScheme(String uri, int end) throws URISyntaxException {
413         if (end == 0) {
414             throw new URISyntaxException(uri, "Scheme expected", 0);
415         }
416 
417         for (int i = 0; i < end; i++) {
418             if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                 throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420             }
421         }
422 
423         return uri.substring(0, end);
424     }
425 
    /**
     * Breaks this URI's authority into user info, host and port parts.
     *   [user-info@][host][:port]
     * If any part of this fails this method will give up and potentially leave
     * these fields with their default values. The userInfo, host, port and
     * serverAuthority fields are only assigned, all together, once every part
     * has been accepted. Does nothing when there is no authority.
     *
     * @param forceServer true to always throw if the authority cannot be
     *     parsed. If false, this method may still throw for some kinds of
     *     errors; this unpredictable behavior is consistent with the RI.
     * @throws URISyntaxException if the user info contains a square bracket,
     *     if the host is rejected with an exception by isValidHost, or, when
     *     {@code forceServer} is true, if the port or host is missing or
     *     malformed.
     */
    private void parseAuthority(boolean forceServer) throws URISyntaxException {
        if (authority == null) {
            return;
        }

        String tempUserInfo = null;
        String temp = authority;
        int index = temp.indexOf('@');
        // Offset of the host within the authority; used for error indices.
        int hostIndex = 0;
        if (index != -1) {
            // remove user info
            tempUserInfo = temp.substring(0, index);
            validateUserInfo(authority, tempUserInfo, 0);
            temp = temp.substring(index + 1); // host[:port] is left
            hostIndex = index + 1;
        }

        index = temp.lastIndexOf(':');
        int endIndex = temp.indexOf(']');

        String tempHost;
        int tempPort = -1;
        // The last ':' only separates a port when it comes after any ']', so
        // the colons inside a bracketed IPv6 address are not mistaken for it.
        if (index != -1 && endIndex < index) {
            // determine port and host
            tempHost = temp.substring(0, index);

            if (index < (temp.length() - 1)) { // port part is not empty
                try {
                    char firstPortChar = temp.charAt(index + 1);
                    if (firstPortChar >= '0' && firstPortChar <= '9') {
                        // allow only digits, no signs
                        // (only the first character is checked here; anything
                        // else parseInt cannot handle lands in the catch below)
                        tempPort = Integer.parseInt(temp.substring(index + 1));
                    } else {
                        if (forceServer) {
                            throw new URISyntaxException(authority,
                                "Invalid port number", hostIndex + index + 1);
                        }
                        return;
                    }
                } catch (NumberFormatException e) {
                    if (forceServer) {
                        throw new URISyntaxException(authority,
                                "Invalid port number", hostIndex + index + 1);
                    }
                    return;
                }
            }
        } else {
            tempHost = temp;
        }

        if (tempHost.isEmpty()) {
            if (forceServer) {
                throw new URISyntaxException(authority, "Expected host", hostIndex);
            }
            return;
        }

        if (!isValidHost(forceServer, tempHost)) {
            return;
        }

        // this is a server based uri,
        // fill in the userInfo, host and port fields
        userInfo = tempUserInfo;
        host = tempHost;
        port = tempPort;
        serverAuthority = true;
    }
505 
validateUserInfo(String uri, String userInfo, int index)506     private void validateUserInfo(String uri, String userInfo, int index)
507             throws URISyntaxException {
508         for (int i = 0; i < userInfo.length(); i++) {
509             char ch = userInfo.charAt(i);
510             if (ch == ']' || ch == '[') {
511                 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
512             }
513         }
514     }
515 
516     /**
517      * Returns true if {@code host} is a well-formed host name or IP address.
518      *
519      * @param forceServer true to always throw if the host cannot be parsed. If
520      *     false, this method may still throw for some kinds of errors; this
521      *     unpredictable behavior is consistent with the RI.
522      */
isValidHost(boolean forceServer, String host)523     private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
524         if (host.startsWith("[")) {
525             // IPv6 address
526             if (!host.endsWith("]")) {
527                 throw new URISyntaxException(host,
528                         "Expected a closing square bracket for IPv6 address", 0);
529             }
530             if (InetAddress.isNumeric(host)) {
531                 // If it's numeric, the presence of square brackets guarantees
532                 // that it's a numeric IPv6 address.
533                 return true;
534             }
535             throw new URISyntaxException(host, "Malformed IPv6 address");
536         }
537 
538         // '[' and ']' can only be the first char and last char
539         // of the host name
540         if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
541             throw new URISyntaxException(host, "Illegal character in host name", 0);
542         }
543 
544         int index = host.lastIndexOf('.');
545         if (index < 0 || index == host.length() - 1
546                 || !Character.isDigit(host.charAt(index + 1))) {
547             // domain name
548             if (isValidDomainName(host)) {
549                 return true;
550             }
551             if (forceServer) {
552                 throw new URISyntaxException(host, "Illegal character in host name", 0);
553             }
554             return false;
555         }
556 
557         // IPv4 address?
558         try {
559             InetAddress ia = InetAddress.parseNumericAddress(host);
560             if (ia instanceof Inet4Address) {
561                 return true;
562             }
563         } catch (IllegalArgumentException ignored) {
564         }
565 
566         if (forceServer) {
567             throw new URISyntaxException(host, "Malformed IPv4 address", 0);
568         }
569         return false;
570     }
571 
isValidDomainName(String host)572     private boolean isValidDomainName(String host) {
573         try {
574             // The RFCs don't permit underscores in hostnames, but URI has to because
575             // a certain large website doesn't seem to care about standards and specs.
576             // See bugs 18023709, 17579865 and 18016625.
577             UriCodec.validateSimple(host, "_-.");
578         } catch (URISyntaxException e) {
579             return false;
580         }
581 
582         String lastLabel = null;
583         for (String token : host.split("\\.")) {
584             lastLabel = token;
585             if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
586                 return false;
587             }
588         }
589 
590         if (lastLabel == null) {
591             return false;
592         }
593 
594         if (!lastLabel.equals(host)) {
595             char ch = lastLabel.charAt(0);
596             if (ch >= '0' && ch <= '9') {
597                 return false;
598             }
599         }
600         return true;
601     }
602 
603     /**
604      * Compares this URI with the given argument {@code uri}. This method will
605      * return a negative value if this URI instance is less than the given
606      * argument and a positive value if this URI instance is greater than the
607      * given argument. The return value {@code 0} indicates that the two
608      * instances represent the same URI. To define the order the single parts of
609      * the URI are compared with each other. String components will be ordered
610      * in the natural case-sensitive way. A hierarchical URI is less than an
611      * opaque URI and if one part is {@code null} the URI with the undefined
612      * part is less than the other one.
613      *
614      * @param uri
615      *            the URI this instance has to compare with.
616      * @return the value representing the order of the two instances.
617      */
compareTo(URI uri)618     public int compareTo(URI uri) {
619         int ret;
620 
621         // compare schemes
622         if (scheme == null && uri.scheme != null) {
623             return -1;
624         } else if (scheme != null && uri.scheme == null) {
625             return 1;
626         } else if (scheme != null && uri.scheme != null) {
627             ret = scheme.compareToIgnoreCase(uri.scheme);
628             if (ret != 0) {
629                 return ret;
630             }
631         }
632 
633         // compare opacities
634         if (!opaque && uri.opaque) {
635             return -1;
636         } else if (opaque && !uri.opaque) {
637             return 1;
638         } else if (opaque && uri.opaque) {
639             ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
640             if (ret != 0) {
641                 return ret;
642             }
643         } else {
644 
645             // otherwise both must be hierarchical
646 
647             // compare authorities
648             if (authority != null && uri.authority == null) {
649                 return 1;
650             } else if (authority == null && uri.authority != null) {
651                 return -1;
652             } else if (authority != null && uri.authority != null) {
653                 if (host != null && uri.host != null) {
654                     // both are server based, so compare userInfo, host, port
655                     if (userInfo != null && uri.userInfo == null) {
656                         return 1;
657                     } else if (userInfo == null && uri.userInfo != null) {
658                         return -1;
659                     } else if (userInfo != null && uri.userInfo != null) {
660                         ret = userInfo.compareTo(uri.userInfo);
661                         if (ret != 0) {
662                             return ret;
663                         }
664                     }
665 
666                     // userInfo's are the same, compare hostname
667                     ret = host.compareToIgnoreCase(uri.host);
668                     if (ret != 0) {
669                         return ret;
670                     }
671 
672                     // compare port
673                     if (port != uri.port) {
674                         return port - uri.port;
675                     }
676                 } else { // one or both are registry based, compare the whole
677                     // authority
678                     ret = authority.compareTo(uri.authority);
679                     if (ret != 0) {
680                         return ret;
681                     }
682                 }
683             }
684 
685             // authorities are the same
686             // compare paths
687             ret = path.compareTo(uri.path);
688             if (ret != 0) {
689                 return ret;
690             }
691 
692             // compare queries
693 
694             if (query != null && uri.query == null) {
695                 return 1;
696             } else if (query == null && uri.query != null) {
697                 return -1;
698             } else if (query != null && uri.query != null) {
699                 ret = query.compareTo(uri.query);
700                 if (ret != 0) {
701                     return ret;
702                 }
703             }
704         }
705 
706         // everything else is identical, so compare fragments
707         if (fragment != null && uri.fragment == null) {
708             return 1;
709         } else if (fragment == null && uri.fragment != null) {
710             return -1;
711         } else if (fragment != null && uri.fragment != null) {
712             ret = fragment.compareTo(uri.fragment);
713             if (ret != 0) {
714                 return ret;
715             }
716         }
717 
718         // identical
719         return 0;
720     }
721 
722     /**
723      * Returns the URI formed by parsing {@code uri}. This method behaves
724      * identically to the string constructor but throws a different exception
725      * on failure. The constructor fails with a checked {@link
726      * URISyntaxException}; this method fails with an unchecked {@link
727      * IllegalArgumentException}.
728      */
create(String uri)729     public static URI create(String uri) {
730         try {
731             return new URI(uri);
732         } catch (URISyntaxException e) {
733             throw new IllegalArgumentException(e.getMessage());
734         }
735     }
736 
duplicate()737     private URI duplicate() {
738         URI clone = new URI();
739         clone.absolute = absolute;
740         clone.authority = authority;
741         clone.fragment = fragment;
742         clone.host = host;
743         clone.opaque = opaque;
744         clone.path = path;
745         clone.port = port;
746         clone.query = query;
747         clone.scheme = scheme;
748         clone.schemeSpecificPart = schemeSpecificPart;
749         clone.userInfo = userInfo;
750         clone.serverAuthority = serverAuthority;
751         return clone;
752     }
753 
754     /*
755      * Takes a string that may contain hex sequences like %F1 or %2b and
756      * converts the hex values following the '%' to lowercase
757      */
convertHexToLowerCase(String s)758     private String convertHexToLowerCase(String s) {
759         StringBuilder result = new StringBuilder("");
760         if (s.indexOf('%') == -1) {
761             return s;
762         }
763 
764         int index, prevIndex = 0;
765         while ((index = s.indexOf('%', prevIndex)) != -1) {
766             result.append(s.substring(prevIndex, index + 1));
767             result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
768             index += 3;
769             prevIndex = index;
770         }
771         return result.toString();
772     }
773 
774     /**
775      * Returns true if the given URI escaped strings {@code first} and {@code second} are
776      * equal.
777      *
778      * TODO: This method assumes that both strings are escaped using the same escape rules
779      * yet it still performs case insensitive comparison of the escaped sequences.
780      * Why is this necessary ? We can just replace it with first.equals(second)
781      * otherwise.
782      */
escapedEquals(String first, String second)783     private boolean escapedEquals(String first, String second) {
784         // This length test isn't a micro-optimization. We need it because we sometimes
785         // calculate the number of characters to match based on the length of the second
786         // string. If the second string is shorter than the first, we might attempt to match
787         // 0 chars, and regionMatches is specified to return true in that case.
788         if (first.length() != second.length()) {
789             return false;
790         }
791 
792         int prevIndex = 0;
793         while (true) {
794             int index = first.indexOf('%', prevIndex);
795             int index1 = second.indexOf('%', prevIndex);
796             if (index != index1) {
797                 return false;
798             }
799 
800             // index == index1 from this point on.
801 
802             if (index == -1) {
803                 // No more escapes, match the remainder of the string
804                 // normally.
805                return first.regionMatches(prevIndex, second, prevIndex,
806                        second.length() - prevIndex);
807             }
808 
809             if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) {
810                 return false;
811             }
812 
813             if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) {
814                 return false;
815             }
816 
817             index += 3;
818             prevIndex = index;
819         }
820     }
821 
equals(Object o)822     @Override public boolean equals(Object o) {
823         if (!(o instanceof URI)) {
824             return false;
825         }
826         URI uri = (URI) o;
827 
828         if (uri.fragment == null && fragment != null || uri.fragment != null
829                 && fragment == null) {
830             return false;
831         } else if (uri.fragment != null && fragment != null) {
832             if (!escapedEquals(uri.fragment, fragment)) {
833                 return false;
834             }
835         }
836 
837         if (uri.scheme == null && scheme != null || uri.scheme != null
838                 && scheme == null) {
839             return false;
840         } else if (uri.scheme != null && scheme != null) {
841             if (!uri.scheme.equalsIgnoreCase(scheme)) {
842                 return false;
843             }
844         }
845 
846         if (uri.opaque && opaque) {
847             return escapedEquals(uri.schemeSpecificPart,
848                     schemeSpecificPart);
849         } else if (!uri.opaque && !opaque) {
850             if (!escapedEquals(path, uri.path)) {
851                 return false;
852             }
853 
854             if (uri.query != null && query == null || uri.query == null
855                     && query != null) {
856                 return false;
857             } else if (uri.query != null && query != null) {
858                 if (!escapedEquals(uri.query, query)) {
859                     return false;
860                 }
861             }
862 
863             if (uri.authority != null && authority == null
864                     || uri.authority == null && authority != null) {
865                 return false;
866             } else if (uri.authority != null && authority != null) {
867                 if (uri.host != null && host == null || uri.host == null
868                         && host != null) {
869                     return false;
870                 } else if (uri.host == null && host == null) {
871                     // both are registry based, so compare the whole authority
872                     return escapedEquals(uri.authority, authority);
873                 } else { // uri.host != null && host != null, so server-based
874                     if (!host.equalsIgnoreCase(uri.host)) {
875                         return false;
876                     }
877 
878                     if (port != uri.port) {
879                         return false;
880                     }
881 
882                     if (uri.userInfo != null && userInfo == null
883                             || uri.userInfo == null && userInfo != null) {
884                         return false;
885                     } else if (uri.userInfo != null && userInfo != null) {
886                         return escapedEquals(userInfo, uri.userInfo);
887                     } else {
888                         return true;
889                     }
890                 }
891             } else {
892                 // no authority
893                 return true;
894             }
895 
896         } else {
897             // one is opaque, the other hierarchical
898             return false;
899         }
900     }
901 
902     /**
903      * Returns the scheme of this URI, or null if this URI has no scheme. This
904      * is also known as the protocol.
905      */
getScheme()906     public String getScheme() {
907         return scheme;
908     }
909 
910     /**
911      * Returns the decoded scheme-specific part of this URI, or null if this URI
912      * has no scheme-specific part.
913      */
getSchemeSpecificPart()914     public String getSchemeSpecificPart() {
915         return decode(schemeSpecificPart);
916     }
917 
918     /**
919      * Returns the encoded scheme-specific part of this URI, or null if this URI
920      * has no scheme-specific part.
921      */
getRawSchemeSpecificPart()922     public String getRawSchemeSpecificPart() {
923         return schemeSpecificPart;
924     }
925 
926     /**
927      * Returns the decoded authority part of this URI, or null if this URI has
928      * no authority.
929      */
getAuthority()930     public String getAuthority() {
931         return decode(authority);
932     }
933 
934     /**
935      * Returns the encoded authority of this URI, or null if this URI has no
936      * authority.
937      */
getRawAuthority()938     public String getRawAuthority() {
939         return authority;
940     }
941 
942     /**
943      * Returns the decoded user info of this URI, or null if this URI has no
944      * user info.
945      */
getUserInfo()946     public String getUserInfo() {
947         return decode(userInfo);
948     }
949 
950     /**
951      * Returns the encoded user info of this URI, or null if this URI has no
952      * user info.
953      */
getRawUserInfo()954     public String getRawUserInfo() {
955         return userInfo;
956     }
957 
958     /**
959      * Returns the host of this URI, or null if this URI has no host.
960      */
getHost()961     public String getHost() {
962         return host;
963     }
964 
965     /**
966      * Returns the port number of this URI, or {@code -1} if this URI has no
967      * explicit port.
968      */
getPort()969     public int getPort() {
970         return port;
971     }
972 
973     /** @hide */
getEffectivePort()974     public int getEffectivePort() {
975         return getEffectivePort(scheme, port);
976     }
977 
978     /**
979      * Returns the port to use for {@code scheme} connections will use when
980      * {@link #getPort} returns {@code specifiedPort}.
981      *
982      * @hide
983      */
getEffectivePort(String scheme, int specifiedPort)984     public static int getEffectivePort(String scheme, int specifiedPort) {
985         if (specifiedPort != -1) {
986             return specifiedPort;
987         }
988 
989         if ("http".equalsIgnoreCase(scheme)) {
990             return 80;
991         } else if ("https".equalsIgnoreCase(scheme)) {
992             return 443;
993         } else {
994             return -1;
995         }
996     }
997 
998     /**
999      * Returns the decoded path of this URI, or null if this URI has no path.
1000      */
getPath()1001     public String getPath() {
1002         return decode(path);
1003     }
1004 
1005     /**
1006      * Returns the encoded path of this URI, or null if this URI has no path.
1007      */
getRawPath()1008     public String getRawPath() {
1009         return path;
1010     }
1011 
1012     /**
1013      * Returns the decoded query of this URI, or null if this URI has no query.
1014      */
getQuery()1015     public String getQuery() {
1016         return decode(query);
1017     }
1018 
1019     /**
1020      * Returns the encoded query of this URI, or null if this URI has no query.
1021      */
getRawQuery()1022     public String getRawQuery() {
1023         return query;
1024     }
1025 
1026     /**
1027      * Returns the decoded fragment of this URI, or null if this URI has no
1028      * fragment.
1029      */
getFragment()1030     public String getFragment() {
1031         return decode(fragment);
1032     }
1033 
1034     /**
1035      * Gets the encoded fragment of this URI, or null if this URI has no
1036      * fragment.
1037      */
getRawFragment()1038     public String getRawFragment() {
1039         return fragment;
1040     }
1041 
hashCode()1042     @Override public int hashCode() {
1043         if (hash == -1) {
1044             hash = getHashString().hashCode();
1045         }
1046         return hash;
1047     }
1048 
1049     /**
1050      * Returns true if this URI is absolute, which means that a scheme is
1051      * defined.
1052      */
isAbsolute()1053     public boolean isAbsolute() {
1054         // TODO: simplify to 'scheme != null' ?
1055         return absolute;
1056     }
1057 
1058     /**
1059      * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1060      * scheme-specific part that does not start with a slash character. All
1061      * parts except scheme, scheme-specific and fragment are undefined.
1062      */
isOpaque()1063     public boolean isOpaque() {
1064         return opaque;
1065     }
1066 
1067     /**
1068      * Returns the normalized path.
1069      */
normalize(String path, boolean discardRelativePrefix)1070     private String normalize(String path, boolean discardRelativePrefix) {
1071         path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1072 
1073         /*
1074          * If the path contains a colon before the first colon, prepend
1075          * "./" to differentiate the path from a scheme prefix.
1076          */
1077         int colon = path.indexOf(':');
1078         if (colon != -1) {
1079             int slash = path.indexOf('/');
1080             if (slash == -1 || colon < slash) {
1081                 path = "./" + path;
1082             }
1083         }
1084 
1085         return path;
1086     }
1087 
1088     /**
1089      * Normalizes the path part of this URI.
1090      *
1091      * @return an URI object which represents this instance with a normalized
1092      *         path.
1093      */
normalize()1094     public URI normalize() {
1095         if (opaque) {
1096             return this;
1097         }
1098         String normalizedPath = normalize(path, false);
1099         // if the path is already normalized, return this
1100         if (path.equals(normalizedPath)) {
1101             return this;
1102         }
1103         // get an exact copy of the URI re-calculate the scheme specific part
1104         // since the path of the normalized URI is different from this URI.
1105         URI result = duplicate();
1106         result.path = normalizedPath;
1107         result.setSchemeSpecificPart();
1108         return result;
1109     }
1110 
1111     /**
1112      * Tries to parse the authority component of this URI to divide it into the
1113      * host, port, and user-info. If this URI is already determined as a
1114      * ServerAuthority this instance will be returned without changes.
1115      *
1116      * @return this instance with the components of the parsed server authority.
1117      * @throws URISyntaxException
1118      *             if the authority part could not be parsed as a server-based
1119      *             authority.
1120      */
parseServerAuthority()1121     public URI parseServerAuthority() throws URISyntaxException {
1122         if (!serverAuthority) {
1123             parseAuthority(true);
1124         }
1125         return this;
1126     }
1127 
1128     /**
1129      * Makes the given URI {@code relative} to a relative URI against the URI
1130      * represented by this instance.
1131      *
1132      * @param relative
1133      *            the URI which has to be relativized against this URI.
1134      * @return the relative URI.
1135      */
relativize(URI relative)1136     public URI relativize(URI relative) {
1137         if (relative.opaque || opaque) {
1138             return relative;
1139         }
1140 
1141         if (scheme == null ? relative.scheme != null : !scheme
1142                 .equals(relative.scheme)) {
1143             return relative;
1144         }
1145 
1146         if (authority == null ? relative.authority != null : !authority
1147                 .equals(relative.authority)) {
1148             return relative;
1149         }
1150 
1151         // normalize both paths
1152         String thisPath = normalize(path, false);
1153         String relativePath = normalize(relative.path, false);
1154 
1155         /*
1156          * if the paths aren't equal, then we need to determine if this URI's
1157          * path is a parent path (begins with) the relative URI's path
1158          */
1159         if (!thisPath.equals(relativePath)) {
1160             // drop everything after the last slash in this path
1161             thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1162 
1163             /*
1164              * if the relative URI's path doesn't start with this URI's path,
1165              * then just return the relative URI; the URIs have nothing in
1166              * common
1167              */
1168             if (!relativePath.startsWith(thisPath)) {
1169                 return relative;
1170             }
1171         }
1172 
1173         URI result = new URI();
1174         result.fragment = relative.fragment;
1175         result.query = relative.query;
1176         // the result URI is the remainder of the relative URI's path
1177         result.path = relativePath.substring(thisPath.length());
1178         result.setSchemeSpecificPart();
1179         return result;
1180     }
1181 
1182     /**
1183      * Resolves the given URI {@code relative} against the URI represented by
1184      * this instance.
1185      *
1186      * @param relative
1187      *            the URI which has to be resolved against this URI.
1188      * @return the resolved URI.
1189      */
resolve(URI relative)1190     public URI resolve(URI relative) {
1191         if (relative.absolute || opaque) {
1192             return relative;
1193         }
1194 
1195         if (relative.authority != null) {
1196             // If the relative URI has an authority, the result is the relative
1197             // with this URI's scheme.
1198             URI result = relative.duplicate();
1199             result.scheme = scheme;
1200             result.absolute = absolute;
1201             return result;
1202         }
1203 
1204         if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1205             // if the relative URI only consists of at most a fragment,
1206             URI result = duplicate();
1207             result.fragment = relative.fragment;
1208             return result;
1209         }
1210 
1211         URI result = duplicate();
1212         result.fragment = relative.fragment;
1213         result.query = relative.query;
1214         String resolvedPath;
1215         if (relative.path.startsWith("/")) {
1216             // The relative URI has an absolute path; use it.
1217             resolvedPath = relative.path;
1218         } else if (relative.path.isEmpty()) {
1219             // The relative URI has no path; use the base path.
1220             resolvedPath = path;
1221         } else {
1222             // The relative URI has a relative path; combine the paths.
1223             int endIndex = path.lastIndexOf('/') + 1;
1224             resolvedPath = path.substring(0, endIndex) + relative.path;
1225         }
1226         result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1227         result.setSchemeSpecificPart();
1228         return result;
1229     }
1230 
1231     /**
1232      * Helper method used to re-calculate the scheme specific part of the
1233      * resolved or normalized URIs
1234      */
setSchemeSpecificPart()1235     private void setSchemeSpecificPart() {
1236         // ssp = [//authority][path][?query]
1237         StringBuilder ssp = new StringBuilder();
1238         if (authority != null) {
1239             ssp.append("//" + authority);
1240         }
1241         if (path != null) {
1242             ssp.append(path);
1243         }
1244         if (query != null) {
1245             ssp.append("?" + query);
1246         }
1247         schemeSpecificPart = ssp.toString();
1248         // reset string, so that it can be re-calculated correctly when asked.
1249         string = null;
1250     }
1251 
1252     /**
1253      * Creates a new URI instance by parsing the given string {@code relative}
1254      * and resolves the created URI against the URI represented by this
1255      * instance.
1256      *
1257      * @param relative
1258      *            the given string to create the new URI instance which has to
1259      *            be resolved later on.
1260      * @return the created and resolved URI.
1261      */
resolve(String relative)1262     public URI resolve(String relative) {
1263         return resolve(create(relative));
1264     }
1265 
decode(String s)1266     private String decode(String s) {
1267         return s != null ? UriCodec.decode(s) : null;
1268     }
1269 
1270     /**
1271      * Returns the textual string representation of this URI instance using the
1272      * US-ASCII encoding.
1273      *
1274      * @return the US-ASCII string representation of this URI.
1275      */
toASCIIString()1276     public String toASCIIString() {
1277         StringBuilder result = new StringBuilder();
1278         ASCII_ONLY.appendEncoded(result, toString());
1279         return result.toString();
1280     }
1281 
1282     /**
1283      * Returns the encoded URI.
1284      */
toString()1285     @Override public String toString() {
1286         if (string != null) {
1287             return string;
1288         }
1289 
1290         StringBuilder result = new StringBuilder();
1291         if (scheme != null) {
1292             result.append(scheme);
1293             result.append(':');
1294         }
1295         if (opaque) {
1296             result.append(schemeSpecificPart);
1297         } else {
1298             if (authority != null) {
1299                 result.append("//");
1300                 result.append(authority);
1301             }
1302 
1303             if (path != null) {
1304                 result.append(path);
1305             }
1306 
1307             if (query != null) {
1308                 result.append('?');
1309                 result.append(query);
1310             }
1311         }
1312 
1313         if (fragment != null) {
1314             result.append('#');
1315             result.append(fragment);
1316         }
1317 
1318         string = result.toString();
1319         return string;
1320     }
1321 
1322     /*
1323      * Form a string from the components of this URI, similarly to the
1324      * toString() method. But this method converts scheme and host to lowercase,
1325      * and converts escaped octets to lowercase.
1326      */
getHashString()1327     private String getHashString() {
1328         StringBuilder result = new StringBuilder();
1329         if (scheme != null) {
1330             result.append(scheme.toLowerCase(Locale.US));
1331             result.append(':');
1332         }
1333         if (opaque) {
1334             result.append(schemeSpecificPart);
1335         } else {
1336             if (authority != null) {
1337                 result.append("//");
1338                 if (host == null) {
1339                     result.append(authority);
1340                 } else {
1341                     if (userInfo != null) {
1342                         result.append(userInfo + "@");
1343                     }
1344                     result.append(host.toLowerCase(Locale.US));
1345                     if (port != -1) {
1346                         result.append(":" + port);
1347                     }
1348                 }
1349             }
1350 
1351             if (path != null) {
1352                 result.append(path);
1353             }
1354 
1355             if (query != null) {
1356                 result.append('?');
1357                 result.append(query);
1358             }
1359         }
1360 
1361         if (fragment != null) {
1362             result.append('#');
1363             result.append(fragment);
1364         }
1365 
1366         return convertHexToLowerCase(result.toString());
1367     }
1368 
1369     /**
1370      * Converts this URI instance to a URL.
1371      *
1372      * @return the created URL representing the same resource as this URI.
1373      * @throws MalformedURLException
1374      *             if an error occurs while creating the URL or no protocol
1375      *             handler could be found.
1376      */
toURL()1377     public URL toURL() throws MalformedURLException {
1378         if (!absolute) {
1379             throw new IllegalArgumentException("URI is not absolute: " + toString());
1380         }
1381         return new URL(toString());
1382     }
1383 
readObject(ObjectInputStream in)1384     private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1385         in.defaultReadObject();
1386         try {
1387             parseURI(string, false);
1388         } catch (URISyntaxException e) {
1389             throw new IOException(e.toString());
1390         }
1391     }
1392 
writeObject(ObjectOutputStream out)1393     private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1394         // call toString() to ensure the value of string field is calculated
1395         toString();
1396         out.defaultWriteObject();
1397     }
1398 }
1399