1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.net; 19 20 import java.io.IOException; 21 import java.io.ObjectInputStream; 22 import java.io.ObjectOutputStream; 23 import java.io.Serializable; 24 import java.util.Locale; 25 import libcore.net.UriCodec; 26 import libcore.net.url.UrlUtils; 27 28 /** 29 * A Uniform Resource Identifier that identifies an abstract or physical 30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 31 * 2396</a>. 32 * 33 * <h3>Parts of a URI</h3> 34 * A URI is composed of many parts. This class can both parse URI strings into 35 * parts and compose URI strings from parts. 
For example, consider the parts of 36 * this URI: 37 * {@code http://username:password@host:8080/directory/file?query#fragment} 38 * <table> 39 * <tr><th>Component </th><th>Example value </th><th>Also known as</th></tr> 40 * <tr><td>{@link #getScheme() Scheme} </td><td>{@code http} </td><td>protocol</td></tr> 41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr> 42 * <tr><td>{@link #getAuthority() Authority} </td><td>{@code username:password@host:8080} </td><td></td></tr> 43 * <tr><td>{@link #getUserInfo() User Info} </td><td>{@code username:password} </td><td></td></tr> 44 * <tr><td>{@link #getHost() Host} </td><td>{@code host} </td><td></td></tr> 45 * <tr><td>{@link #getPort() Port} </td><td>{@code 8080} </td><td></td></tr> 46 * <tr><td>{@link #getPath() Path} </td><td>{@code /directory/file} </td><td></td></tr> 47 * <tr><td>{@link #getQuery() Query} </td><td>{@code query} </td><td></td></tr> 48 * <tr><td>{@link #getFragment() Fragment} </td><td>{@code fragment} </td><td>ref</td></tr> 49 * </table> 50 * 51 * <h3>Absolute vs. Relative URIs</h3> 52 * URIs are either {@link #isAbsolute() absolute or relative}. 53 * <ul> 54 * <li><strong>Absolute:</strong> {@code http://android.com/robots.txt} 55 * <li><strong>Relative:</strong> {@code robots.txt} 56 * </ul> 57 * 58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link 59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL. 60 * 61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you 62 * have the absolute URI that a relative URI is relative to, you can use {@link 63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use 64 * {@link #relativize} to compute the relative URI from one URI to another. 
65 * <pre> {@code 66 * URI absolute = new URI("http://android.com/"); 67 * URI relative = new URI("robots.txt"); 68 * URI resolved = new URI("http://android.com/robots.txt"); 69 * 70 * // print "http://android.com/robots.txt" 71 * System.out.println(absolute.resolve(relative)); 72 * 73 * // print "robots.txt" 74 * System.out.println(absolute.relativize(resolved)); 75 * }</pre> 76 * 77 * <h3>Opaque vs. Hierarchical URIs</h3> 78 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative 79 * URIs are always hierarchical. 80 * <ul> 81 * <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt} 82 * <li><strong>Opaque:</strong> {@code mailto:robots@example.com} 83 * </ul> 84 * 85 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not 86 * begin with the slash character: {@code /}. The contents of the 87 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never 88 * has an authority, user info, host, port, path or query. An opaque URI may 89 * have a fragment, however. A typical opaque URI is 90 * {@code mailto:robots@example.com}. 91 * <table> 92 * <tr><th>Component </th><th>Example value </th></tr> 93 * <tr><td>Scheme </td><td>{@code mailto} </td></tr> 94 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr> 95 * <tr><td>Fragment </td><td> </td></tr> 96 * </table> 97 * <p>Hierarchical URIs may have values for any URL component. They always 98 * have a non-null path, though that path may be the empty string. 99 * 100 * <h3>Encoding and Decoding URI Components</h3> 101 * Each component of a URI permits a limited set of legal characters. Other 102 * characters must first be <i>encoded</i> before they can be embedded in a URI. 103 * To recover the original characters from a URI, they may be <i>decoded</i>. 104 * <strong>Contrary to what you might expect,</strong> this class uses the 105 * term <i>raw</i> to refer to encoded strings. 
The non-<i>raw</i> accessors 106 * return decoded strings. For example, consider how this URI is decoded: 107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22} 108 * <table> 109 * <tr><th>Component </th><th>Legal Characters </th><th>Other Constraints </th><th>Raw Value </th><th>Value</th></tr> 110 * <tr><td>Scheme </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.} </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td> </td><td>{@code http}</td></tr> 111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr> 112 * <tr><td>Authority </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd@host:80} </td><td>{@code user:pa55w?rd@host:80}</td></tr> 113 * <tr><td>User Info </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd} </td><td>{@code user:pa55w?rd}</td></tr> 114 * <tr><td>Host </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]} </td><td>Domain name, IPv4 address or [IPv6 address] </td><td> </td><td>host</td></tr> 115 * <tr><td>Port </td><td>{@code 0-9} </td><td> </td><td> </td><td>{@code 80}</td></tr> 116 * <tr><td>Path </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@} </td><td>Non-ASCII characters okay </td><td>{@code /doc%7Csearch} </td><td>{@code /doc|search}</td></tr> 117 * <tr><td>Query </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code q=green%20robots} </td><td>{@code q=green robots}</td></tr> 118 * <tr><td>Fragment </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code 
 * _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code over%206%22} </td><td>{@code over 6"}</td></tr>
 * </table>
 * A URI's host, port and scheme are not eligible for encoding and must not
 * contain illegal characters.
 *
 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
 * class. These constructors accept your original strings and encode them into
 * their raw form.
 *
 * <p>To decode a URI, invoke the single-string constructor, and then use the
 * appropriate accessor methods to get the decoded components.
 *
 * <p>The {@link URL} class can be used to retrieve resources by their URI.
 */
public final class URI implements Comparable<URI>, Serializable {

    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = -6052424284110960213l;

    // Both character sets below are consulted by PartEncoder.isRetained(), which
    // reports every character they contain as retained regardless of the
    // component being encoded.
    /** The "unreserved" mark characters. */
    static final String UNRESERVED = "_-!.~\'()*";
    /** Punctuation characters that every PartEncoder also retains. */
    static final String PUNCTUATION = ",;:$&+=";

    // Each encoder below is a PartEncoder whose constructor argument lists the
    // extra characters retained for that component, in addition to UNRESERVED
    // and PUNCTUATION.
    /** Encoder used for the user-info component; retains no extra characters. */
    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
    /** Encoder used for the path component; additionally retains '/' and '@'. */
    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
    /** Encoder used for the authority component; additionally retains '@', '[' and ']'. */
    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");

    /** for java.net.URL, which foolishly combines these two parts */
    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");

    /** for query, fragment, and scheme-specific part */
    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");

    /** Retains all ASCII chars including delimiters. */
    private static final UriCodec ASCII_ONLY = new UriCodec() {
        @Override protected boolean isRetained(char c) {
            // Every char with a code point of 127 or below is retained.
            return c <= 127;
        }
    };

    /**
     * Encodes the unescaped characters of {@code s} that are not permitted.
     * Permitted characters are:
     * <ul>
     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161 * <li>{@code extraOkayChars}, 162 * <li>non-ASCII, non-control, non-whitespace characters 163 * </ul> 164 */ 165 private static class PartEncoder extends UriCodec { 166 private final String extraLegalCharacters; 167 PartEncoder(String extraLegalCharacters)168 PartEncoder(String extraLegalCharacters) { 169 this.extraLegalCharacters = extraLegalCharacters; 170 } 171 isRetained(char c)172 @Override protected boolean isRetained(char c) { 173 return UNRESERVED.indexOf(c) != -1 174 || PUNCTUATION.indexOf(c) != -1 175 || extraLegalCharacters.indexOf(c) != -1 176 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 177 } 178 } 179 180 private String string; 181 private transient String scheme; 182 private transient String schemeSpecificPart; 183 private transient String authority; 184 private transient String userInfo; 185 private transient String host; 186 private transient int port = -1; 187 private transient String path; 188 private transient String query; 189 private transient String fragment; 190 private transient boolean opaque; 191 private transient boolean absolute; 192 private transient boolean serverAuthority = false; 193 194 private transient int hash = -1; 195 URI()196 private URI() {} 197 198 /** 199 * Creates a new URI instance by parsing {@code spec}. 200 * 201 * @param spec a URI whose illegal characters have all been encoded. 202 */ URI(String spec)203 public URI(String spec) throws URISyntaxException { 204 parseURI(spec, false); 205 } 206 207 /** 208 * Creates a new URI instance of the given unencoded component parts. 209 * 210 * @param scheme the URI scheme, or null for a non-absolute URI. 
211 */ URI(String scheme, String schemeSpecificPart, String fragment)212 public URI(String scheme, String schemeSpecificPart, String fragment) 213 throws URISyntaxException { 214 StringBuilder uri = new StringBuilder(); 215 if (scheme != null) { 216 uri.append(scheme); 217 uri.append(':'); 218 } 219 if (schemeSpecificPart != null) { 220 ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart); 221 } 222 if (fragment != null) { 223 uri.append('#'); 224 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 225 } 226 227 parseURI(uri.toString(), false); 228 } 229 230 /** 231 * Creates a new URI instance of the given unencoded component parts. 232 * 233 * @param scheme the URI scheme, or null for a non-absolute URI. 234 */ URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)235 public URI(String scheme, String userInfo, String host, int port, String path, String query, 236 String fragment) throws URISyntaxException { 237 if (scheme == null && userInfo == null && host == null && path == null 238 && query == null && fragment == null) { 239 this.path = ""; 240 return; 241 } 242 243 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 244 throw new URISyntaxException(path, "Relative path"); 245 } 246 247 StringBuilder uri = new StringBuilder(); 248 if (scheme != null) { 249 uri.append(scheme); 250 uri.append(':'); 251 } 252 253 if (userInfo != null || host != null || port != -1) { 254 uri.append("//"); 255 } 256 257 if (userInfo != null) { 258 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 259 uri.append('@'); 260 } 261 262 if (host != null) { 263 // check for IPv6 addresses that hasn't been enclosed in square brackets 264 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) { 265 host = "[" + host + "]"; 266 } 267 uri.append(host); 268 } 269 270 if (port != -1) { 271 uri.append(':'); 272 uri.append(port); 273 } 274 275 if (path != null) { 276 
PATH_ENCODER.appendEncoded(uri, path); 277 } 278 279 if (query != null) { 280 uri.append('?'); 281 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 282 } 283 284 if (fragment != null) { 285 uri.append('#'); 286 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 287 } 288 289 parseURI(uri.toString(), true); 290 } 291 292 /** 293 * Creates a new URI instance of the given unencoded component parts. 294 * 295 * @param scheme the URI scheme, or null for a non-absolute URI. 296 */ URI(String scheme, String host, String path, String fragment)297 public URI(String scheme, String host, String path, String fragment) throws URISyntaxException { 298 this(scheme, null, host, -1, path, null, fragment); 299 } 300 301 /** 302 * Creates a new URI instance of the given unencoded component parts. 303 * 304 * @param scheme the URI scheme, or null for a non-absolute URI. 305 */ URI(String scheme, String authority, String path, String query, String fragment)306 public URI(String scheme, String authority, String path, String query, 307 String fragment) throws URISyntaxException { 308 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 309 throw new URISyntaxException(path, "Relative path"); 310 } 311 312 StringBuilder uri = new StringBuilder(); 313 if (scheme != null) { 314 uri.append(scheme); 315 uri.append(':'); 316 } 317 if (authority != null) { 318 uri.append("//"); 319 AUTHORITY_ENCODER.appendEncoded(uri, authority); 320 } 321 322 if (path != null) { 323 PATH_ENCODER.appendEncoded(uri, path); 324 } 325 if (query != null) { 326 uri.append('?'); 327 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 328 } 329 if (fragment != null) { 330 uri.append('#'); 331 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 332 } 333 334 parseURI(uri.toString(), false); 335 } 336 337 /** 338 * Breaks uri into its component parts. 
This first splits URI into scheme, 339 * scheme-specific part and fragment: 340 * [scheme:][scheme-specific part][#fragment] 341 * 342 * Then it breaks the scheme-specific part into authority, path and query: 343 * [//authority][path][?query] 344 * 345 * Finally it delegates to parseAuthority to break the authority into user 346 * info, host and port: 347 * [user-info@][host][:port] 348 */ parseURI(String uri, boolean forceServer)349 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 350 string = uri; 351 352 // "#fragment" 353 int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); 354 if (fragmentStart < uri.length()) { 355 fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); 356 } 357 358 // scheme: 359 int start; 360 int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); 361 if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { 362 absolute = true; 363 scheme = validateScheme(uri, colon); 364 start = colon + 1; 365 366 if (start == fragmentStart) { 367 throw new URISyntaxException(uri, "Scheme-specific part expected", start); 368 } 369 370 // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
371 if (!uri.regionMatches(start, "/", 0, 1)) { 372 opaque = true; 373 schemeSpecificPart = ALL_LEGAL_ENCODER.validate( 374 uri, start, fragmentStart, "scheme specific part"); 375 return; 376 } 377 } else { 378 absolute = false; 379 start = 0; 380 } 381 382 opaque = false; 383 schemeSpecificPart = uri.substring(start, fragmentStart); 384 385 // "//authority" 386 int fileStart; 387 if (uri.regionMatches(start, "//", 0, 2)) { 388 int authorityStart = start + 2; 389 fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); 390 if (authorityStart == uri.length()) { 391 throw new URISyntaxException(uri, "Authority expected", uri.length()); 392 } 393 if (authorityStart < fileStart) { 394 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); 395 } 396 } else { 397 fileStart = start; 398 } 399 400 // "path" 401 int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); 402 path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); 403 404 // "?query" 405 if (queryStart < fragmentStart) { 406 query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); 407 } 408 409 parseAuthority(forceServer); 410 } 411 validateScheme(String uri, int end)412 private String validateScheme(String uri, int end) throws URISyntaxException { 413 if (end == 0) { 414 throw new URISyntaxException(uri, "Scheme expected", 0); 415 } 416 417 for (int i = 0; i < end; i++) { 418 if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { 419 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 420 } 421 } 422 423 return uri.substring(0, end); 424 } 425 426 /** 427 * Breaks this URI's authority into user info, host and port parts. 428 * [user-info@][host][:port] 429 * If any part of this fails this method will give up and potentially leave 430 * these fields with their default values. 431 * 432 * @param forceServer true to always throw if the authority cannot be 433 * parsed. 
If false, this method may still throw for some kinds of 434 * errors; this unpredictable behavior is consistent with the RI. 435 */ parseAuthority(boolean forceServer)436 private void parseAuthority(boolean forceServer) throws URISyntaxException { 437 if (authority == null) { 438 return; 439 } 440 441 String tempUserInfo = null; 442 String temp = authority; 443 int index = temp.indexOf('@'); 444 int hostIndex = 0; 445 if (index != -1) { 446 // remove user info 447 tempUserInfo = temp.substring(0, index); 448 validateUserInfo(authority, tempUserInfo, 0); 449 temp = temp.substring(index + 1); // host[:port] is left 450 hostIndex = index + 1; 451 } 452 453 index = temp.lastIndexOf(':'); 454 int endIndex = temp.indexOf(']'); 455 456 String tempHost; 457 int tempPort = -1; 458 if (index != -1 && endIndex < index) { 459 // determine port and host 460 tempHost = temp.substring(0, index); 461 462 if (index < (temp.length() - 1)) { // port part is not empty 463 try { 464 char firstPortChar = temp.charAt(index + 1); 465 if (firstPortChar >= '0' && firstPortChar <= '9') { 466 // allow only digits, no signs 467 tempPort = Integer.parseInt(temp.substring(index + 1)); 468 } else { 469 if (forceServer) { 470 throw new URISyntaxException(authority, 471 "Invalid port number", hostIndex + index + 1); 472 } 473 return; 474 } 475 } catch (NumberFormatException e) { 476 if (forceServer) { 477 throw new URISyntaxException(authority, 478 "Invalid port number", hostIndex + index + 1); 479 } 480 return; 481 } 482 } 483 } else { 484 tempHost = temp; 485 } 486 487 if (tempHost.isEmpty()) { 488 if (forceServer) { 489 throw new URISyntaxException(authority, "Expected host", hostIndex); 490 } 491 return; 492 } 493 494 if (!isValidHost(forceServer, tempHost)) { 495 return; 496 } 497 498 // this is a server based uri, 499 // fill in the userInfo, host and port fields 500 userInfo = tempUserInfo; 501 host = tempHost; 502 port = tempPort; 503 serverAuthority = true; 504 } 505 
validateUserInfo(String uri, String userInfo, int index)506 private void validateUserInfo(String uri, String userInfo, int index) 507 throws URISyntaxException { 508 for (int i = 0; i < userInfo.length(); i++) { 509 char ch = userInfo.charAt(i); 510 if (ch == ']' || ch == '[') { 511 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 512 } 513 } 514 } 515 516 /** 517 * Returns true if {@code host} is a well-formed host name or IP address. 518 * 519 * @param forceServer true to always throw if the host cannot be parsed. If 520 * false, this method may still throw for some kinds of errors; this 521 * unpredictable behavior is consistent with the RI. 522 */ isValidHost(boolean forceServer, String host)523 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 524 if (host.startsWith("[")) { 525 // IPv6 address 526 if (!host.endsWith("]")) { 527 throw new URISyntaxException(host, 528 "Expected a closing square bracket for IPv6 address", 0); 529 } 530 if (InetAddress.isNumeric(host)) { 531 // If it's numeric, the presence of square brackets guarantees 532 // that it's a numeric IPv6 address. 533 return true; 534 } 535 throw new URISyntaxException(host, "Malformed IPv6 address"); 536 } 537 538 // '[' and ']' can only be the first char and last char 539 // of the host name 540 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 541 throw new URISyntaxException(host, "Illegal character in host name", 0); 542 } 543 544 int index = host.lastIndexOf('.'); 545 if (index < 0 || index == host.length() - 1 546 || !Character.isDigit(host.charAt(index + 1))) { 547 // domain name 548 if (isValidDomainName(host)) { 549 return true; 550 } 551 if (forceServer) { 552 throw new URISyntaxException(host, "Illegal character in host name", 0); 553 } 554 return false; 555 } 556 557 // IPv4 address? 
558 try { 559 InetAddress ia = InetAddress.parseNumericAddress(host); 560 if (ia instanceof Inet4Address) { 561 return true; 562 } 563 } catch (IllegalArgumentException ignored) { 564 } 565 566 if (forceServer) { 567 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 568 } 569 return false; 570 } 571 isValidDomainName(String host)572 private boolean isValidDomainName(String host) { 573 try { 574 // The RFCs don't permit underscores in hostnames, but URI has to because 575 // a certain large website doesn't seem to care about standards and specs. 576 // See bugs 18023709, 17579865 and 18016625. 577 UriCodec.validateSimple(host, "_-."); 578 } catch (URISyntaxException e) { 579 return false; 580 } 581 582 String lastLabel = null; 583 for (String token : host.split("\\.")) { 584 lastLabel = token; 585 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 586 return false; 587 } 588 } 589 590 if (lastLabel == null) { 591 return false; 592 } 593 594 if (!lastLabel.equals(host)) { 595 char ch = lastLabel.charAt(0); 596 if (ch >= '0' && ch <= '9') { 597 return false; 598 } 599 } 600 return true; 601 } 602 603 /** 604 * Compares this URI with the given argument {@code uri}. This method will 605 * return a negative value if this URI instance is less than the given 606 * argument and a positive value if this URI instance is greater than the 607 * given argument. The return value {@code 0} indicates that the two 608 * instances represent the same URI. To define the order the single parts of 609 * the URI are compared with each other. String components will be ordered 610 * in the natural case-sensitive way. A hierarchical URI is less than an 611 * opaque URI and if one part is {@code null} the URI with the undefined 612 * part is less than the other one. 613 * 614 * @param uri 615 * the URI this instance has to compare with. 616 * @return the value representing the order of the two instances. 
617 */ compareTo(URI uri)618 public int compareTo(URI uri) { 619 int ret; 620 621 // compare schemes 622 if (scheme == null && uri.scheme != null) { 623 return -1; 624 } else if (scheme != null && uri.scheme == null) { 625 return 1; 626 } else if (scheme != null && uri.scheme != null) { 627 ret = scheme.compareToIgnoreCase(uri.scheme); 628 if (ret != 0) { 629 return ret; 630 } 631 } 632 633 // compare opacities 634 if (!opaque && uri.opaque) { 635 return -1; 636 } else if (opaque && !uri.opaque) { 637 return 1; 638 } else if (opaque && uri.opaque) { 639 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 640 if (ret != 0) { 641 return ret; 642 } 643 } else { 644 645 // otherwise both must be hierarchical 646 647 // compare authorities 648 if (authority != null && uri.authority == null) { 649 return 1; 650 } else if (authority == null && uri.authority != null) { 651 return -1; 652 } else if (authority != null && uri.authority != null) { 653 if (host != null && uri.host != null) { 654 // both are server based, so compare userInfo, host, port 655 if (userInfo != null && uri.userInfo == null) { 656 return 1; 657 } else if (userInfo == null && uri.userInfo != null) { 658 return -1; 659 } else if (userInfo != null && uri.userInfo != null) { 660 ret = userInfo.compareTo(uri.userInfo); 661 if (ret != 0) { 662 return ret; 663 } 664 } 665 666 // userInfo's are the same, compare hostname 667 ret = host.compareToIgnoreCase(uri.host); 668 if (ret != 0) { 669 return ret; 670 } 671 672 // compare port 673 if (port != uri.port) { 674 return port - uri.port; 675 } 676 } else { // one or both are registry based, compare the whole 677 // authority 678 ret = authority.compareTo(uri.authority); 679 if (ret != 0) { 680 return ret; 681 } 682 } 683 } 684 685 // authorities are the same 686 // compare paths 687 ret = path.compareTo(uri.path); 688 if (ret != 0) { 689 return ret; 690 } 691 692 // compare queries 693 694 if (query != null && uri.query == null) { 695 return 1; 696 } 
else if (query == null && uri.query != null) { 697 return -1; 698 } else if (query != null && uri.query != null) { 699 ret = query.compareTo(uri.query); 700 if (ret != 0) { 701 return ret; 702 } 703 } 704 } 705 706 // everything else is identical, so compare fragments 707 if (fragment != null && uri.fragment == null) { 708 return 1; 709 } else if (fragment == null && uri.fragment != null) { 710 return -1; 711 } else if (fragment != null && uri.fragment != null) { 712 ret = fragment.compareTo(uri.fragment); 713 if (ret != 0) { 714 return ret; 715 } 716 } 717 718 // identical 719 return 0; 720 } 721 722 /** 723 * Returns the URI formed by parsing {@code uri}. This method behaves 724 * identically to the string constructor but throws a different exception 725 * on failure. The constructor fails with a checked {@link 726 * URISyntaxException}; this method fails with an unchecked {@link 727 * IllegalArgumentException}. 728 */ create(String uri)729 public static URI create(String uri) { 730 try { 731 return new URI(uri); 732 } catch (URISyntaxException e) { 733 throw new IllegalArgumentException(e.getMessage()); 734 } 735 } 736 duplicate()737 private URI duplicate() { 738 URI clone = new URI(); 739 clone.absolute = absolute; 740 clone.authority = authority; 741 clone.fragment = fragment; 742 clone.host = host; 743 clone.opaque = opaque; 744 clone.path = path; 745 clone.port = port; 746 clone.query = query; 747 clone.scheme = scheme; 748 clone.schemeSpecificPart = schemeSpecificPart; 749 clone.userInfo = userInfo; 750 clone.serverAuthority = serverAuthority; 751 return clone; 752 } 753 754 /* 755 * Takes a string that may contain hex sequences like %F1 or %2b and 756 * converts the hex values following the '%' to lowercase 757 */ convertHexToLowerCase(String s)758 private String convertHexToLowerCase(String s) { 759 StringBuilder result = new StringBuilder(""); 760 if (s.indexOf('%') == -1) { 761 return s; 762 } 763 764 int index, prevIndex = 0; 765 while ((index = 
s.indexOf('%', prevIndex)) != -1) { 766 result.append(s.substring(prevIndex, index + 1)); 767 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 768 index += 3; 769 prevIndex = index; 770 } 771 return result.toString(); 772 } 773 774 /** 775 * Returns true if the given URI escaped strings {@code first} and {@code second} are 776 * equal. 777 * 778 * TODO: This method assumes that both strings are escaped using the same escape rules 779 * yet it still performs case insensitive comparison of the escaped sequences. 780 * Why is this necessary ? We can just replace it with first.equals(second) 781 * otherwise. 782 */ escapedEquals(String first, String second)783 private boolean escapedEquals(String first, String second) { 784 // This length test isn't a micro-optimization. We need it because we sometimes 785 // calculate the number of characters to match based on the length of the second 786 // string. If the second string is shorter than the first, we might attempt to match 787 // 0 chars, and regionMatches is specified to return true in that case. 788 if (first.length() != second.length()) { 789 return false; 790 } 791 792 int prevIndex = 0; 793 while (true) { 794 int index = first.indexOf('%', prevIndex); 795 int index1 = second.indexOf('%', prevIndex); 796 if (index != index1) { 797 return false; 798 } 799 800 // index == index1 from this point on. 801 802 if (index == -1) { 803 // No more escapes, match the remainder of the string 804 // normally. 
805 return first.regionMatches(prevIndex, second, prevIndex, 806 second.length() - prevIndex); 807 } 808 809 if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) { 810 return false; 811 } 812 813 if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) { 814 return false; 815 } 816 817 index += 3; 818 prevIndex = index; 819 } 820 } 821 equals(Object o)822 @Override public boolean equals(Object o) { 823 if (!(o instanceof URI)) { 824 return false; 825 } 826 URI uri = (URI) o; 827 828 if (uri.fragment == null && fragment != null || uri.fragment != null 829 && fragment == null) { 830 return false; 831 } else if (uri.fragment != null && fragment != null) { 832 if (!escapedEquals(uri.fragment, fragment)) { 833 return false; 834 } 835 } 836 837 if (uri.scheme == null && scheme != null || uri.scheme != null 838 && scheme == null) { 839 return false; 840 } else if (uri.scheme != null && scheme != null) { 841 if (!uri.scheme.equalsIgnoreCase(scheme)) { 842 return false; 843 } 844 } 845 846 if (uri.opaque && opaque) { 847 return escapedEquals(uri.schemeSpecificPart, 848 schemeSpecificPart); 849 } else if (!uri.opaque && !opaque) { 850 if (!escapedEquals(path, uri.path)) { 851 return false; 852 } 853 854 if (uri.query != null && query == null || uri.query == null 855 && query != null) { 856 return false; 857 } else if (uri.query != null && query != null) { 858 if (!escapedEquals(uri.query, query)) { 859 return false; 860 } 861 } 862 863 if (uri.authority != null && authority == null 864 || uri.authority == null && authority != null) { 865 return false; 866 } else if (uri.authority != null && authority != null) { 867 if (uri.host != null && host == null || uri.host == null 868 && host != null) { 869 return false; 870 } else if (uri.host == null && host == null) { 871 // both are registry based, so compare the whole authority 872 return escapedEquals(uri.authority, authority); 873 } else { // uri.host != null && host != null, so 
server-based 874 if (!host.equalsIgnoreCase(uri.host)) { 875 return false; 876 } 877 878 if (port != uri.port) { 879 return false; 880 } 881 882 if (uri.userInfo != null && userInfo == null 883 || uri.userInfo == null && userInfo != null) { 884 return false; 885 } else if (uri.userInfo != null && userInfo != null) { 886 return escapedEquals(userInfo, uri.userInfo); 887 } else { 888 return true; 889 } 890 } 891 } else { 892 // no authority 893 return true; 894 } 895 896 } else { 897 // one is opaque, the other hierarchical 898 return false; 899 } 900 } 901 902 /** 903 * Returns the scheme of this URI, or null if this URI has no scheme. This 904 * is also known as the protocol. 905 */ getScheme()906 public String getScheme() { 907 return scheme; 908 } 909 910 /** 911 * Returns the decoded scheme-specific part of this URI, or null if this URI 912 * has no scheme-specific part. 913 */ getSchemeSpecificPart()914 public String getSchemeSpecificPart() { 915 return decode(schemeSpecificPart); 916 } 917 918 /** 919 * Returns the encoded scheme-specific part of this URI, or null if this URI 920 * has no scheme-specific part. 921 */ getRawSchemeSpecificPart()922 public String getRawSchemeSpecificPart() { 923 return schemeSpecificPart; 924 } 925 926 /** 927 * Returns the decoded authority part of this URI, or null if this URI has 928 * no authority. 929 */ getAuthority()930 public String getAuthority() { 931 return decode(authority); 932 } 933 934 /** 935 * Returns the encoded authority of this URI, or null if this URI has no 936 * authority. 937 */ getRawAuthority()938 public String getRawAuthority() { 939 return authority; 940 } 941 942 /** 943 * Returns the decoded user info of this URI, or null if this URI has no 944 * user info. 945 */ getUserInfo()946 public String getUserInfo() { 947 return decode(userInfo); 948 } 949 950 /** 951 * Returns the encoded user info of this URI, or null if this URI has no 952 * user info. 
953 */ getRawUserInfo()954 public String getRawUserInfo() { 955 return userInfo; 956 } 957 958 /** 959 * Returns the host of this URI, or null if this URI has no host. 960 */ getHost()961 public String getHost() { 962 return host; 963 } 964 965 /** 966 * Returns the port number of this URI, or {@code -1} if this URI has no 967 * explicit port. 968 */ getPort()969 public int getPort() { 970 return port; 971 } 972 973 /** @hide */ getEffectivePort()974 public int getEffectivePort() { 975 return getEffectivePort(scheme, port); 976 } 977 978 /** 979 * Returns the port to use for {@code scheme} connections will use when 980 * {@link #getPort} returns {@code specifiedPort}. 981 * 982 * @hide 983 */ getEffectivePort(String scheme, int specifiedPort)984 public static int getEffectivePort(String scheme, int specifiedPort) { 985 if (specifiedPort != -1) { 986 return specifiedPort; 987 } 988 989 if ("http".equalsIgnoreCase(scheme)) { 990 return 80; 991 } else if ("https".equalsIgnoreCase(scheme)) { 992 return 443; 993 } else { 994 return -1; 995 } 996 } 997 998 /** 999 * Returns the decoded path of this URI, or null if this URI has no path. 1000 */ getPath()1001 public String getPath() { 1002 return decode(path); 1003 } 1004 1005 /** 1006 * Returns the encoded path of this URI, or null if this URI has no path. 1007 */ getRawPath()1008 public String getRawPath() { 1009 return path; 1010 } 1011 1012 /** 1013 * Returns the decoded query of this URI, or null if this URI has no query. 1014 */ getQuery()1015 public String getQuery() { 1016 return decode(query); 1017 } 1018 1019 /** 1020 * Returns the encoded query of this URI, or null if this URI has no query. 1021 */ getRawQuery()1022 public String getRawQuery() { 1023 return query; 1024 } 1025 1026 /** 1027 * Returns the decoded fragment of this URI, or null if this URI has no 1028 * fragment. 
1029 */ getFragment()1030 public String getFragment() { 1031 return decode(fragment); 1032 } 1033 1034 /** 1035 * Gets the encoded fragment of this URI, or null if this URI has no 1036 * fragment. 1037 */ getRawFragment()1038 public String getRawFragment() { 1039 return fragment; 1040 } 1041 hashCode()1042 @Override public int hashCode() { 1043 if (hash == -1) { 1044 hash = getHashString().hashCode(); 1045 } 1046 return hash; 1047 } 1048 1049 /** 1050 * Returns true if this URI is absolute, which means that a scheme is 1051 * defined. 1052 */ isAbsolute()1053 public boolean isAbsolute() { 1054 // TODO: simplify to 'scheme != null' ? 1055 return absolute; 1056 } 1057 1058 /** 1059 * Returns true if this URI is opaque. Opaque URIs are absolute and have a 1060 * scheme-specific part that does not start with a slash character. All 1061 * parts except scheme, scheme-specific and fragment are undefined. 1062 */ isOpaque()1063 public boolean isOpaque() { 1064 return opaque; 1065 } 1066 1067 /** 1068 * Returns the normalized path. 1069 */ normalize(String path, boolean discardRelativePrefix)1070 private String normalize(String path, boolean discardRelativePrefix) { 1071 path = UrlUtils.canonicalizePath(path, discardRelativePrefix); 1072 1073 /* 1074 * If the path contains a colon before the first colon, prepend 1075 * "./" to differentiate the path from a scheme prefix. 1076 */ 1077 int colon = path.indexOf(':'); 1078 if (colon != -1) { 1079 int slash = path.indexOf('/'); 1080 if (slash == -1 || colon < slash) { 1081 path = "./" + path; 1082 } 1083 } 1084 1085 return path; 1086 } 1087 1088 /** 1089 * Normalizes the path part of this URI. 1090 * 1091 * @return an URI object which represents this instance with a normalized 1092 * path. 
1093 */ normalize()1094 public URI normalize() { 1095 if (opaque) { 1096 return this; 1097 } 1098 String normalizedPath = normalize(path, false); 1099 // if the path is already normalized, return this 1100 if (path.equals(normalizedPath)) { 1101 return this; 1102 } 1103 // get an exact copy of the URI re-calculate the scheme specific part 1104 // since the path of the normalized URI is different from this URI. 1105 URI result = duplicate(); 1106 result.path = normalizedPath; 1107 result.setSchemeSpecificPart(); 1108 return result; 1109 } 1110 1111 /** 1112 * Tries to parse the authority component of this URI to divide it into the 1113 * host, port, and user-info. If this URI is already determined as a 1114 * ServerAuthority this instance will be returned without changes. 1115 * 1116 * @return this instance with the components of the parsed server authority. 1117 * @throws URISyntaxException 1118 * if the authority part could not be parsed as a server-based 1119 * authority. 1120 */ parseServerAuthority()1121 public URI parseServerAuthority() throws URISyntaxException { 1122 if (!serverAuthority) { 1123 parseAuthority(true); 1124 } 1125 return this; 1126 } 1127 1128 /** 1129 * Makes the given URI {@code relative} to a relative URI against the URI 1130 * represented by this instance. 1131 * 1132 * @param relative 1133 * the URI which has to be relativized against this URI. 1134 * @return the relative URI. 1135 */ relativize(URI relative)1136 public URI relativize(URI relative) { 1137 if (relative.opaque || opaque) { 1138 return relative; 1139 } 1140 1141 if (scheme == null ? relative.scheme != null : !scheme 1142 .equals(relative.scheme)) { 1143 return relative; 1144 } 1145 1146 if (authority == null ? 
relative.authority != null : !authority 1147 .equals(relative.authority)) { 1148 return relative; 1149 } 1150 1151 // normalize both paths 1152 String thisPath = normalize(path, false); 1153 String relativePath = normalize(relative.path, false); 1154 1155 /* 1156 * if the paths aren't equal, then we need to determine if this URI's 1157 * path is a parent path (begins with) the relative URI's path 1158 */ 1159 if (!thisPath.equals(relativePath)) { 1160 // drop everything after the last slash in this path 1161 thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1); 1162 1163 /* 1164 * if the relative URI's path doesn't start with this URI's path, 1165 * then just return the relative URI; the URIs have nothing in 1166 * common 1167 */ 1168 if (!relativePath.startsWith(thisPath)) { 1169 return relative; 1170 } 1171 } 1172 1173 URI result = new URI(); 1174 result.fragment = relative.fragment; 1175 result.query = relative.query; 1176 // the result URI is the remainder of the relative URI's path 1177 result.path = relativePath.substring(thisPath.length()); 1178 result.setSchemeSpecificPart(); 1179 return result; 1180 } 1181 1182 /** 1183 * Resolves the given URI {@code relative} against the URI represented by 1184 * this instance. 1185 * 1186 * @param relative 1187 * the URI which has to be resolved against this URI. 1188 * @return the resolved URI. 1189 */ resolve(URI relative)1190 public URI resolve(URI relative) { 1191 if (relative.absolute || opaque) { 1192 return relative; 1193 } 1194 1195 if (relative.authority != null) { 1196 // If the relative URI has an authority, the result is the relative 1197 // with this URI's scheme. 
1198 URI result = relative.duplicate(); 1199 result.scheme = scheme; 1200 result.absolute = absolute; 1201 return result; 1202 } 1203 1204 if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) { 1205 // if the relative URI only consists of at most a fragment, 1206 URI result = duplicate(); 1207 result.fragment = relative.fragment; 1208 return result; 1209 } 1210 1211 URI result = duplicate(); 1212 result.fragment = relative.fragment; 1213 result.query = relative.query; 1214 String resolvedPath; 1215 if (relative.path.startsWith("/")) { 1216 // The relative URI has an absolute path; use it. 1217 resolvedPath = relative.path; 1218 } else if (relative.path.isEmpty()) { 1219 // The relative URI has no path; use the base path. 1220 resolvedPath = path; 1221 } else { 1222 // The relative URI has a relative path; combine the paths. 1223 int endIndex = path.lastIndexOf('/') + 1; 1224 resolvedPath = path.substring(0, endIndex) + relative.path; 1225 } 1226 result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true)); 1227 result.setSchemeSpecificPart(); 1228 return result; 1229 } 1230 1231 /** 1232 * Helper method used to re-calculate the scheme specific part of the 1233 * resolved or normalized URIs 1234 */ setSchemeSpecificPart()1235 private void setSchemeSpecificPart() { 1236 // ssp = [//authority][path][?query] 1237 StringBuilder ssp = new StringBuilder(); 1238 if (authority != null) { 1239 ssp.append("//" + authority); 1240 } 1241 if (path != null) { 1242 ssp.append(path); 1243 } 1244 if (query != null) { 1245 ssp.append("?" + query); 1246 } 1247 schemeSpecificPart = ssp.toString(); 1248 // reset string, so that it can be re-calculated correctly when asked. 1249 string = null; 1250 } 1251 1252 /** 1253 * Creates a new URI instance by parsing the given string {@code relative} 1254 * and resolves the created URI against the URI represented by this 1255 * instance. 
1256 * 1257 * @param relative 1258 * the given string to create the new URI instance which has to 1259 * be resolved later on. 1260 * @return the created and resolved URI. 1261 */ resolve(String relative)1262 public URI resolve(String relative) { 1263 return resolve(create(relative)); 1264 } 1265 decode(String s)1266 private String decode(String s) { 1267 return s != null ? UriCodec.decode(s) : null; 1268 } 1269 1270 /** 1271 * Returns the textual string representation of this URI instance using the 1272 * US-ASCII encoding. 1273 * 1274 * @return the US-ASCII string representation of this URI. 1275 */ toASCIIString()1276 public String toASCIIString() { 1277 StringBuilder result = new StringBuilder(); 1278 ASCII_ONLY.appendEncoded(result, toString()); 1279 return result.toString(); 1280 } 1281 1282 /** 1283 * Returns the encoded URI. 1284 */ toString()1285 @Override public String toString() { 1286 if (string != null) { 1287 return string; 1288 } 1289 1290 StringBuilder result = new StringBuilder(); 1291 if (scheme != null) { 1292 result.append(scheme); 1293 result.append(':'); 1294 } 1295 if (opaque) { 1296 result.append(schemeSpecificPart); 1297 } else { 1298 if (authority != null) { 1299 result.append("//"); 1300 result.append(authority); 1301 } 1302 1303 if (path != null) { 1304 result.append(path); 1305 } 1306 1307 if (query != null) { 1308 result.append('?'); 1309 result.append(query); 1310 } 1311 } 1312 1313 if (fragment != null) { 1314 result.append('#'); 1315 result.append(fragment); 1316 } 1317 1318 string = result.toString(); 1319 return string; 1320 } 1321 1322 /* 1323 * Form a string from the components of this URI, similarly to the 1324 * toString() method. But this method converts scheme and host to lowercase, 1325 * and converts escaped octets to lowercase. 
1326 */ getHashString()1327 private String getHashString() { 1328 StringBuilder result = new StringBuilder(); 1329 if (scheme != null) { 1330 result.append(scheme.toLowerCase(Locale.US)); 1331 result.append(':'); 1332 } 1333 if (opaque) { 1334 result.append(schemeSpecificPart); 1335 } else { 1336 if (authority != null) { 1337 result.append("//"); 1338 if (host == null) { 1339 result.append(authority); 1340 } else { 1341 if (userInfo != null) { 1342 result.append(userInfo + "@"); 1343 } 1344 result.append(host.toLowerCase(Locale.US)); 1345 if (port != -1) { 1346 result.append(":" + port); 1347 } 1348 } 1349 } 1350 1351 if (path != null) { 1352 result.append(path); 1353 } 1354 1355 if (query != null) { 1356 result.append('?'); 1357 result.append(query); 1358 } 1359 } 1360 1361 if (fragment != null) { 1362 result.append('#'); 1363 result.append(fragment); 1364 } 1365 1366 return convertHexToLowerCase(result.toString()); 1367 } 1368 1369 /** 1370 * Converts this URI instance to a URL. 1371 * 1372 * @return the created URL representing the same resource as this URI. 1373 * @throws MalformedURLException 1374 * if an error occurs while creating the URL or no protocol 1375 * handler could be found. 
*/
public URL toURL() throws MalformedURLException {
    // Only absolute URIs can be converted; this is reported as an
    // unchecked IllegalArgumentException.
    if (!absolute) {
        throw new IllegalArgumentException("URI is not absolute: " + toString());
    }
    return new URL(toString());
}

/**
 * Restores this URI from a serialization stream: reads the default
 * serialized fields, then re-parses the restored {@code string} with
 * {@code parseURI}.
 *
 * @throws IOException
 *             if reading fails, or if the restored string is not a valid
 *             URI; in the latter case the {@link URISyntaxException} is
 *             attached as the cause.
 * @throws ClassNotFoundException
 *             if {@code defaultReadObject} cannot find a required class.
 */
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    in.defaultReadObject();
    try {
        parseURI(string, false);
    } catch (URISyntaxException e) {
        // Keep the original exception as the cause so the stack trace of
        // the parse failure is not lost.
        throw new IOException(e.toString(), e);
    }
}

/**
 * Writes this URI to a serialization stream using the default field
 * serialization.
 */
private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
    // call toString() to ensure the value of string field is calculated
    toString();
    out.defaultWriteObject();
}
}