1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 /*!
10 
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13 
14 
15 # URL parsing and data structures
16 
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18 
19 ```
20 use url::{Url, ParseError};
21 
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24 
25 Let’s parse a valid URL and look at its components.
26 
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34 
35 
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44         Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53 
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57 
58 ```
59 use url::Url;
60 # use url::ParseError;
61 
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64 
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75 
76 ## Serde
77 
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79 
80 # Base URL
81 
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83 
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87 
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89 
90 ```
91 use url::{Url, ParseError};
92 
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95 
96 Use the `join` method on an `Url` to use it as a base URL:
97 
98 ```
99 use url::Url;
100 # use url::ParseError;
101 
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110 
111 # Feature: `serde`
112 
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117 
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121 */
122 
123 #![doc(html_root_url = "https://docs.rs/url/2.2.2")]
124 
125 #[macro_use]
126 extern crate matches;
127 pub use form_urlencoded;
128 
129 #[cfg(feature = "serde")]
130 extern crate serde;
131 
132 use crate::host::HostInternal;
133 use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
134 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
135 use std::borrow::Borrow;
136 use std::cmp;
137 use std::fmt::{self, Write};
138 use std::hash;
139 use std::io;
140 use std::mem;
141 use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
142 use std::ops::{Range, RangeFrom, RangeTo};
143 use std::path::{Path, PathBuf};
144 use std::str;
145 
146 use std::convert::TryFrom;
147 
148 pub use crate::host::Host;
149 pub use crate::origin::{OpaqueOrigin, Origin};
150 pub use crate::parser::{ParseError, SyntaxViolation};
151 pub use crate::path_segments::PathSegmentsMut;
152 pub use crate::slicing::Position;
153 pub use form_urlencoded::EncodingOverride;
154 
155 mod host;
156 mod origin;
157 mod parser;
158 mod path_segments;
159 mod slicing;
160 
161 #[doc(hidden)]
162 pub mod quirks;
163 
/// A parsed URL record.
///
/// The URL is kept as one serialized string plus a set of offsets into that
/// string; the accessor methods return slices of `serialization` delimited
/// by these offsets instead of storing each component separately.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    //
    // Every `u32` below is a byte offset into `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // Start of the host text. When userinfo is present, the byte just before
    // this offset is '@'.
    host_start: u32,
    // End of the host text. When a port is serialized, the byte at this
    // offset is ':' and the port digits run up to `path_start`.
    host_end: u32,
    // Parsed form of the host (none, domain, IPv4 or IPv6); the host text
    // itself is `serialization[host_start..host_end]`.
    host: HostInternal,
    // Numeric value of the port text found between `host_end` and
    // `path_start`; `None` when those two offsets are equal.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
189 
/// Full configuration for the URL parser.
///
/// A value with every option unset is obtained from `Url::options()`; the
/// builder-style methods on this type each replace one field and return the
/// updated options, and `parse` runs the parser with the result.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base is available.
    base_url: Option<&'a Url>,
    // Character-encoding override for query strings (legacy, HTML only).
    encoding_override: EncodingOverride<'a>,
    // Callback invoked for each non-fatal `SyntaxViolation`, if set.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
197 
198 impl<'a> ParseOptions<'a> {
199     /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self200     pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
201         self.base_url = new;
202         self
203     }
204 
205     /// Override the character encoding of query strings.
206     /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self207     pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
208         self.encoding_override = new;
209         self
210     }
211 
212     /// Call the provided function or closure for a non-fatal `SyntaxViolation`
213     /// when it occurs during parsing. Note that since the provided function is
214     /// `Fn`, the caller might need to utilize _interior mutability_, such as with
215     /// a `RefCell`, to collect the violations.
216     ///
217     /// ## Example
218     /// ```
219     /// use std::cell::RefCell;
220     /// use url::{Url, SyntaxViolation};
221     /// # use url::ParseError;
222     /// # fn run() -> Result<(), url::ParseError> {
223     /// let violations = RefCell::new(Vec::new());
224     /// let url = Url::options()
225     ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
226     ///     .parse("https:////example.com")?;
227     /// assert_eq!(url.as_str(), "https://example.com/");
228     /// assert_eq!(violations.into_inner(),
229     ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
230     /// # Ok(())
231     /// # }
232     /// # run().unwrap();
233     /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self234     pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
235         self.violation_fn = new;
236         self
237     }
238 
239     /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>240     pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
241         Parser {
242             serialization: String::with_capacity(input.len()),
243             base_url: self.base_url,
244             query_encoding_override: self.encoding_override,
245             violation_fn: self.violation_fn,
246             context: Context::UrlParser,
247         }
248         .parse_url(input)
249     }
250 }
251 
252 impl Url {
253     /// Parse an absolute URL from a string.
254     ///
255     /// # Examples
256     ///
257     /// ```rust
258     /// use url::Url;
259     /// # use url::ParseError;
260     ///
261     /// # fn run() -> Result<(), ParseError> {
262     /// let url = Url::parse("https://example.net")?;
263     /// # Ok(())
264     /// # }
265     /// # run().unwrap();
266     /// ```
267     ///
268     /// # Errors
269     ///
270     /// If the function can not parse an absolute URL from the given string,
271     /// a [`ParseError`] variant will be returned.
272     ///
273     /// [`ParseError`]: enum.ParseError.html
274     #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>275     pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
276         Url::options().parse(input)
277     }
278 
279     /// Parse an absolute URL from a string and add params to its query string.
280     ///
281     /// Existing params are not removed.
282     ///
283     /// # Examples
284     ///
285     /// ```rust
286     /// use url::Url;
287     /// # use url::ParseError;
288     ///
289     /// # fn run() -> Result<(), ParseError> {
290     /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
291     ///                                  &[("lang", "rust"), ("browser", "servo")])?;
292     /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
293     /// # Ok(())
294     /// # }
295     /// # run().unwrap();
296     /// ```
297     ///
298     /// # Errors
299     ///
300     /// If the function can not parse an absolute URL from the given string,
301     /// a [`ParseError`] variant will be returned.
302     ///
303     /// [`ParseError`]: enum.ParseError.html
304     #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,305     pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
306     where
307         I: IntoIterator,
308         I::Item: Borrow<(K, V)>,
309         K: AsRef<str>,
310         V: AsRef<str>,
311     {
312         let mut url = Url::options().parse(input);
313 
314         if let Ok(ref mut url) = url {
315             url.query_pairs_mut().extend_pairs(iter);
316         }
317 
318         url
319     }
320 
321     /// Parse a string as an URL, with this URL as the base URL.
322     ///
323     /// The inverse of this is [`make_relative`].
324     ///
325     /// Note: a trailing slash is significant.
326     /// Without it, the last path component is considered to be a “file” name
327     /// to be removed to get at the “directory” that is used as the base:
328     ///
329     /// # Examples
330     ///
331     /// ```rust
332     /// use url::Url;
333     /// # use url::ParseError;
334     ///
335     /// # fn run() -> Result<(), ParseError> {
336     /// let base = Url::parse("https://example.net/a/b.html")?;
337     /// let url = base.join("c.png")?;
338     /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
339     ///
340     /// let base = Url::parse("https://example.net/a/b/")?;
341     /// let url = base.join("c.png")?;
342     /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
343     /// # Ok(())
344     /// # }
345     /// # run().unwrap();
346     /// ```
347     ///
348     /// # Errors
349     ///
350     /// If the function can not parse an URL from the given string
351     /// with this URL as the base URL, a [`ParseError`] variant will be returned.
352     ///
353     /// [`ParseError`]: enum.ParseError.html
354     /// [`make_relative`]: #method.make_relative
355     #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>356     pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
357         Url::options().base_url(Some(self)).parse(input)
358     }
359 
360     /// Creates a relative URL if possible, with this URL as the base URL.
361     ///
362     /// This is the inverse of [`join`].
363     ///
364     /// # Examples
365     ///
366     /// ```rust
367     /// use url::Url;
368     /// # use url::ParseError;
369     ///
370     /// # fn run() -> Result<(), ParseError> {
371     /// let base = Url::parse("https://example.net/a/b.html")?;
372     /// let url = Url::parse("https://example.net/a/c.png")?;
373     /// let relative = base.make_relative(&url);
374     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
375     ///
376     /// let base = Url::parse("https://example.net/a/b/")?;
377     /// let url = Url::parse("https://example.net/a/b/c.png")?;
378     /// let relative = base.make_relative(&url);
379     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
380     ///
381     /// let base = Url::parse("https://example.net/a/b/")?;
382     /// let url = Url::parse("https://example.net/a/d/c.png")?;
383     /// let relative = base.make_relative(&url);
384     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
385     ///
386     /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
387     /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
388     /// let relative = base.make_relative(&url);
389     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
390     /// # Ok(())
391     /// # }
392     /// # run().unwrap();
393     /// ```
394     ///
395     /// # Errors
396     ///
397     /// If this URL can't be a base for the given URL, `None` is returned.
398     /// This is for example the case if the scheme, host or port are not the same.
399     ///
400     /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>401     pub fn make_relative(&self, url: &Url) -> Option<String> {
402         if self.cannot_be_a_base() {
403             return None;
404         }
405 
406         // Scheme, host and port need to be the same
407         if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
408             return None;
409         }
410 
411         // We ignore username/password at this point
412 
413         // The path has to be transformed
414         let mut relative = String::new();
415 
416         // Extract the filename of both URIs, these need to be handled separately
417         fn extract_path_filename(s: &str) -> (&str, &str) {
418             let last_slash_idx = s.rfind('/').unwrap_or(0);
419             let (path, filename) = s.split_at(last_slash_idx);
420             if filename.is_empty() {
421                 (path, "")
422             } else {
423                 (path, &filename[1..])
424             }
425         }
426 
427         let (base_path, base_filename) = extract_path_filename(self.path());
428         let (url_path, url_filename) = extract_path_filename(url.path());
429 
430         let mut base_path = base_path.split('/').peekable();
431         let mut url_path = url_path.split('/').peekable();
432 
433         // Skip over the common prefix
434         while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
435             base_path.next();
436             url_path.next();
437         }
438 
439         // Add `..` segments for the remainder of the base path
440         for base_path_segment in base_path {
441             // Skip empty last segments
442             if base_path_segment.is_empty() {
443                 break;
444             }
445 
446             if !relative.is_empty() {
447                 relative.push('/');
448             }
449 
450             relative.push_str("..");
451         }
452 
453         // Append the remainder of the other URI
454         for url_path_segment in url_path {
455             if !relative.is_empty() {
456                 relative.push('/');
457             }
458 
459             relative.push_str(url_path_segment);
460         }
461 
462         // Add the filename if they are not the same
463         if base_filename != url_filename {
464             // If the URIs filename is empty this means that it was a directory
465             // so we'll have to append a '/'.
466             //
467             // Otherwise append it directly as the new filename.
468             if url_filename.is_empty() {
469                 relative.push('/');
470             } else {
471                 if !relative.is_empty() {
472                     relative.push('/');
473                 }
474                 relative.push_str(url_filename);
475             }
476         }
477 
478         // Query and fragment are only taken from the other URI
479         if let Some(query) = url.query() {
480             relative.push('?');
481             relative.push_str(query);
482         }
483 
484         if let Some(fragment) = url.fragment() {
485             relative.push('#');
486             relative.push_str(fragment);
487         }
488 
489         Some(relative)
490     }
491 
492     /// Return a default `ParseOptions` that can fully configure the URL parser.
493     ///
494     /// # Examples
495     ///
496     /// Get default `ParseOptions`, then change base url
497     ///
498     /// ```rust
499     /// use url::Url;
500     /// # use url::ParseError;
501     /// # fn run() -> Result<(), ParseError> {
502     /// let options = Url::options();
503     /// let api = Url::parse("https://api.example.com")?;
504     /// let base_url = options.base_url(Some(&api));
505     /// let version_url = base_url.parse("version.json")?;
506     /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
507     /// # Ok(())
508     /// # }
509     /// # run().unwrap();
510     /// ```
options<'a>() -> ParseOptions<'a>511     pub fn options<'a>() -> ParseOptions<'a> {
512         ParseOptions {
513             base_url: None,
514             encoding_override: None,
515             violation_fn: None,
516         }
517     }
518 
519     /// Return the serialization of this URL.
520     ///
521     /// This is fast since that serialization is already stored in the `Url` struct.
522     ///
523     /// # Examples
524     ///
525     /// ```rust
526     /// use url::Url;
527     /// # use url::ParseError;
528     ///
529     /// # fn run() -> Result<(), ParseError> {
530     /// let url_str = "https://example.net/";
531     /// let url = Url::parse(url_str)?;
532     /// assert_eq!(url.as_str(), url_str);
533     /// # Ok(())
534     /// # }
535     /// # run().unwrap();
536     /// ```
537     #[inline]
as_str(&self) -> &str538     pub fn as_str(&self) -> &str {
539         &self.serialization
540     }
541 
542     /// Return the serialization of this URL.
543     ///
544     /// This consumes the `Url` and takes ownership of the `String` stored in it.
545     ///
546     /// # Examples
547     ///
548     /// ```rust
549     /// use url::Url;
550     /// # use url::ParseError;
551     ///
552     /// # fn run() -> Result<(), ParseError> {
553     /// let url_str = "https://example.net/";
554     /// let url = Url::parse(url_str)?;
555     /// assert_eq!(String::from(url), url_str);
556     /// # Ok(())
557     /// # }
558     /// # run().unwrap();
559     /// ```
560     #[inline]
561     #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String562     pub fn into_string(self) -> String {
563         self.into()
564     }
565 
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing
    /// the first one that is not met, or `Ok(())` when all of them hold.
    /// This is for testing rust-url itself.
    ///
    /// Note that the two `expect` calls in the body (parsing the port text
    /// and re-parsing the serialization) still panic instead of returning `Err`.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local replacement for `assert!`: instead of panicking, a failed
        // check returns early from `check_invariants` with an error message.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        // Local replacement for `assert_eq!` with the same early-return behavior;
        // the message carries both values and both source expressions.
        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: non-empty, starts with an ASCII letter, continues with
        // ASCII letters, digits, '+', '-' or '.', and is followed by ':'.
        assert!(self.scheme_end >= 1);
        assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                match self.byte_at(self.username_end) {
                    // ':' after the username introduces a password, which
                    // must be terminated by '@' right before the host.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // Username without password: the host follows the '@'.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // No userinfo: `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host text must agree with the parsed `host` field.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Either the path starts right after the host (no port), or a
            // ':' and the port digits sit between the host and the path.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            // All authority-related offsets collapse to just after the ':'.
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
        // Query and fragment offsets point at their '?' / '#' delimiter,
        // come after the start of the path, and the fragment follows the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: re-parsing the serialization must yield the same
        // serialization and the same component offsets.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
688 
689     /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
690     ///
691     /// Note: this returns an opaque origin for `file:` URLs, which causes
692     /// `url.origin() != url.origin()`.
693     ///
694     /// # Examples
695     ///
696     /// URL with `ftp` scheme:
697     ///
698     /// ```rust
699     /// use url::{Host, Origin, Url};
700     /// # use url::ParseError;
701     ///
702     /// # fn run() -> Result<(), ParseError> {
703     /// let url = Url::parse("ftp://example.com/foo")?;
704     /// assert_eq!(url.origin(),
705     ///            Origin::Tuple("ftp".into(),
706     ///                          Host::Domain("example.com".into()),
707     ///                          21));
708     /// # Ok(())
709     /// # }
710     /// # run().unwrap();
711     /// ```
712     ///
713     /// URL with `blob` scheme:
714     ///
715     /// ```rust
716     /// use url::{Host, Origin, Url};
717     /// # use url::ParseError;
718     ///
719     /// # fn run() -> Result<(), ParseError> {
720     /// let url = Url::parse("blob:https://example.com/foo")?;
721     /// assert_eq!(url.origin(),
722     ///            Origin::Tuple("https".into(),
723     ///                          Host::Domain("example.com".into()),
724     ///                          443));
725     /// # Ok(())
726     /// # }
727     /// # run().unwrap();
728     /// ```
729     ///
730     /// URL with `file` scheme:
731     ///
732     /// ```rust
733     /// use url::{Host, Origin, Url};
734     /// # use url::ParseError;
735     ///
736     /// # fn run() -> Result<(), ParseError> {
737     /// let url = Url::parse("file:///tmp/foo")?;
738     /// assert!(!url.origin().is_tuple());
739     ///
740     /// let other_url = Url::parse("file:///tmp/foo")?;
741     /// assert!(url.origin() != other_url.origin());
742     /// # Ok(())
743     /// # }
744     /// # run().unwrap();
745     /// ```
746     ///
747     /// URL with other scheme:
748     ///
749     /// ```rust
750     /// use url::{Host, Origin, Url};
751     /// # use url::ParseError;
752     ///
753     /// # fn run() -> Result<(), ParseError> {
754     /// let url = Url::parse("foo:bar")?;
755     /// assert!(!url.origin().is_tuple());
756     /// # Ok(())
757     /// # }
758     /// # run().unwrap();
759     /// ```
760     #[inline]
origin(&self) -> Origin761     pub fn origin(&self) -> Origin {
762         origin::url_origin(self)
763     }
764 
765     /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
766     ///
767     /// # Examples
768     ///
769     /// ```
770     /// use url::Url;
771     /// # use url::ParseError;
772     ///
773     /// # fn run() -> Result<(), ParseError> {
774     /// let url = Url::parse("file:///tmp/foo")?;
775     /// assert_eq!(url.scheme(), "file");
776     /// # Ok(())
777     /// # }
778     /// # run().unwrap();
779     /// ```
780     #[inline]
scheme(&self) -> &str781     pub fn scheme(&self) -> &str {
782         self.slice(..self.scheme_end)
783     }
784 
785     /// Return whether the URL has an 'authority',
786     /// which can contain a username, password, host, and port number.
787     ///
788     /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
789     /// or cannot-be-a-base like `data:text/plain,Stuff`.
790     ///
791     /// # Examples
792     ///
793     /// ```
794     /// use url::Url;
795     /// # use url::ParseError;
796     ///
797     /// # fn run() -> Result<(), ParseError> {
798     /// let url = Url::parse("ftp://rms@example.com")?;
799     /// assert!(url.has_authority());
800     ///
801     /// let url = Url::parse("unix:/run/foo.socket")?;
802     /// assert!(!url.has_authority());
803     ///
804     /// let url = Url::parse("data:text/plain,Stuff")?;
805     /// assert!(!url.has_authority());
806     /// # Ok(())
807     /// # }
808     /// # run().unwrap();
809     /// ```
810     #[inline]
has_authority(&self) -> bool811     pub fn has_authority(&self) -> bool {
812         debug_assert!(self.byte_at(self.scheme_end) == b':');
813         self.slice(self.scheme_end..).starts_with("://")
814     }
815 
816     /// Return whether this URL is a cannot-be-a-base URL,
817     /// meaning that parsing a relative URL string with this URL as the base will return an error.
818     ///
819     /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
820     /// as is typically the case of `data:` and `mailto:` URLs.
821     ///
822     /// # Examples
823     ///
824     /// ```
825     /// use url::Url;
826     /// # use url::ParseError;
827     ///
828     /// # fn run() -> Result<(), ParseError> {
829     /// let url = Url::parse("ftp://rms@example.com")?;
830     /// assert!(!url.cannot_be_a_base());
831     ///
832     /// let url = Url::parse("unix:/run/foo.socket")?;
833     /// assert!(!url.cannot_be_a_base());
834     ///
835     /// let url = Url::parse("data:text/plain,Stuff")?;
836     /// assert!(url.cannot_be_a_base());
837     /// # Ok(())
838     /// # }
839     /// # run().unwrap();
840     /// ```
841     #[inline]
cannot_be_a_base(&self) -> bool842     pub fn cannot_be_a_base(&self) -> bool {
843         !self.slice(self.scheme_end + 1..).starts_with('/')
844     }
845 
846     /// Return the username for this URL (typically the empty string)
847     /// as a percent-encoded ASCII string.
848     ///
849     /// # Examples
850     ///
851     /// ```
852     /// use url::Url;
853     /// # use url::ParseError;
854     ///
855     /// # fn run() -> Result<(), ParseError> {
856     /// let url = Url::parse("ftp://rms@example.com")?;
857     /// assert_eq!(url.username(), "rms");
858     ///
859     /// let url = Url::parse("ftp://:secret123@example.com")?;
860     /// assert_eq!(url.username(), "");
861     ///
862     /// let url = Url::parse("https://example.com")?;
863     /// assert_eq!(url.username(), "");
864     /// # Ok(())
865     /// # }
866     /// # run().unwrap();
867     /// ```
username(&self) -> &str868     pub fn username(&self) -> &str {
869         let scheme_separator_len = "://".len() as u32;
870         if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
871             self.slice(self.scheme_end + scheme_separator_len..self.username_end)
872         } else {
873             ""
874         }
875     }
876 
877     /// Return the password for this URL, if any, as a percent-encoded ASCII string.
878     ///
879     /// # Examples
880     ///
881     /// ```
882     /// use url::Url;
883     /// # use url::ParseError;
884     ///
885     /// # fn run() -> Result<(), ParseError> {
886     /// let url = Url::parse("ftp://rms:secret123@example.com")?;
887     /// assert_eq!(url.password(), Some("secret123"));
888     ///
889     /// let url = Url::parse("ftp://:secret123@example.com")?;
890     /// assert_eq!(url.password(), Some("secret123"));
891     ///
892     /// let url = Url::parse("ftp://rms@example.com")?;
893     /// assert_eq!(url.password(), None);
894     ///
895     /// let url = Url::parse("https://example.com")?;
896     /// assert_eq!(url.password(), None);
897     /// # Ok(())
898     /// # }
899     /// # run().unwrap();
900     /// ```
password(&self) -> Option<&str>901     pub fn password(&self) -> Option<&str> {
902         // This ':' is not the one marking a port number since a host can not be empty.
903         // (Except for file: URLs, which do not have port numbers.)
904         if self.has_authority()
905             && self.username_end != self.serialization.len() as u32
906             && self.byte_at(self.username_end) == b':'
907         {
908             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
909             Some(self.slice(self.username_end + 1..self.host_start - 1))
910         } else {
911             None
912         }
913     }
914 
915     /// Equivalent to `url.host().is_some()`.
916     ///
917     /// # Examples
918     ///
919     /// ```
920     /// use url::Url;
921     /// # use url::ParseError;
922     ///
923     /// # fn run() -> Result<(), ParseError> {
924     /// let url = Url::parse("ftp://rms@example.com")?;
925     /// assert!(url.has_host());
926     ///
927     /// let url = Url::parse("unix:/run/foo.socket")?;
928     /// assert!(!url.has_host());
929     ///
930     /// let url = Url::parse("data:text/plain,Stuff")?;
931     /// assert!(!url.has_host());
932     /// # Ok(())
933     /// # }
934     /// # run().unwrap();
935     /// ```
has_host(&self) -> bool936     pub fn has_host(&self) -> bool {
937         !matches!(self.host, HostInternal::None)
938     }
939 
940     /// Return the string representation of the host (domain or IP address) for this URL, if any.
941     ///
942     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
943     /// of a special URL, or percent encoded for non-special URLs.
944     /// IPv6 addresses are given between `[` and `]` brackets.
945     ///
946     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
947     /// don’t have a host.
948     ///
949     /// See also the `host` method.
950     ///
951     /// # Examples
952     ///
953     /// ```
954     /// use url::Url;
955     /// # use url::ParseError;
956     ///
957     /// # fn run() -> Result<(), ParseError> {
958     /// let url = Url::parse("https://127.0.0.1/index.html")?;
959     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
960     ///
961     /// let url = Url::parse("ftp://rms@example.com")?;
962     /// assert_eq!(url.host_str(), Some("example.com"));
963     ///
964     /// let url = Url::parse("unix:/run/foo.socket")?;
965     /// assert_eq!(url.host_str(), None);
966     ///
967     /// let url = Url::parse("data:text/plain,Stuff")?;
968     /// assert_eq!(url.host_str(), None);
969     /// # Ok(())
970     /// # }
971     /// # run().unwrap();
972     /// ```
host_str(&self) -> Option<&str>973     pub fn host_str(&self) -> Option<&str> {
974         if self.has_host() {
975             Some(self.slice(self.host_start..self.host_end))
976         } else {
977             None
978         }
979     }
980 
981     /// Return the parsed representation of the host for this URL.
982     /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
983     /// of a special URL, or percent encoded for non-special URLs.
984     ///
985     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
986     /// don’t have a host.
987     ///
988     /// See also the `host_str` method.
989     ///
990     /// # Examples
991     ///
992     /// ```
993     /// use url::Url;
994     /// # use url::ParseError;
995     ///
996     /// # fn run() -> Result<(), ParseError> {
997     /// let url = Url::parse("https://127.0.0.1/index.html")?;
998     /// assert!(url.host().is_some());
999     ///
1000     /// let url = Url::parse("ftp://rms@example.com")?;
1001     /// assert!(url.host().is_some());
1002     ///
1003     /// let url = Url::parse("unix:/run/foo.socket")?;
1004     /// assert!(url.host().is_none());
1005     ///
1006     /// let url = Url::parse("data:text/plain,Stuff")?;
1007     /// assert!(url.host().is_none());
1008     /// # Ok(())
1009     /// # }
1010     /// # run().unwrap();
1011     /// ```
host(&self) -> Option<Host<&str>>1012     pub fn host(&self) -> Option<Host<&str>> {
1013         match self.host {
1014             HostInternal::None => None,
1015             HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1016             HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1017             HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1018         }
1019     }
1020 
1021     /// If this URL has a host and it is a domain name (not an IP address), return it.
1022     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1023     /// of a special URL, or percent encoded for non-special URLs.
1024     ///
1025     /// # Examples
1026     ///
1027     /// ```
1028     /// use url::Url;
1029     /// # use url::ParseError;
1030     ///
1031     /// # fn run() -> Result<(), ParseError> {
1032     /// let url = Url::parse("https://127.0.0.1/")?;
1033     /// assert_eq!(url.domain(), None);
1034     ///
1035     /// let url = Url::parse("mailto:rms@example.net")?;
1036     /// assert_eq!(url.domain(), None);
1037     ///
1038     /// let url = Url::parse("https://example.com/")?;
1039     /// assert_eq!(url.domain(), Some("example.com"));
1040     /// # Ok(())
1041     /// # }
1042     /// # run().unwrap();
1043     /// ```
domain(&self) -> Option<&str>1044     pub fn domain(&self) -> Option<&str> {
1045         match self.host {
1046             HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1047             _ => None,
1048         }
1049     }
1050 
1051     /// Return the port number for this URL, if any.
1052     ///
1053     /// Note that default port numbers are never reflected by the serialization,
1054     /// use the `port_or_known_default()` method if you want a default port number returned.
1055     ///
1056     /// # Examples
1057     ///
1058     /// ```
1059     /// use url::Url;
1060     /// # use url::ParseError;
1061     ///
1062     /// # fn run() -> Result<(), ParseError> {
1063     /// let url = Url::parse("https://example.com")?;
1064     /// assert_eq!(url.port(), None);
1065     ///
1066     /// let url = Url::parse("https://example.com:443/")?;
1067     /// assert_eq!(url.port(), None);
1068     ///
1069     /// let url = Url::parse("ssh://example.com:22")?;
1070     /// assert_eq!(url.port(), Some(22));
1071     /// # Ok(())
1072     /// # }
1073     /// # run().unwrap();
1074     /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        // Plain read of the stored `port` field; no default port is substituted here
        // (the sibling `port_or_known_default` consults `parser::default_port` for that).
        self.port
    }
1079 
1080     /// Return the port number for this URL, or the default port number if it is known.
1081     ///
1082     /// This method only knows the default port number
1083     /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1084     ///
1085     /// For URLs in these schemes, this method always returns `Some(_)`.
1086     /// For other schemes, it is the same as `Url::port()`.
1087     ///
1088     /// # Examples
1089     ///
1090     /// ```
1091     /// use url::Url;
1092     /// # use url::ParseError;
1093     ///
1094     /// # fn run() -> Result<(), ParseError> {
1095     /// let url = Url::parse("foo://example.com")?;
1096     /// assert_eq!(url.port_or_known_default(), None);
1097     ///
1098     /// let url = Url::parse("foo://example.com:1456")?;
1099     /// assert_eq!(url.port_or_known_default(), Some(1456));
1100     ///
1101     /// let url = Url::parse("https://example.com")?;
1102     /// assert_eq!(url.port_or_known_default(), Some(443));
1103     /// # Ok(())
1104     /// # }
1105     /// # run().unwrap();
1106     /// ```
1107     #[inline]
port_or_known_default(&self) -> Option<u16>1108     pub fn port_or_known_default(&self) -> Option<u16> {
1109         self.port.or_else(|| parser::default_port(self.scheme()))
1110     }
1111 
1112     /// Resolve a URL’s host and port number to `SocketAddr`.
1113     ///
1114     /// If the URL has the default port number of a scheme that is unknown to this library,
1115     /// `default_port_number` provides an opportunity to provide the actual port number.
1116     /// In non-example code this should be implemented either simply as `|| None`,
1117     /// or by matching on the URL’s `.scheme()`.
1118     ///
1119     /// If the host is a domain, it is resolved using the standard library’s DNS support.
1120     ///
1121     /// # Examples
1122     ///
1123     /// ```no_run
1124     /// let url = url::Url::parse("https://example.net/").unwrap();
1125     /// let addrs = url.socket_addrs(|| None).unwrap();
1126     /// std::net::TcpStream::connect(&*addrs)
1127     /// # ;
1128     /// ```
1129     ///
1130     /// ```
1131     /// /// With application-specific known default port numbers
1132     /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1133     ///     url.socket_addrs(|| match url.scheme() {
1134     ///         "socks5" | "socks5h" => Some(1080),
1135     ///         _ => None,
1136     ///     })
1137     /// }
1138     /// ```
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1139     pub fn socket_addrs(
1140         &self,
1141         default_port_number: impl Fn() -> Option<u16>,
1142     ) -> io::Result<Vec<SocketAddr>> {
1143         // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1144         // causes borrowck issues because the return value borrows `default_port_number`:
1145         //
1146         // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1147         //
1148         // > This RFC proposes that *all* type parameters are considered in scope
1149         // > for `impl Trait` in return position
1150 
1151         fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1152             opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1153         }
1154 
1155         let host = io_result(self.host(), "No host name in the URL")?;
1156         let port = io_result(
1157             self.port_or_known_default().or_else(default_port_number),
1158             "No port number in the URL",
1159         )?;
1160         Ok(match host {
1161             Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1162             Host::Ipv4(ip) => vec![(ip, port).into()],
1163             Host::Ipv6(ip) => vec![(ip, port).into()],
1164         })
1165     }
1166 
1167     /// Return the path for this URL, as a percent-encoded ASCII string.
1168     /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1169     /// For other URLs, this starts with a '/' slash
1170     /// and continues with slash-separated path segments.
1171     ///
1172     /// # Examples
1173     ///
1174     /// ```rust
1175     /// use url::{Url, ParseError};
1176     ///
1177     /// # fn run() -> Result<(), ParseError> {
1178     /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1179     /// assert_eq!(url.path(), "/api/versions");
1180     ///
1181     /// let url = Url::parse("https://example.com")?;
1182     /// assert_eq!(url.path(), "/");
1183     ///
1184     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1185     /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1186     /// # Ok(())
1187     /// # }
1188     /// # run().unwrap();
1189     /// ```
path(&self) -> &str1190     pub fn path(&self) -> &str {
1191         match (self.query_start, self.fragment_start) {
1192             (None, None) => self.slice(self.path_start..),
1193             (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1194                 self.slice(self.path_start..next_component_start)
1195             }
1196         }
1197     }
1198 
1199     /// Unless this URL is cannot-be-a-base,
1200     /// return an iterator of '/' slash-separated path segments,
1201     /// each as a percent-encoded ASCII string.
1202     ///
1203     /// Return `None` for cannot-be-a-base URLs.
1204     ///
1205     /// When `Some` is returned, the iterator always contains at least one string
1206     /// (which may be empty).
1207     ///
1208     /// # Examples
1209     ///
1210     /// ```
1211     /// use url::Url;
1212     /// # use std::error::Error;
1213     ///
1214     /// # fn run() -> Result<(), Box<dyn Error>> {
1215     /// let url = Url::parse("https://example.com/foo/bar")?;
1216     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1217     /// assert_eq!(path_segments.next(), Some("foo"));
1218     /// assert_eq!(path_segments.next(), Some("bar"));
1219     /// assert_eq!(path_segments.next(), None);
1220     ///
1221     /// let url = Url::parse("https://example.com")?;
1222     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1223     /// assert_eq!(path_segments.next(), Some(""));
1224     /// assert_eq!(path_segments.next(), None);
1225     ///
1226     /// let url = Url::parse("data:text/plain,HelloWorld")?;
1227     /// assert!(url.path_segments().is_none());
1228     ///
1229     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1230     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1231     /// assert_eq!(path_segments.next(), Some("countries"));
1232     /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1233     /// # Ok(())
1234     /// # }
1235     /// # run().unwrap();
1236     /// ```
1237     #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36
path_segments(&self) -> Option<str::Split<'_, char>>1238     pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1239         let path = self.path();
1240         if path.starts_with('/') {
1241             Some(path[1..].split('/'))
1242         } else {
1243             None
1244         }
1245     }
1246 
1247     /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1248     ///
1249     /// # Examples
1250     ///
1251     /// ```rust
1252     /// use url::Url;
1253     /// # use url::ParseError;
1254     ///
    /// # fn run() -> Result<(), ParseError> {
1256     /// let url = Url::parse("https://example.com/products?page=2")?;
1257     /// let query = url.query();
1258     /// assert_eq!(query, Some("page=2"));
1259     ///
1260     /// let url = Url::parse("https://example.com/products")?;
1261     /// let query = url.query();
1262     /// assert!(query.is_none());
1263     ///
1264     /// let url = Url::parse("https://example.com/?country=español")?;
1265     /// let query = url.query();
1266     /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1267     /// # Ok(())
1268     /// # }
1269     /// # run().unwrap();
1270     /// ```
query(&self) -> Option<&str>1271     pub fn query(&self) -> Option<&str> {
1272         match (self.query_start, self.fragment_start) {
1273             (None, _) => None,
1274             (Some(query_start), None) => {
1275                 debug_assert!(self.byte_at(query_start) == b'?');
1276                 Some(self.slice(query_start + 1..))
1277             }
1278             (Some(query_start), Some(fragment_start)) => {
1279                 debug_assert!(self.byte_at(query_start) == b'?');
1280                 Some(self.slice(query_start + 1..fragment_start))
1281             }
1282         }
1283     }
1284 
1285     /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1286     /// and return an iterator of (key, value) pairs.
1287     ///
1288     /// # Examples
1289     ///
1290     /// ```rust
1291     /// use std::borrow::Cow;
1292     ///
1293     /// use url::Url;
1294     /// # use url::ParseError;
1295     ///
1296     /// # fn run() -> Result<(), ParseError> {
1297     /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1298     /// let mut pairs = url.query_pairs();
1299     ///
1300     /// assert_eq!(pairs.count(), 2);
1301     ///
1302     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1303     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1304     /// # Ok(())
1305     /// # }
1306     /// # run().unwrap();
    /// ```
1309     #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1310     pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1311         form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1312     }
1313 
1314     /// Return this URL’s fragment identifier, if any.
1315     ///
1316     /// A fragment is the part of the URL after the `#` symbol.
1317     /// The fragment is optional and, if present, contains a fragment identifier
1318     /// that identifies a secondary resource, such as a section heading
1319     /// of a document.
1320     ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1322     /// that is scrolled to on load. Browsers typically will not send the fragment portion
1323     /// of a URL to the server.
1324     ///
1325     /// **Note:** the parser did *not* percent-encode this component,
1326     /// but the input may have been percent-encoded already.
1327     ///
1328     /// # Examples
1329     ///
1330     /// ```rust
1331     /// use url::Url;
1332     /// # use url::ParseError;
1333     ///
1334     /// # fn run() -> Result<(), ParseError> {
1335     /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1336     ///
1337     /// assert_eq!(url.fragment(), Some("row=4"));
1338     ///
1339     /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1340     ///
1341     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1342     /// # Ok(())
1343     /// # }
1344     /// # run().unwrap();
1345     /// ```
fragment(&self) -> Option<&str>1346     pub fn fragment(&self) -> Option<&str> {
1347         self.fragment_start.map(|start| {
1348             debug_assert!(self.byte_at(start) == b'#');
1349             self.slice(start + 1..)
1350         })
1351     }
1352 
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1353     fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1354         let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new()));
1355         let result = f(&mut parser);
1356         self.serialization = parser.serialization;
1357         result
1358     }
1359 
1360     /// Change this URL’s fragment identifier.
1361     ///
1362     /// # Examples
1363     ///
1364     /// ```rust
1365     /// use url::Url;
1366     /// # use url::ParseError;
1367     ///
1368     /// # fn run() -> Result<(), ParseError> {
1369     /// let mut url = Url::parse("https://example.com/data.csv")?;
1370     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1372     /// url.set_fragment(Some("cell=4,1-6,2"));
1373     /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1374     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1375     ///
1376     /// url.set_fragment(None);
1377     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1378     /// assert!(url.fragment().is_none());
1379     /// # Ok(())
1380     /// # }
1381     /// # run().unwrap();
1382     /// ```
set_fragment(&mut self, fragment: Option<&str>)1383     pub fn set_fragment(&mut self, fragment: Option<&str>) {
1384         // Remove any previous fragment
1385         if let Some(start) = self.fragment_start {
1386             debug_assert!(self.byte_at(start) == b'#');
1387             self.serialization.truncate(start as usize);
1388         }
1389         // Write the new one
1390         if let Some(input) = fragment {
1391             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1392             self.serialization.push('#');
1393             self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
1394         } else {
1395             self.fragment_start = None
1396         }
1397     }
1398 
take_fragment(&mut self) -> Option<String>1399     fn take_fragment(&mut self) -> Option<String> {
1400         self.fragment_start.take().map(|start| {
1401             debug_assert!(self.byte_at(start) == b'#');
1402             let fragment = self.slice(start + 1..).to_owned();
1403             self.serialization.truncate(start as usize);
1404             fragment
1405         })
1406     }
1407 
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1408     fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1409         if let Some(ref fragment) = fragment {
1410             assert!(self.fragment_start.is_none());
1411             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1412             self.serialization.push('#');
1413             self.serialization.push_str(fragment);
1414         }
1415     }
1416 
1417     /// Change this URL’s query string.
1418     ///
1419     /// # Examples
1420     ///
1421     /// ```rust
1422     /// use url::Url;
1423     /// # use url::ParseError;
1424     ///
1425     /// # fn run() -> Result<(), ParseError> {
1426     /// let mut url = Url::parse("https://example.com/products")?;
1427     /// assert_eq!(url.as_str(), "https://example.com/products");
1428     ///
1429     /// url.set_query(Some("page=2"));
1430     /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1431     /// assert_eq!(url.query(), Some("page=2"));
1432     /// # Ok(())
1433     /// # }
1434     /// # run().unwrap();
1435     /// ```
set_query(&mut self, query: Option<&str>)1436     pub fn set_query(&mut self, query: Option<&str>) {
1437         let fragment = self.take_fragment();
1438 
1439         // Remove any previous query
1440         if let Some(start) = self.query_start.take() {
1441             debug_assert!(self.byte_at(start) == b'?');
1442             self.serialization.truncate(start as usize);
1443         }
1444         // Write the new query, if any
1445         if let Some(input) = query {
1446             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1447             self.serialization.push('?');
1448             let scheme_type = SchemeType::from(self.scheme());
1449             let scheme_end = self.scheme_end;
1450             self.mutate(|parser| {
1451                 let vfn = parser.violation_fn;
1452                 parser.parse_query(
1453                     scheme_type,
1454                     scheme_end,
1455                     parser::Input::trim_tab_and_newlines(input, vfn),
1456                 )
1457             });
1458         }
1459 
1460         self.restore_already_parsed_fragment(fragment);
1461     }
1462 
1463     /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1464     /// in `application/x-www-form-urlencoded` syntax.
1465     ///
1466     /// The return value has a method-chaining API:
1467     ///
1468     /// ```rust
1469     /// # use url::{Url, ParseError};
1470     ///
1471     /// # fn run() -> Result<(), ParseError> {
1472     /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1473     /// assert_eq!(url.query(), Some("lang=fr"));
1474     ///
1475     /// url.query_pairs_mut().append_pair("foo", "bar");
1476     /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1477     /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1478     ///
1479     /// url.query_pairs_mut()
1480     ///     .clear()
1481     ///     .append_pair("foo", "bar & baz")
1482     ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1483     /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1484     /// assert_eq!(url.as_str(),
1485     ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1486     /// # Ok(())
1487     /// # }
1488     /// # run().unwrap();
1489     /// ```
1490     ///
1491     /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1492     /// not `url.set_query(None)`.
1493     ///
1494     /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1495     pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1496         let fragment = self.take_fragment();
1497 
1498         let query_start;
1499         if let Some(start) = self.query_start {
1500             debug_assert!(self.byte_at(start) == b'?');
1501             query_start = start as usize;
1502         } else {
1503             query_start = self.serialization.len();
1504             self.query_start = Some(to_u32(query_start).unwrap());
1505             self.serialization.push('?');
1506         }
1507 
1508         let query = UrlQuery {
1509             url: Some(self),
1510             fragment,
1511         };
1512         form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1513     }
1514 
take_after_path(&mut self) -> String1515     fn take_after_path(&mut self) -> String {
1516         match (self.query_start, self.fragment_start) {
1517             (Some(i), _) | (None, Some(i)) => {
1518                 let after_path = self.slice(i..).to_owned();
1519                 self.serialization.truncate(i as usize);
1520                 after_path
1521             }
1522             (None, None) => String::new(),
1523         }
1524     }
1525 
1526     /// Change this URL’s path.
1527     ///
1528     /// # Examples
1529     ///
1530     /// ```rust
1531     /// use url::Url;
1532     /// # use url::ParseError;
1533     ///
1534     /// # fn run() -> Result<(), ParseError> {
1535     /// let mut url = Url::parse("https://example.com")?;
1536     /// url.set_path("api/comments");
1537     /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1538     /// assert_eq!(url.path(), "/api/comments");
1539     ///
1540     /// let mut url = Url::parse("https://example.com/api")?;
1541     /// url.set_path("data/report.csv");
1542     /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1543     /// assert_eq!(url.path(), "/data/report.csv");
1544     /// # Ok(())
1545     /// # }
1546     /// # run().unwrap();
1547     /// ```
set_path(&mut self, mut path: &str)1548     pub fn set_path(&mut self, mut path: &str) {
1549         let after_path = self.take_after_path();
1550         let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1551         let cannot_be_a_base = self.cannot_be_a_base();
1552         let scheme_type = SchemeType::from(self.scheme());
1553         self.serialization.truncate(self.path_start as usize);
1554         self.mutate(|parser| {
1555             if cannot_be_a_base {
1556                 if path.starts_with('/') {
1557                     parser.serialization.push_str("%2F");
1558                     path = &path[1..];
1559                 }
1560                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1561             } else {
1562                 let mut has_host = true; // FIXME
1563                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1564             }
1565         });
1566         self.restore_after_path(old_after_path_pos, &after_path);
1567     }
1568 
1569     /// Return an object with methods to manipulate this URL’s path segments.
1570     ///
1571     /// Return `Err(())` if this URL is cannot-be-a-base.
1572     #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1573     pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1574         if self.cannot_be_a_base() {
1575             Err(())
1576         } else {
1577             Ok(path_segments::new(self))
1578         }
1579     }
1580 
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1581     fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1582         let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1583         let adjust = |index: &mut u32| {
1584             *index -= old_after_path_position;
1585             *index += new_after_path_position;
1586         };
1587         if let Some(ref mut index) = self.query_start {
1588             adjust(index)
1589         }
1590         if let Some(ref mut index) = self.fragment_start {
1591             adjust(index)
1592         }
1593         self.serialization.push_str(after_path)
1594     }
1595 
1596     /// Change this URL’s port number.
1597     ///
1598     /// Note that default port numbers are not reflected in the serialization.
1599     ///
1600     /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1601     /// do nothing and return `Err`.
1602     ///
1603     /// # Examples
1604     ///
1605     /// ```
1606     /// use url::Url;
1607     /// # use std::error::Error;
1608     ///
1609     /// # fn run() -> Result<(), Box<dyn Error>> {
1610     /// let mut url = Url::parse("ssh://example.net:2048/")?;
1611     ///
1612     /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1613     /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1614     ///
1615     /// url.set_port(None).map_err(|_| "cannot be base")?;
1616     /// assert_eq!(url.as_str(), "ssh://example.net/");
1617     /// # Ok(())
1618     /// # }
1619     /// # run().unwrap();
1620     /// ```
1621     ///
1622     /// Known default port numbers are not reflected:
1623     ///
1624     /// ```rust
1625     /// use url::Url;
1626     /// # use std::error::Error;
1627     ///
1628     /// # fn run() -> Result<(), Box<dyn Error>> {
1629     /// let mut url = Url::parse("https://example.org/")?;
1630     ///
1631     /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1632     /// assert!(url.port().is_none());
1633     /// # Ok(())
1634     /// # }
1635     /// # run().unwrap();
1636     /// ```
1637     ///
1638     /// Cannot set port for cannot-be-a-base URLs:
1639     ///
1640     /// ```
1641     /// use url::Url;
1642     /// # use url::ParseError;
1643     ///
1644     /// # fn run() -> Result<(), ParseError> {
1645     /// let mut url = Url::parse("mailto:rms@example.net")?;
1646     ///
1647     /// let result = url.set_port(Some(80));
1648     /// assert!(result.is_err());
1649     ///
1650     /// let result = url.set_port(None);
1651     /// assert!(result.is_err());
1652     /// # Ok(())
1653     /// # }
1654     /// # run().unwrap();
1655     /// ```
1656     #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1657     pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1658         // has_host implies !cannot_be_a_base
1659         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1660             return Err(());
1661         }
1662         if port.is_some() && port == parser::default_port(self.scheme()) {
1663             port = None
1664         }
1665         self.set_port_internal(port);
1666         Ok(())
1667     }
1668 
set_port_internal(&mut self, port: Option<u16>)1669     fn set_port_internal(&mut self, port: Option<u16>) {
1670         match (self.port, port) {
1671             (None, None) => {}
1672             (Some(_), None) => {
1673                 self.serialization
1674                     .drain(self.host_end as usize..self.path_start as usize);
1675                 let offset = self.path_start - self.host_end;
1676                 self.path_start = self.host_end;
1677                 if let Some(ref mut index) = self.query_start {
1678                     *index -= offset
1679                 }
1680                 if let Some(ref mut index) = self.fragment_start {
1681                     *index -= offset
1682                 }
1683             }
1684             (Some(old), Some(new)) if old == new => {}
1685             (_, Some(new)) => {
1686                 let path_and_after = self.slice(self.path_start..).to_owned();
1687                 self.serialization.truncate(self.host_end as usize);
1688                 write!(&mut self.serialization, ":{}", new).unwrap();
1689                 let old_path_start = self.path_start;
1690                 let new_path_start = to_u32(self.serialization.len()).unwrap();
1691                 self.path_start = new_path_start;
1692                 let adjust = |index: &mut u32| {
1693                     *index -= old_path_start;
1694                     *index += new_path_start;
1695                 };
1696                 if let Some(ref mut index) = self.query_start {
1697                     adjust(index)
1698                 }
1699                 if let Some(ref mut index) = self.fragment_start {
1700                     adjust(index)
1701                 }
1702                 self.serialization.push_str(&path_and_after);
1703             }
1704         }
1705         self.port = port;
1706     }
1707 
1708     /// Change this URL’s host.
1709     ///
1710     /// Removing the host (calling this with `None`)
1711     /// will also remove any username, password, and port number.
1712     ///
1713     /// # Examples
1714     ///
1715     /// Change host:
1716     ///
1717     /// ```
1718     /// use url::Url;
1719     /// # use url::ParseError;
1720     ///
1721     /// # fn run() -> Result<(), ParseError> {
1722     /// let mut url = Url::parse("https://example.net")?;
1723     /// let result = url.set_host(Some("rust-lang.org"));
1724     /// assert!(result.is_ok());
1725     /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1726     /// # Ok(())
1727     /// # }
1728     /// # run().unwrap();
1729     /// ```
1730     ///
1731     /// Remove host:
1732     ///
1733     /// ```
1734     /// use url::Url;
1735     /// # use url::ParseError;
1736     ///
1737     /// # fn run() -> Result<(), ParseError> {
1738     /// let mut url = Url::parse("foo://example.net")?;
1739     /// let result = url.set_host(None);
1740     /// assert!(result.is_ok());
1741     /// assert_eq!(url.as_str(), "foo:/");
1742     /// # Ok(())
1743     /// # }
1744     /// # run().unwrap();
1745     /// ```
1746     ///
1747     /// Cannot remove host for 'special' schemes (e.g. `http`):
1748     ///
1749     /// ```
1750     /// use url::Url;
1751     /// # use url::ParseError;
1752     ///
1753     /// # fn run() -> Result<(), ParseError> {
1754     /// let mut url = Url::parse("https://example.net")?;
1755     /// let result = url.set_host(None);
1756     /// assert!(result.is_err());
1757     /// assert_eq!(url.as_str(), "https://example.net/");
1758     /// # Ok(())
1759     /// # }
1760     /// # run().unwrap();
1761     /// ```
1762     ///
1763     /// Cannot change or remove host for cannot-be-a-base URLs:
1764     ///
1765     /// ```
1766     /// use url::Url;
1767     /// # use url::ParseError;
1768     ///
1769     /// # fn run() -> Result<(), ParseError> {
1770     /// let mut url = Url::parse("mailto:rms@example.net")?;
1771     ///
1772     /// let result = url.set_host(Some("rust-lang.org"));
1773     /// assert!(result.is_err());
1774     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1775     ///
1776     /// let result = url.set_host(None);
1777     /// assert!(result.is_err());
1778     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1779     /// # Ok(())
1780     /// # }
1781     /// # run().unwrap();
1782     /// ```
1783     ///
1784     /// # Errors
1785     ///
1786     /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1787     /// a [`ParseError`] variant will be returned.
1788     ///
1789     /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1790     pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1791         if self.cannot_be_a_base() {
1792             return Err(ParseError::SetHostOnCannotBeABaseUrl);
1793         }
1794 
1795         if let Some(host) = host {
1796             if host.is_empty() && SchemeType::from(self.scheme()).is_special() {
1797                 return Err(ParseError::EmptyHost);
1798             }
1799             let mut host_substr = host;
1800             // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1801             if !host.starts_with('[') || !host.ends_with(']') {
1802                 match host.find(':') {
1803                     Some(0) => {
1804                         // If buffer is the empty string, validation error, return failure.
1805                         return Err(ParseError::InvalidDomainCharacter);
1806                     }
1807                     // Let host be the result of host parsing buffer
1808                     Some(colon_index) => {
1809                         host_substr = &host[..colon_index];
1810                     }
1811                     None => {}
1812                 }
1813             }
1814             if SchemeType::from(self.scheme()).is_special() {
1815                 self.set_host_internal(Host::parse(host_substr)?, None);
1816             } else {
1817                 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1818             }
1819         } else if self.has_host() {
1820             let scheme_type = SchemeType::from(self.scheme());
1821             if scheme_type.is_special() {
1822                 return Err(ParseError::EmptyHost);
1823             } else if self.serialization.len() == self.path_start as usize {
1824                 self.serialization.push('/');
1825             }
1826             debug_assert!(self.byte_at(self.scheme_end) == b':');
1827             debug_assert!(self.byte_at(self.path_start) == b'/');
1828             let new_path_start = self.scheme_end + 1;
1829             self.serialization
1830                 .drain(new_path_start as usize..self.path_start as usize);
1831             let offset = self.path_start - new_path_start;
1832             self.path_start = new_path_start;
1833             self.username_end = new_path_start;
1834             self.host_start = new_path_start;
1835             self.host_end = new_path_start;
1836             self.port = None;
1837             if let Some(ref mut index) = self.query_start {
1838                 *index -= offset
1839             }
1840             if let Some(ref mut index) = self.fragment_start {
1841                 *index -= offset
1842             }
1843         }
1844         Ok(())
1845     }
1846 
1847     /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)1848     fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1849         let old_suffix_pos = if opt_new_port.is_some() {
1850             self.path_start
1851         } else {
1852             self.host_end
1853         };
1854         let suffix = self.slice(old_suffix_pos..).to_owned();
1855         self.serialization.truncate(self.host_start as usize);
1856         if !self.has_authority() {
1857             debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
1858             debug_assert!(self.username_end == self.host_start);
1859             self.serialization.push('/');
1860             self.serialization.push('/');
1861             self.username_end += 2;
1862             self.host_start += 2;
1863         }
1864         write!(&mut self.serialization, "{}", host).unwrap();
1865         self.host_end = to_u32(self.serialization.len()).unwrap();
1866         self.host = host.into();
1867 
1868         if let Some(new_port) = opt_new_port {
1869             self.port = new_port;
1870             if let Some(port) = new_port {
1871                 write!(&mut self.serialization, ":{}", port).unwrap();
1872             }
1873         }
1874         let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
1875         self.serialization.push_str(&suffix);
1876 
1877         let adjust = |index: &mut u32| {
1878             *index -= old_suffix_pos;
1879             *index += new_suffix_pos;
1880         };
1881         adjust(&mut self.path_start);
1882         if let Some(ref mut index) = self.query_start {
1883             adjust(index)
1884         }
1885         if let Some(ref mut index) = self.fragment_start {
1886             adjust(index)
1887         }
1888     }
1889 
1890     /// Change this URL’s host to the given IP address.
1891     ///
1892     /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1893     ///
1894     /// Compared to `Url::set_host`, this skips the host parser.
1895     ///
1896     /// # Examples
1897     ///
1898     /// ```rust
1899     /// use url::{Url, ParseError};
1900     ///
1901     /// # fn run() -> Result<(), ParseError> {
1902     /// let mut url = Url::parse("http://example.com")?;
1903     /// url.set_ip_host("127.0.0.1".parse().unwrap());
1904     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1905     /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1906     /// # Ok(())
1907     /// # }
1908     /// # run().unwrap();
1909     /// ```
1910     ///
    /// Cannot change the host of a `mailto` (cannot-be-a-base) URL to an IP address:
1912     ///
1913     /// ```rust
1914     /// use url::{Url, ParseError};
1915     ///
1916     /// # fn run() -> Result<(), ParseError> {
1917     /// let mut url = Url::parse("mailto:rms@example.com")?;
1918     /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1919     ///
1920     /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1921     /// assert!(result.is_err());
1922     /// # Ok(())
1923     /// # }
1924     /// # run().unwrap();
1925     /// ```
1926     ///
1927     #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1928     pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1929         if self.cannot_be_a_base() {
1930             return Err(());
1931         }
1932 
1933         let address = match address {
1934             IpAddr::V4(address) => Host::Ipv4(address),
1935             IpAddr::V6(address) => Host::Ipv6(address),
1936         };
1937         self.set_host_internal(address, None);
1938         Ok(())
1939     }
1940 
1941     /// Change this URL’s password.
1942     ///
1943     /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
1944     ///
1945     /// # Examples
1946     ///
1947     /// ```rust
1948     /// use url::{Url, ParseError};
1949     ///
1950     /// # fn run() -> Result<(), ParseError> {
1951     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1952     /// let result = url.set_password(Some("secret_password"));
1953     /// assert!(result.is_err());
1954     ///
1955     /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
1956     /// let result = url.set_password(Some("secret_password"));
1957     /// assert_eq!(url.password(), Some("secret_password"));
1958     ///
1959     /// let mut url = Url::parse("ftp://user2:@example.com")?;
1960     /// let result = url.set_password(Some("secret2"));
1961     /// assert!(result.is_ok());
1962     /// assert_eq!(url.password(), Some("secret2"));
1963     /// # Ok(())
1964     /// # }
1965     /// # run().unwrap();
1966     /// ```
1967     #[allow(clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>1968     pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
1969         // has_host implies !cannot_be_a_base
1970         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1971             return Err(());
1972         }
1973         if let Some(password) = password {
1974             let host_and_after = self.slice(self.host_start..).to_owned();
1975             self.serialization.truncate(self.username_end as usize);
1976             self.serialization.push(':');
1977             self.serialization
1978                 .extend(utf8_percent_encode(password, USERINFO));
1979             self.serialization.push('@');
1980 
1981             let old_host_start = self.host_start;
1982             let new_host_start = to_u32(self.serialization.len()).unwrap();
1983             let adjust = |index: &mut u32| {
1984                 *index -= old_host_start;
1985                 *index += new_host_start;
1986             };
1987             self.host_start = new_host_start;
1988             adjust(&mut self.host_end);
1989             adjust(&mut self.path_start);
1990             if let Some(ref mut index) = self.query_start {
1991                 adjust(index)
1992             }
1993             if let Some(ref mut index) = self.fragment_start {
1994                 adjust(index)
1995             }
1996 
1997             self.serialization.push_str(&host_and_after);
1998         } else if self.byte_at(self.username_end) == b':' {
1999             // If there is a password to remove
2000             let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2001             debug_assert!(has_username_or_password);
2002             let username_start = self.scheme_end + 3;
2003             let empty_username = username_start == self.username_end;
2004             let start = self.username_end; // Remove the ':'
2005             let end = if empty_username {
2006                 self.host_start // Remove the '@' as well
2007             } else {
2008                 self.host_start - 1 // Keep the '@' to separate the username from the host
2009             };
2010             self.serialization.drain(start as usize..end as usize);
2011             let offset = end - start;
2012             self.host_start -= offset;
2013             self.host_end -= offset;
2014             self.path_start -= offset;
2015             if let Some(ref mut index) = self.query_start {
2016                 *index -= offset
2017             }
2018             if let Some(ref mut index) = self.fragment_start {
2019                 *index -= offset
2020             }
2021         }
2022         Ok(())
2023     }
2024 
2025     /// Change this URL’s username.
2026     ///
    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
    ///
    /// # Examples
    ///
    /// Cannot set the username of a `mailto` (cannot-be-a-base) URL:
2031     ///
2032     /// ```rust
2033     /// use url::{Url, ParseError};
2034     ///
2035     /// # fn run() -> Result<(), ParseError> {
2036     /// let mut url = Url::parse("mailto:rmz@example.com")?;
2037     /// let result = url.set_username("user1");
2038     /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2039     /// assert!(result.is_err());
2040     /// # Ok(())
2041     /// # }
2042     /// # run().unwrap();
2043     /// ```
2044     ///
    /// Set the username to `user1`:
2046     ///
2047     /// ```rust
2048     /// use url::{Url, ParseError};
2049     ///
2050     /// # fn run() -> Result<(), ParseError> {
2051     /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2052     /// let result = url.set_username("user1");
2053     /// assert!(result.is_ok());
2054     /// assert_eq!(url.username(), "user1");
2055     /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2056     /// # Ok(())
2057     /// # }
2058     /// # run().unwrap();
2059     /// ```
2060     #[allow(clippy::result_unit_err)]
set_username(&mut self, username: &str) -> Result<(), ()>2061     pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2062         // has_host implies !cannot_be_a_base
2063         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2064             return Err(());
2065         }
2066         let username_start = self.scheme_end + 3;
2067         debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2068         if self.slice(username_start..self.username_end) == username {
2069             return Ok(());
2070         }
2071         let after_username = self.slice(self.username_end..).to_owned();
2072         self.serialization.truncate(username_start as usize);
2073         self.serialization
2074             .extend(utf8_percent_encode(username, USERINFO));
2075 
2076         let mut removed_bytes = self.username_end;
2077         self.username_end = to_u32(self.serialization.len()).unwrap();
2078         let mut added_bytes = self.username_end;
2079 
2080         let new_username_is_empty = self.username_end == username_start;
2081         match (new_username_is_empty, after_username.chars().next()) {
2082             (true, Some('@')) => {
2083                 removed_bytes += 1;
2084                 self.serialization.push_str(&after_username[1..]);
2085             }
2086             (false, Some('@')) | (_, Some(':')) | (true, _) => {
2087                 self.serialization.push_str(&after_username);
2088             }
2089             (false, _) => {
2090                 added_bytes += 1;
2091                 self.serialization.push('@');
2092                 self.serialization.push_str(&after_username);
2093             }
2094         }
2095 
2096         let adjust = |index: &mut u32| {
2097             *index -= removed_bytes;
2098             *index += added_bytes;
2099         };
2100         adjust(&mut self.host_start);
2101         adjust(&mut self.host_end);
2102         adjust(&mut self.path_start);
2103         if let Some(ref mut index) = self.query_start {
2104             adjust(index)
2105         }
2106         if let Some(ref mut index) = self.fragment_start {
2107             adjust(index)
2108         }
2109         Ok(())
2110     }
2111 
2112     /// Change this URL’s scheme.
2113     ///
2114     /// Do nothing and return `Err` under the following circumstances:
2115     ///
2116     /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2117     /// * If this URL is cannot-be-a-base and the new scheme is one of
2118     ///   `http`, `https`, `ws`, `wss` or `ftp`
2119     /// * If either the old or new scheme is `http`, `https`, `ws`,
2120     ///   `wss` or `ftp` and the other is not one of these
2121     /// * If the new scheme is `file` and this URL includes credentials
2122     ///   or has a non-null port
2123     /// * If this URL's scheme is `file` and its host is empty or null
2124     ///
2125     /// See also [the URL specification's section on legal scheme state
2126     /// overrides](https://url.spec.whatwg.org/#scheme-state).
2127     ///
2128     /// # Examples
2129     ///
    /// Change the URL’s scheme from `https` to `http`:
2131     ///
2132     /// ```
2133     /// use url::Url;
2134     /// # use url::ParseError;
2135     ///
2136     /// # fn run() -> Result<(), ParseError> {
2137     /// let mut url = Url::parse("https://example.net")?;
2138     /// let result = url.set_scheme("http");
2139     /// assert_eq!(url.as_str(), "http://example.net/");
2140     /// assert!(result.is_ok());
2141     /// # Ok(())
2142     /// # }
2143     /// # run().unwrap();
2144     /// ```
2145     /// Change the URL’s scheme from `foo` to `bar`:
2146     ///
2147     /// ```
2148     /// use url::Url;
2149     /// # use url::ParseError;
2150     ///
2151     /// # fn run() -> Result<(), ParseError> {
2152     /// let mut url = Url::parse("foo://example.net")?;
2153     /// let result = url.set_scheme("bar");
2154     /// assert_eq!(url.as_str(), "bar://example.net");
2155     /// assert!(result.is_ok());
2156     /// # Ok(())
2157     /// # }
2158     /// # run().unwrap();
2159     /// ```
2160     ///
2161     /// Cannot change URL’s scheme from `https` to `foõ`:
2162     ///
2163     /// ```
2164     /// use url::Url;
2165     /// # use url::ParseError;
2166     ///
2167     /// # fn run() -> Result<(), ParseError> {
2168     /// let mut url = Url::parse("https://example.net")?;
2169     /// let result = url.set_scheme("foõ");
2170     /// assert_eq!(url.as_str(), "https://example.net/");
2171     /// assert!(result.is_err());
2172     /// # Ok(())
2173     /// # }
2174     /// # run().unwrap();
2175     /// ```
2176     ///
2177     /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2178     ///
2179     /// ```
2180     /// use url::Url;
2181     /// # use url::ParseError;
2182     ///
2183     /// # fn run() -> Result<(), ParseError> {
2184     /// let mut url = Url::parse("mailto:rms@example.net")?;
2185     /// let result = url.set_scheme("https");
2186     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2187     /// assert!(result.is_err());
2188     /// # Ok(())
2189     /// # }
2190     /// # run().unwrap();
2191     /// ```
2192     /// Cannot change the URL’s scheme from `foo` to `https`:
2193     ///
2194     /// ```
2195     /// use url::Url;
2196     /// # use url::ParseError;
2197     ///
2198     /// # fn run() -> Result<(), ParseError> {
2199     /// let mut url = Url::parse("foo://example.net")?;
2200     /// let result = url.set_scheme("https");
2201     /// assert_eq!(url.as_str(), "foo://example.net");
2202     /// assert!(result.is_err());
2203     /// # Ok(())
2204     /// # }
2205     /// # run().unwrap();
2206     /// ```
2207     /// Cannot change the URL’s scheme from `http` to `foo`:
2208     ///
2209     /// ```
2210     /// use url::Url;
2211     /// # use url::ParseError;
2212     ///
2213     /// # fn run() -> Result<(), ParseError> {
2214     /// let mut url = Url::parse("http://example.net")?;
2215     /// let result = url.set_scheme("foo");
2216     /// assert_eq!(url.as_str(), "http://example.net/");
2217     /// assert!(result.is_err());
2218     /// # Ok(())
2219     /// # }
2220     /// # run().unwrap();
2221     /// ```
2222     #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2223     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2224         let mut parser = Parser::for_setter(String::new());
2225         let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
2226         let new_scheme_type = SchemeType::from(&parser.serialization);
2227         let old_scheme_type = SchemeType::from(self.scheme());
2228         // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2229         if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2230             // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2231             (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2232             // If url includes credentials or has a non-null port, and buffer is "file", then return.
2233             // If url’s scheme is "file" and its host is an empty host or null, then return.
2234             (new_scheme_type.is_file() && self.has_authority())
2235         {
2236             return Err(());
2237         }
2238 
2239         if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2240             return Err(());
2241         }
2242         let old_scheme_end = self.scheme_end;
2243         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2244         let adjust = |index: &mut u32| {
2245             *index -= old_scheme_end;
2246             *index += new_scheme_end;
2247         };
2248 
2249         self.scheme_end = new_scheme_end;
2250         adjust(&mut self.username_end);
2251         adjust(&mut self.host_start);
2252         adjust(&mut self.host_end);
2253         adjust(&mut self.path_start);
2254         if let Some(ref mut index) = self.query_start {
2255             adjust(index)
2256         }
2257         if let Some(ref mut index) = self.fragment_start {
2258             adjust(index)
2259         }
2260 
2261         parser.serialization.push_str(self.slice(old_scheme_end..));
2262         self.serialization = parser.serialization;
2263 
2264         // Update the port so it can be removed
2265         // If it is the scheme's default
2266         // we don't mind it silently failing
2267         // if there was no port in the first place
2268         let previous_port = self.port();
2269         let _ = self.set_port(previous_port);
2270 
2271         Ok(())
2272     }
2273 
2274     /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2275     ///
2276     /// This returns `Err` if the given path is not absolute or,
2277     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2278     ///
2279     /// # Examples
2280     ///
2281     /// On Unix-like platforms:
2282     ///
2283     /// ```
2284     /// # if cfg!(unix) {
2285     /// use url::Url;
2286     ///
2287     /// # fn run() -> Result<(), ()> {
2288     /// let url = Url::from_file_path("/tmp/foo.txt")?;
2289     /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2290     ///
2291     /// let url = Url::from_file_path("../foo.txt");
2292     /// assert!(url.is_err());
2293     ///
2294     /// let url = Url::from_file_path("https://google.com/");
2295     /// assert!(url.is_err());
2296     /// # Ok(())
2297     /// # }
2298     /// # run().unwrap();
2299     /// # }
2300     /// ```
2301     #[cfg(any(unix, windows, target_os = "redox"))]
2302     #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2303     pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2304         let mut serialization = "file://".to_owned();
2305         let host_start = serialization.len() as u32;
2306         let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2307         Ok(Url {
2308             serialization,
2309             scheme_end: "file".len() as u32,
2310             username_end: host_start,
2311             host_start,
2312             host_end,
2313             host,
2314             port: None,
2315             path_start: host_end,
2316             query_start: None,
2317             fragment_start: None,
2318         })
2319     }
2320 
2321     /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2322     ///
2323     /// This returns `Err` if the given path is not absolute or,
2324     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2325     ///
    /// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2327     /// so that the entire path is considered when using this URL as a base URL.
2328     ///
2329     /// For example:
2330     ///
2331     /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2332     ///   as the base URL is `file:///var/www/index.html`
2333     /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2334     ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2335     ///
2336     /// Note that `std::path` does not consider trailing slashes significant
2337     /// and usually does not include them (e.g. in `Path::parent()`).
2338     #[cfg(any(unix, windows, target_os = "redox"))]
2339     #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2340     pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2341         let mut url = Url::from_file_path(path)?;
2342         if !url.serialization.ends_with('/') {
2343             url.serialization.push('/')
2344         }
2345         Ok(url)
2346     }
2347 
2348     /// Serialize with Serde using the internal representation of the `Url` struct.
2349     ///
2350     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2351     /// for speed, compared to the `Deserialize` trait impl.
2352     ///
2353     /// This method is only available if the `serde` Cargo feature is enabled.
2354     #[cfg(feature = "serde")]
2355     #[deny(unused)]
serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,2356     pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2357     where
2358         S: serde::Serializer,
2359     {
2360         use serde::Serialize;
2361         // Destructuring first lets us ensure that adding or removing fields forces this method
2362         // to be updated
2363         let Url {
2364             ref serialization,
2365             ref scheme_end,
2366             ref username_end,
2367             ref host_start,
2368             ref host_end,
2369             ref host,
2370             ref port,
2371             ref path_start,
2372             ref query_start,
2373             ref fragment_start,
2374         } = *self;
2375         (
2376             serialization,
2377             scheme_end,
2378             username_end,
2379             host_start,
2380             host_end,
2381             host,
2382             port,
2383             path_start,
2384             query_start,
2385             fragment_start,
2386         )
2387             .serialize(serializer)
2388     }
2389 
    /// Deserialize with Serde using the internal representation of the `Url` struct.
    ///
    /// This is the counterpart of `serialize_internal`. Compared to the `Deserialize`
    /// trait impl, it sacrifices some invariant-checking for speed: the invariants of the
    /// resulting `Url` are only checked when debug assertions are enabled.
    ///
    /// This method is only available if the `serde` Cargo feature is enabled.
    #[cfg(feature = "serde")]
    #[deny(unused)]
    pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Deserialize, Error, Unexpected};
        // The fields arrive as one tuple, in the same order `serialize_internal` writes them.
        let (
            serialization,
            scheme_end,
            username_end,
            host_start,
            host_end,
            host,
            port,
            path_start,
            query_start,
            fragment_start,
        ) = Deserialize::deserialize(deserializer)?;
        let url = Url {
            serialization,
            scheme_end,
            username_end,
            host_start,
            host_end,
            host,
            port,
            path_start,
            query_start,
            fragment_start,
        };
        // Validation is skipped unless debug assertions are enabled; a failed check is
        // reported as a Serde "invalid value" error carrying the reason.
        if cfg!(debug_assertions) {
            url.check_invariants().map_err(|reason| {
                let reason: &str = &reason;
                Error::invalid_value(Unexpected::Other("value"), &reason)
            })?
        }
        Ok(url)
    }
2435 
2436     /// Assuming the URL is in the `file` scheme or similar,
2437     /// convert its path to an absolute `std::path::Path`.
2438     ///
2439     /// **Note:** This does not actually check the URL’s `scheme`,
2440     /// and may give nonsensical results for other schemes.
2441     /// It is the user’s responsibility to check the URL’s scheme before calling this.
2442     ///
2443     /// ```
2444     /// # use url::Url;
2445     /// # let url = Url::parse("file:///etc/passwd").unwrap();
2446     /// let path = url.to_file_path();
2447     /// ```
2448     ///
2449     /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2450     /// `file:` URLs may have a non-local host),
2451     /// or if `Path::new_opt()` returns `None`.
2452     /// (That is, if the percent-decoded path contains a NUL byte or,
2453     /// for a Windows path, is not UTF-8.)
2454     #[inline]
2455     #[cfg(any(unix, windows, target_os = "redox"))]
2456     #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2457     pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2458         if let Some(segments) = self.path_segments() {
2459             let host = match self.host() {
2460                 None | Some(Host::Domain("localhost")) => None,
2461                 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2462                     Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2463                 }
2464                 _ => return Err(()),
2465             };
2466 
2467             return file_url_segments_to_pathbuf(host, segments);
2468         }
2469         Err(())
2470     }
2471 
2472     // Private helper methods:
2473 
2474     #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2475     fn slice<R>(&self, range: R) -> &str
2476     where
2477         R: RangeArg,
2478     {
2479         range.slice_of(&self.serialization)
2480     }
2481 
2482     #[inline]
byte_at(&self, i: u32) -> u82483     fn byte_at(&self, i: u32) -> u8 {
2484         self.serialization.as_bytes()[i as usize]
2485     }
2486 }
2487 
2488 /// Parse a string as an URL, without a base URL or encoding override.
2489 impl str::FromStr for Url {
2490     type Err = ParseError;
2491 
2492     #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2493     fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2494         Url::parse(input)
2495     }
2496 }
2497 
2498 impl<'a> TryFrom<&'a str> for Url {
2499     type Error = ParseError;
2500 
try_from(s: &'a str) -> Result<Self, Self::Error>2501     fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2502         Url::parse(s)
2503     }
2504 }
2505 
2506 /// Display the serialization of this URL.
2507 impl fmt::Display for Url {
2508     #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2509     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2510         fmt::Display::fmt(&self.serialization, formatter)
2511     }
2512 }
2513 
2514 /// String converstion.
2515 impl From<Url> for String {
from(value: Url) -> String2516     fn from(value: Url) -> String {
2517         value.serialization
2518     }
2519 }
2520 
2521 /// Debug the serialization of this URL.
2522 impl fmt::Debug for Url {
2523     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2524     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2525         formatter
2526             .debug_struct("Url")
2527             .field("scheme", &self.scheme())
2528             .field("cannot_be_a_base", &self.cannot_be_a_base())
2529             .field("username", &self.username())
2530             .field("password", &self.password())
2531             .field("host", &self.host())
2532             .field("port", &self.port())
2533             .field("path", &self.path())
2534             .field("query", &self.query())
2535             .field("fragment", &self.fragment())
2536             .finish()
2537     }
2538 }
2539 
2540 /// URLs compare like their serialization.
2541 impl Eq for Url {}
2542 
2543 /// URLs compare like their serialization.
2544 impl PartialEq for Url {
2545     #[inline]
eq(&self, other: &Self) -> bool2546     fn eq(&self, other: &Self) -> bool {
2547         self.serialization == other.serialization
2548     }
2549 }
2550 
2551 /// URLs compare like their serialization.
2552 impl Ord for Url {
2553     #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2554     fn cmp(&self, other: &Self) -> cmp::Ordering {
2555         self.serialization.cmp(&other.serialization)
2556     }
2557 }
2558 
2559 /// URLs compare like their serialization.
2560 impl PartialOrd for Url {
2561     #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2562     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2563         self.serialization.partial_cmp(&other.serialization)
2564     }
2565 }
2566 
2567 /// URLs hash like their serialization.
2568 impl hash::Hash for Url {
2569     #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2570     fn hash<H>(&self, state: &mut H)
2571     where
2572         H: hash::Hasher,
2573     {
2574         hash::Hash::hash(&self.serialization, state)
2575     }
2576 }
2577 
2578 /// Return the serialization of this URL.
2579 impl AsRef<str> for Url {
2580     #[inline]
as_ref(&self) -> &str2581     fn as_ref(&self) -> &str {
2582         &self.serialization
2583     }
2584 }
2585 
/// A range of `u32` byte offsets that knows how to slice a string.
///
/// Implemented for `a..b`, `a..` and `..b` so that `u32` offsets can be used for slicing
/// without spelling out the `as usize` casts at every call site.
trait RangeArg {
    /// Return the part of `s` selected by this range.
    ///
    /// Panics like ordinary string indexing when the range is out of bounds
    /// or does not fall on `char` boundaries.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`: bytes from `start` (inclusive) to `end` (exclusive).
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`: bytes from `start` to the end of the string.
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`: bytes from the beginning of the string up to `end` (exclusive).
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2610 
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string: the value returned by `as_str()`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
2623 
/// Deserializes this URL from a `serde` stream.
///
/// The input must be a string; it is parsed with `Url::parse`, so the resulting `Url`
/// has gone through the normal parser. A parse failure is reported as an
/// `invalid_value` error carrying the offending string and the parser's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that accepts a string and parses it as an URL.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Url::parse(s).map_err(|err| {
                    // The parser's message serves as the "expected" half of the error.
                    let err_s = err.to_string();
                    Error::invalid_value(Unexpected::Str(s), &err_s.as_str())
                })
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2658 
2659 #[cfg(any(unix, target_os = "redox"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2660 fn path_to_file_url_segments(
2661     path: &Path,
2662     serialization: &mut String,
2663 ) -> Result<(u32, HostInternal), ()> {
2664     use std::os::unix::prelude::OsStrExt;
2665     if !path.is_absolute() {
2666         return Err(());
2667     }
2668     let host_end = to_u32(serialization.len()).unwrap();
2669     let mut empty = true;
2670     // skip the root component
2671     for component in path.components().skip(1) {
2672         empty = false;
2673         serialization.push('/');
2674         serialization.extend(percent_encode(
2675             component.as_os_str().as_bytes(),
2676             PATH_SEGMENT,
2677         ));
2678     }
2679     if empty {
2680         // An URL’s path must not be empty.
2681         serialization.push('/');
2682     }
2683     Ok((host_end, HostInternal::None))
2684 }
2685 
/// Windows flavour of `path_to_file_url_segments`.
///
/// Forwards to `path_to_file_url_segments_windows`, which holds the actual logic and is
/// built on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2693 
2694 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2695 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2696 fn path_to_file_url_segments_windows(
2697     path: &Path,
2698     serialization: &mut String,
2699 ) -> Result<(u32, HostInternal), ()> {
2700     use std::path::{Component, Prefix};
2701     if !path.is_absolute() {
2702         return Err(());
2703     }
2704     let mut components = path.components();
2705 
2706     let host_start = serialization.len() + 1;
2707     let host_end;
2708     let host_internal;
2709     match components.next() {
2710         Some(Component::Prefix(ref p)) => match p.kind() {
2711             Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2712                 host_end = to_u32(serialization.len()).unwrap();
2713                 host_internal = HostInternal::None;
2714                 serialization.push('/');
2715                 serialization.push(letter as char);
2716                 serialization.push(':');
2717             }
2718             Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2719                 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2720                 write!(serialization, "{}", host).unwrap();
2721                 host_end = to_u32(serialization.len()).unwrap();
2722                 host_internal = host.into();
2723                 serialization.push('/');
2724                 let share = share.to_str().ok_or(())?;
2725                 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2726             }
2727             _ => return Err(()),
2728         },
2729 
2730         _ => return Err(()),
2731     }
2732 
2733     let mut path_only_has_prefix = true;
2734     for component in components {
2735         if component == Component::RootDir {
2736             continue;
2737         }
2738         path_only_has_prefix = false;
2739         // FIXME: somehow work with non-unicode?
2740         let component = component.as_os_str().to_str().ok_or(())?;
2741         serialization.push('/');
2742         serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2743     }
2744     // A windows drive letter must end with a slash.
2745     if serialization.len() > host_start
2746         && parser::is_windows_drive_letter(&serialization[host_start..])
2747         && path_only_has_prefix
2748     {
2749         serialization.push('/');
2750     }
2751     Ok((host_end, host_internal))
2752 }
2753 
2754 #[cfg(any(unix, target_os = "redox"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2755 fn file_url_segments_to_pathbuf(
2756     host: Option<&str>,
2757     segments: str::Split<'_, char>,
2758 ) -> Result<PathBuf, ()> {
2759     use std::ffi::OsStr;
2760     use std::os::unix::prelude::OsStrExt;
2761 
2762     if host.is_some() {
2763         return Err(());
2764     }
2765 
2766     let mut bytes = if cfg!(target_os = "redox") {
2767         b"file:".to_vec()
2768     } else {
2769         Vec::new()
2770     };
2771     for segment in segments {
2772         bytes.push(b'/');
2773         bytes.extend(percent_decode(segment.as_bytes()));
2774     }
2775     // A windows drive letter must end with a slash.
2776     if bytes.len() > 2
2777         && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2778         && matches!(bytes[bytes.len() - 1], b':' | b'|')
2779     {
2780         bytes.push(b'/');
2781     }
2782     let os_str = OsStr::from_bytes(&bytes);
2783     let path = PathBuf::from(os_str);
2784     debug_assert!(
2785         path.is_absolute(),
2786         "to_file_path() failed to produce an absolute Path"
2787     );
2788     Ok(path)
2789 }
2790 
/// Windows flavour of `file_url_segments_to_pathbuf`.
///
/// Forwards to `file_url_segments_to_pathbuf_windows`, which holds the actual logic and
/// is built on every platform. The signature spells out the elided lifetime of
/// `str::Split` so that it reads the same as the Unix variant and the shared helper.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2798 
2799 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2800 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2801 fn file_url_segments_to_pathbuf_windows(
2802     host: Option<&str>,
2803     mut segments: str::Split<'_, char>,
2804 ) -> Result<PathBuf, ()> {
2805     let mut string = if let Some(host) = host {
2806         r"\\".to_owned() + host
2807     } else {
2808         let first = segments.next().ok_or(())?;
2809 
2810         match first.len() {
2811             2 => {
2812                 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2813                     return Err(());
2814                 }
2815 
2816                 first.to_owned()
2817             }
2818 
2819             4 => {
2820                 if !first.starts_with(parser::ascii_alpha) {
2821                     return Err(());
2822                 }
2823                 let bytes = first.as_bytes();
2824                 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2825                     return Err(());
2826                 }
2827 
2828                 first[0..1].to_owned() + ":"
2829             }
2830 
2831             _ => return Err(()),
2832         }
2833     };
2834 
2835     for segment in segments {
2836         string.push('\\');
2837 
2838         // Currently non-unicode windows paths cannot be represented
2839         match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2840             Ok(s) => string.push_str(&s),
2841             Err(..) => return Err(()),
2842         }
2843     }
2844     let path = PathBuf::from(string);
2845     debug_assert!(
2846         path.is_absolute(),
2847         "to_file_path() failed to produce an absolute Path"
2848     );
2849     Ok(path)
2850 }
2851 
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
///
/// Wraps a mutable borrow of the `Url` being edited together with a fragment that is
/// handed back to the URL when the value is finished or dropped.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. `finish` and `Drop` take it out, leaving `None` behind,
    // so the fragment is restored only once.
    url: Option<&'a mut Url>,
    // Passed to `Url::restore_already_parsed_fragment` on `finish` or drop.
    // NOTE(review): presumably the fragment set aside while the query is edited —
    // confirm against `Url::query_pairs_mut`.
    fragment: Option<String>,
}
2858 
2859 // `as_mut_string` string here exposes the internal serialization of an `Url`,
2860 // which should not be exposed to users.
2861 // We achieve that by not giving users direct access to `UrlQuery`:
2862 // * Its fields are private
2863 //   (and so can not be constructed with struct literal syntax outside of this crate),
2864 // * It has no constructor
2865 // * It is only visible (on the type level) to users in the return type of
2866 //   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
2867 // * `Serializer` keeps its target in a private field
2868 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
2869 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String2870     fn as_mut_string(&mut self) -> &mut String {
2871         &mut self.url.as_mut().unwrap().serialization
2872     }
2873 
finish(mut self) -> &'a mut Url2874     fn finish(mut self) -> &'a mut Url {
2875         let url = self.url.take().unwrap();
2876         url.restore_already_parsed_fragment(self.fragment.take());
2877         url
2878     }
2879 
2880     type Finished = &'a mut Url;
2881 }
2882 
2883 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2884     fn drop(&mut self) {
2885         if let Some(url) = self.url.take() {
2886             url.restore_already_parsed_fragment(self.fragment.take())
2887         }
2888     }
2889 }
2890