1 use std::ffi::OsStr;
2 #[cfg(not(any(target_os = "windows", target_arch = "wasm32")))]
3 use std::os::unix::ffi::OsStrExt;
4 #[cfg(any(target_os = "windows", target_arch = "wasm32"))]
5 use INVALID_UTF8;
6 
/// Byte-oriented accessors for string types on targets where this file does
/// not import `std::os::unix::ffi::OsStrExt`.
///
/// The trait is only declared on Windows and wasm32; it offers the same two
/// method names (`from_bytes` / `as_bytes`) that the rest of this module calls
/// on `OsStr`.
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
pub trait OsStrExt3 {
    /// Borrows `self` as a slice of bytes.
    fn as_bytes(&self) -> &[u8];

    /// Borrows the given bytes as a value of the implementing type.
    fn from_bytes(b: &[u8]) -> &Self;
}
12 
13 #[doc(hidden)]
14 pub trait OsStrExt2 {
starts_with(&self, s: &[u8]) -> bool15     fn starts_with(&self, s: &[u8]) -> bool;
split_at_byte(&self, b: u8) -> (&OsStr, &OsStr)16     fn split_at_byte(&self, b: u8) -> (&OsStr, &OsStr);
split_at(&self, i: usize) -> (&OsStr, &OsStr)17     fn split_at(&self, i: usize) -> (&OsStr, &OsStr);
trim_left_matches(&self, b: u8) -> &OsStr18     fn trim_left_matches(&self, b: u8) -> &OsStr;
contains_byte(&self, b: u8) -> bool19     fn contains_byte(&self, b: u8) -> bool;
split(&self, b: u8) -> OsSplit20     fn split(&self, b: u8) -> OsSplit;
21 }
22 
/// A starts-with check that never panics, even when `osstr` holds invalid
/// Unicode.
///
/// A Windows `OsStr` is usually UTF-16. When `prefix` is valid UTF-8 it is
/// re-encoded as UTF-16 and compared, code unit by code unit, against the
/// front of `osstr`. When `prefix` is not valid UTF-8 that comparison is not
/// meaningful, so the answer is simply `false`.
#[cfg(target_os = "windows")]
fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
    use std::os::windows::ffi::OsStrExt;

    let prefix_text = match std::str::from_utf8(prefix) {
        Ok(text) => text,
        Err(_) => return false,
    };

    let mut wide_units = osstr.encode_wide();
    // Every prefix unit must be matched by the next unit of `osstr`; running
    // out of `osstr` first (`None`) or hitting a different unit is a mismatch.
    // An exhausted prefix means everything matched.
    prefix_text
        .encode_utf16()
        .all(|expected| wide_units.next() == Some(expected))
}
51 
/// Exercises `windows_osstr_starts_with` with plain prefixes, a prefix that
/// is not valid UTF-8, and an `OsString` that is not valid UTF-16.
#[test]
#[cfg(target_os = "windows")]
fn test_windows_osstr_starts_with() {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;

    // Widens each input byte to exactly one UTF-16 code unit. For ASCII input
    // this yields the same text; bytes above 0x7F become the code units
    // 0x0080..=0x00FF.
    fn from_ascii(ascii: &[u8]) -> OsString {
        let u16_vec: Vec<u16> = ascii.iter().map(|&c| u16::from(c)).collect();
        OsString::from_wide(&u16_vec)
    }

    // Test all the basic cases.
    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abc"));
    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abcdef"));
    assert!(!windows_osstr_starts_with(&from_ascii(b"abcdef"), b"def"));
    assert!(!windows_osstr_starts_with(&from_ascii(b"abc"), b"abcd"));

    // Test the case where the candidate prefix is not valid UTF-8. A
    // standalone \xff byte is outside ASCII and is not valid UTF-8, while the
    // helper above turns it into the single code unit 0x00FF. Thus although
    // these strings look identical, they do not match.
    assert!(!windows_osstr_starts_with(&from_ascii(b"\xff"), b"\xff"));

    // Test the case where the OsString is not valid UTF-16. It should still be
    // possible to match the valid characters at the front.
    //
    // UTF-16 surrogate characters are only valid in pairs. Including one on
    // the end by itself makes this invalid UTF-16.
    let surrogate_char: u16 = 0xDC00;
    let invalid_unicode =
        OsString::from_wide(&['a' as u16, 'b' as u16, 'c' as u16, surrogate_char]);
    assert!(
        invalid_unicode.to_str().is_none(),
        "This string is invalid Unicode, and conversion to &str should fail.",
    );
    assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
    assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
}
89 
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
impl OsStrExt3 for OsStr {
    /// Reinterprets `b` as an `OsStr` without copying or validating it.
    fn from_bytes(b: &[u8]) -> &Self {
        // SAFETY: NOTE(review) — this is a plain reinterpretation of the slice
        // reference. It assumes `OsStr` is represented as a byte slice on
        // these targets and that callers only pass bytes obtained from
        // `as_bytes()`; confirm before relying on it with arbitrary input.
        unsafe { &*(b as *const [u8] as *const OsStr) }
    }

    /// Returns the UTF-8 bytes of `self`.
    ///
    /// # Panics
    ///
    /// Panics with `INVALID_UTF8` when `self` cannot be converted to `&str`.
    fn as_bytes(&self) -> &[u8] {
        self.to_str().expect(INVALID_UTF8).as_bytes()
    }
}
100 
101 impl OsStrExt2 for OsStr {
starts_with(&self, s: &[u8]) -> bool102     fn starts_with(&self, s: &[u8]) -> bool {
103         #[cfg(target_os = "windows")]
104         {
105             // On Windows, the as_bytes() method will panic if the OsStr
106             // contains invalid Unicode. To avoid this, we use a
107             // Windows-specific starts-with function that doesn't rely on
108             // as_bytes(). This is necessary for Windows command line
109             // applications to handle non-Unicode arguments successfully. This
110             // allows common cases like `clap.exe [invalid]` to succeed, though
111             // cases that require string splitting will still fail, like
112             // `clap.exe --arg=[invalid]`. Note that this entire module is
113             // replaced in Clap 3.x, so this workaround is specific to the 2.x
114             // branch.
115             return windows_osstr_starts_with(self, s);
116         }
117         self.as_bytes().starts_with(s)
118     }
119 
contains_byte(&self, byte: u8) -> bool120     fn contains_byte(&self, byte: u8) -> bool {
121         for b in self.as_bytes() {
122             if b == &byte {
123                 return true;
124             }
125         }
126         false
127     }
128 
split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr)129     fn split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr) {
130         for (i, b) in self.as_bytes().iter().enumerate() {
131             if b == &byte {
132                 return (
133                     OsStr::from_bytes(&self.as_bytes()[..i]),
134                     OsStr::from_bytes(&self.as_bytes()[i + 1..]),
135                 );
136             }
137         }
138         (
139             &*self,
140             OsStr::from_bytes(&self.as_bytes()[self.len()..self.len()]),
141         )
142     }
143 
trim_left_matches(&self, byte: u8) -> &OsStr144     fn trim_left_matches(&self, byte: u8) -> &OsStr {
145         let mut found = false;
146         for (i, b) in self.as_bytes().iter().enumerate() {
147             if b != &byte {
148                 return OsStr::from_bytes(&self.as_bytes()[i..]);
149             } else {
150                 found = true;
151             }
152         }
153         if found {
154             return OsStr::from_bytes(&self.as_bytes()[self.len()..]);
155         }
156         &*self
157     }
158 
split_at(&self, i: usize) -> (&OsStr, &OsStr)159     fn split_at(&self, i: usize) -> (&OsStr, &OsStr) {
160         (
161             OsStr::from_bytes(&self.as_bytes()[..i]),
162             OsStr::from_bytes(&self.as_bytes()[i..]),
163         )
164     }
165 
split(&self, b: u8) -> OsSplit166     fn split(&self, b: u8) -> OsSplit {
167         OsSplit {
168             sep: b,
169             val: self.as_bytes(),
170             pos: 0,
171         }
172     }
173 }
174 
/// Iterator state for splitting the bytes of an `OsStr` on a single
/// separator byte.
#[doc(hidden)]
#[derive(Clone, Debug)]
pub struct OsSplit<'a> {
    /// The byte that separates one piece from the next.
    sep: u8,
    /// The bytes being split.
    val: &'a [u8],
    /// Index into `val` at which the next piece starts.
    pos: usize,
}
182 
183 impl<'a> Iterator for OsSplit<'a> {
184     type Item = &'a OsStr;
185 
next(&mut self) -> Option<&'a OsStr>186     fn next(&mut self) -> Option<&'a OsStr> {
187         debugln!("OsSplit::next: self={:?}", self);
188         if self.pos == self.val.len() {
189             return None;
190         }
191         let start = self.pos;
192         for b in &self.val[start..] {
193             self.pos += 1;
194             if *b == self.sep {
195                 return Some(OsStr::from_bytes(&self.val[start..self.pos - 1]));
196             }
197         }
198         Some(OsStr::from_bytes(&self.val[start..]))
199     }
200 }
201