1 #[cfg(feature = "std")]
2 use std::borrow::Cow;
3 #[cfg(feature = "std")]
4 use std::ffi::OsStr;
5 #[cfg(feature = "std")]
6 use std::path::Path;
7 
8 use core::{cmp, iter, ops, ptr, slice, str};
9 use memchr::{memchr, memrchr};
10 
11 use ascii;
12 use bstr::BStr;
13 use byteset;
14 #[cfg(feature = "std")]
15 use ext_vec::ByteVec;
16 use search::{PrefilterState, TwoWay};
17 #[cfg(feature = "unicode")]
18 use unicode::{
19     whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
20     SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
21     WordsWithBreaks,
22 };
23 use utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
24 
25 /// A short-hand constructor for building a `&[u8]`.
26 ///
27 /// This idiosyncratic constructor is useful for concisely building byte string
28 /// slices. Its primary utility is in conveniently writing byte string literals
29 /// in a uniform way. For example, consider this code that does not compile:
30 ///
31 /// ```ignore
32 /// let strs = vec![b"a", b"xy"];
33 /// ```
34 ///
35 /// The above code doesn't compile because the type of the byte string literal
36 /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
37 /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
38 /// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
39 /// where both `"a"` and `"xy"` have the same type of `&'static str`.)
40 ///
41 /// One way of getting the above code to compile is to convert byte strings to
42 /// slices. You might try this:
43 ///
44 /// ```ignore
45 /// let strs = vec![&b"a", &b"xy"];
46 /// ```
47 ///
48 /// But this just creates values with type `& &'static [u8; 1]` and
49 /// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
50 ///
51 /// ```
52 /// let strs = vec![&b"a"[..], &b"xy"[..]];
53 /// // or
54 /// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
55 /// ```
56 ///
57 /// But neither of these are particularly convenient to type, especially when
58 /// it's something as common as a string literal. Thus, this constructor
59 /// permits writing the following instead:
60 ///
61 /// ```
62 /// use bstr::B;
63 ///
64 /// let strs = vec![B("a"), B(b"xy")];
65 /// ```
66 ///
67 /// Notice that this also lets you mix and match both string literals and byte
68 /// string literals. This can be quite convenient!
#[allow(non_snake_case)]
#[inline]
pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
    // Delegate to the argument's `AsRef<[u8]>` impl; the returned slice
    // borrows from `bytes` for the same lifetime `'a`.
    let slice: &'a [u8] = AsRef::<[u8]>::as_ref(bytes);
    slice
}
74 
75 impl ByteSlice for [u8] {
76     #[inline]
as_bytes(&self) -> &[u8]77     fn as_bytes(&self) -> &[u8] {
78         self
79     }
80 
81     #[inline]
as_bytes_mut(&mut self) -> &mut [u8]82     fn as_bytes_mut(&mut self) -> &mut [u8] {
83         self
84     }
85 }
86 
/// Ensure that callers cannot implement `ByteSlice` by making an
/// unimplementable trait its super trait.
pub trait Sealed {}
// `[u8]` must implement `Sealed` to satisfy the `ByteSlice: Sealed` super
// trait bound.
impl Sealed for [u8] {}
91 
92 /// A trait that extends `&[u8]` with string oriented methods.
93 pub trait ByteSlice: Sealed {
    /// Returns the raw bytes of this type as a `&[u8]`.
    ///
    /// This is always a no-op and callers shouldn't care about it. It only
    /// exists so that the provided methods of this extension trait have a
    /// way to reach the underlying bytes.
    #[doc(hidden)]
    fn as_bytes(&self) -> &[u8];
99 
    /// Returns the raw bytes of this type as a `&mut [u8]`.
    ///
    /// This is always a no-op and callers shouldn't care about it. It only
    /// exists so that the provided methods of this extension trait have a
    /// way to reach the underlying bytes mutably.
    #[doc(hidden)]
    fn as_bytes_mut(&mut self) -> &mut [u8];
105 
106     /// Return this byte slice as a `&BStr`.
107     ///
    /// A `&BStr` is useful because of its `fmt::Debug` representation
109     /// and various other trait implementations (such as `PartialEq` and
110     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
111     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
112     /// sequences are used.
113     ///
114     /// # Examples
115     ///
116     /// Basic usage:
117     ///
118     /// ```
119     /// use bstr::ByteSlice;
120     ///
121     /// println!("{:?}", b"foo\xFFbar".as_bstr());
122     /// ```
123     #[inline]
as_bstr(&self) -> &BStr124     fn as_bstr(&self) -> &BStr {
125         BStr::new(self.as_bytes())
126     }
127 
128     /// Return this byte slice as a `&mut BStr`.
129     ///
    /// A `&mut BStr` is useful because of its `fmt::Debug` representation
131     /// and various other trait implementations (such as `PartialEq` and
132     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
133     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
134     /// sequences are used.
135     ///
136     /// # Examples
137     ///
138     /// Basic usage:
139     ///
140     /// ```
141     /// use bstr::ByteSlice;
142     ///
143     /// let mut bytes = *b"foo\xFFbar";
144     /// println!("{:?}", &mut bytes.as_bstr_mut());
145     /// ```
146     #[inline]
as_bstr_mut(&mut self) -> &mut BStr147     fn as_bstr_mut(&mut self) -> &mut BStr {
148         BStr::new_mut(self.as_bytes_mut())
149     }
150 
151     /// Create an immutable byte string from an OS string slice.
152     ///
153     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
154     /// this returns `None` if the given OS string is not valid UTF-8. (For
155     /// example, on Windows, file paths are allowed to be a sequence of
156     /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
157     /// valid UTF-8.)
158     ///
159     /// # Examples
160     ///
161     /// Basic usage:
162     ///
163     /// ```
164     /// use std::ffi::OsStr;
165     ///
166     /// use bstr::{B, ByteSlice};
167     ///
168     /// let os_str = OsStr::new("foo");
169     /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
170     /// assert_eq!(bs, B("foo"));
171     /// ```
172     #[cfg(feature = "std")]
173     #[inline]
from_os_str(os_str: &OsStr) -> Option<&[u8]>174     fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
175         #[cfg(unix)]
176         #[inline]
177         fn imp(os_str: &OsStr) -> Option<&[u8]> {
178             use std::os::unix::ffi::OsStrExt;
179 
180             Some(os_str.as_bytes())
181         }
182 
183         #[cfg(not(unix))]
184         #[inline]
185         fn imp(os_str: &OsStr) -> Option<&[u8]> {
186             os_str.to_str().map(|s| s.as_bytes())
187         }
188 
189         imp(os_str)
190     }
191 
192     /// Create an immutable byte string from a file path.
193     ///
194     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
195     /// this returns `None` if the given path is not valid UTF-8. (For example,
196     /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
197     /// integers. Not all such sequences can be transcoded to valid UTF-8.)
198     ///
199     /// # Examples
200     ///
201     /// Basic usage:
202     ///
203     /// ```
204     /// use std::path::Path;
205     ///
206     /// use bstr::{B, ByteSlice};
207     ///
208     /// let path = Path::new("foo");
209     /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
210     /// assert_eq!(bs, B("foo"));
211     /// ```
212     #[cfg(feature = "std")]
213     #[inline]
from_path(path: &Path) -> Option<&[u8]>214     fn from_path(path: &Path) -> Option<&[u8]> {
215         Self::from_os_str(path.as_os_str())
216     }
217 
218     /// Safely convert this byte string into a `&str` if it's valid UTF-8.
219     ///
220     /// If this byte string is not valid UTF-8, then an error is returned. The
221     /// error returned indicates the first invalid byte found and the length
222     /// of the error.
223     ///
224     /// In cases where a lossy conversion to `&str` is acceptable, then use one
225     /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
226     /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
227     /// methods.
228     ///
229     /// # Examples
230     ///
231     /// Basic usage:
232     ///
233     /// ```
234     /// use bstr::{B, ByteSlice, ByteVec};
235     ///
236     /// # fn example() -> Result<(), bstr::Utf8Error> {
237     /// let s = B("☃βツ").to_str()?;
238     /// assert_eq!("☃βツ", s);
239     ///
240     /// let mut bstring = <Vec<u8>>::from("☃βツ");
241     /// bstring.push(b'\xFF');
242     /// let err = bstring.to_str().unwrap_err();
243     /// assert_eq!(8, err.valid_up_to());
244     /// # Ok(()) }; example().unwrap()
245     /// ```
246     #[inline]
to_str(&self) -> Result<&str, Utf8Error>247     fn to_str(&self) -> Result<&str, Utf8Error> {
248         utf8::validate(self.as_bytes()).map(|_| {
249             // SAFETY: This is safe because of the guarantees provided by
250             // utf8::validate.
251             unsafe { str::from_utf8_unchecked(self.as_bytes()) }
252         })
253     }
254 
255     /// Unsafely convert this byte string into a `&str`, without checking for
256     /// valid UTF-8.
257     ///
258     /// # Safety
259     ///
260     /// Callers *must* ensure that this byte string is valid UTF-8 before
261     /// calling this method. Converting a byte string into a `&str` that is
262     /// not valid UTF-8 is considered undefined behavior.
263     ///
264     /// This routine is useful in performance sensitive contexts where the
265     /// UTF-8 validity of the byte string is already known and it is
266     /// undesirable to pay the cost of an additional UTF-8 validation check
267     /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
268     ///
269     /// # Examples
270     ///
271     /// Basic usage:
272     ///
273     /// ```
274     /// use bstr::{B, ByteSlice};
275     ///
276     /// // SAFETY: This is safe because string literals are guaranteed to be
277     /// // valid UTF-8 by the Rust compiler.
278     /// let s = unsafe { B("☃βツ").to_str_unchecked() };
279     /// assert_eq!("☃βツ", s);
280     /// ```
281     #[inline]
to_str_unchecked(&self) -> &str282     unsafe fn to_str_unchecked(&self) -> &str {
283         str::from_utf8_unchecked(self.as_bytes())
284     }
285 
286     /// Convert this byte string to a valid UTF-8 string by replacing invalid
287     /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
288     ///
289     /// If the byte string is already valid UTF-8, then no copying or
    /// allocation is performed and a borrowed string slice is returned. If
291     /// the byte string is not valid UTF-8, then an owned string buffer is
292     /// returned with invalid bytes replaced by the replacement codepoint.
293     ///
294     /// This method uses the "substitution of maximal subparts" (Unicode
295     /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
296     /// codepoint. Specifically, a replacement codepoint is inserted whenever a
297     /// byte is found that cannot possibly lead to a valid code unit sequence.
298     /// If there were previous bytes that represented a prefix of a well-formed
299     /// code unit sequence, then all of those bytes are substituted with a
300     /// single replacement codepoint. The "substitution of maximal subparts"
301     /// strategy is the same strategy used by
302     /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
303     /// For a more precise description of the maximal subpart strategy, see
304     /// the Unicode Standard, Chapter 3, Section 9. See also
305     /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
306     ///
307     /// N.B. Rust's standard library also appears to use the same strategy,
308     /// but it does not appear to be an API guarantee.
309     ///
310     /// # Examples
311     ///
312     /// Basic usage:
313     ///
314     /// ```
315     /// use std::borrow::Cow;
316     ///
317     /// use bstr::ByteSlice;
318     ///
319     /// let mut bstring = <Vec<u8>>::from("☃βツ");
320     /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
321     ///
322     /// // Add a byte that makes the sequence invalid.
323     /// bstring.push(b'\xFF');
324     /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
325     /// ```
326     ///
327     /// This demonstrates the "maximal subpart" substitution logic.
328     ///
329     /// ```
330     /// use bstr::{B, ByteSlice};
331     ///
332     /// // \x61 is the ASCII codepoint for 'a'.
333     /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
334     /// // \xE1\x80 is a valid 2-byte code unit prefix.
335     /// // \xC2 is a valid 1-byte code unit prefix.
336     /// // \x62 is the ASCII codepoint for 'b'.
337     /// //
338     /// // In sum, each of the prefixes is replaced by a single replacement
339     /// // codepoint since none of the prefixes are properly completed. This
340     /// // is in contrast to other strategies that might insert a replacement
341     /// // codepoint for every single byte.
342     /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
343     /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
344     /// ```
345     #[cfg(feature = "std")]
346     #[inline]
to_str_lossy(&self) -> Cow<str>347     fn to_str_lossy(&self) -> Cow<str> {
348         match utf8::validate(self.as_bytes()) {
349             Ok(()) => {
350                 // SAFETY: This is safe because of the guarantees provided by
351                 // utf8::validate.
352                 unsafe {
353                     Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
354                 }
355             }
356             Err(err) => {
357                 let mut lossy = String::with_capacity(self.as_bytes().len());
358                 let (valid, after) =
359                     self.as_bytes().split_at(err.valid_up_to());
360                 // SAFETY: This is safe because utf8::validate guarantees
361                 // that all of `valid` is valid UTF-8.
362                 lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
363                 lossy.push_str("\u{FFFD}");
364                 if let Some(len) = err.error_len() {
365                     after[len..].to_str_lossy_into(&mut lossy);
366                 }
367                 Cow::Owned(lossy)
368             }
369         }
370     }
371 
372     /// Copy the contents of this byte string into the given owned string
373     /// buffer, while replacing invalid UTF-8 code unit sequences with the
374     /// Unicode replacement codepoint (`U+FFFD`).
375     ///
376     /// This method uses the same "substitution of maximal subparts" strategy
377     /// for inserting the replacement codepoint as the
378     /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
379     ///
380     /// This routine is useful for amortizing allocation. However, unlike
381     /// `to_str_lossy`, this routine will _always_ copy the contents of this
382     /// byte string into the destination buffer, even if this byte string is
383     /// valid UTF-8.
384     ///
385     /// # Examples
386     ///
387     /// Basic usage:
388     ///
389     /// ```
390     /// use std::borrow::Cow;
391     ///
392     /// use bstr::ByteSlice;
393     ///
394     /// let mut bstring = <Vec<u8>>::from("☃βツ");
395     /// // Add a byte that makes the sequence invalid.
396     /// bstring.push(b'\xFF');
397     ///
398     /// let mut dest = String::new();
399     /// bstring.to_str_lossy_into(&mut dest);
400     /// assert_eq!("☃βツ\u{FFFD}", dest);
401     /// ```
402     #[cfg(feature = "std")]
403     #[inline]
to_str_lossy_into(&self, dest: &mut String)404     fn to_str_lossy_into(&self, dest: &mut String) {
405         let mut bytes = self.as_bytes();
406         dest.reserve(bytes.len());
407         loop {
408             match utf8::validate(bytes) {
409                 Ok(()) => {
410                     // SAFETY: This is safe because utf8::validate guarantees
411                     // that all of `bytes` is valid UTF-8.
412                     dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
413                     break;
414                 }
415                 Err(err) => {
416                     let (valid, after) = bytes.split_at(err.valid_up_to());
417                     // SAFETY: This is safe because utf8::validate guarantees
418                     // that all of `valid` is valid UTF-8.
419                     dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
420                     dest.push_str("\u{FFFD}");
421                     match err.error_len() {
422                         None => break,
423                         Some(len) => bytes = &after[len..],
424                     }
425                 }
426             }
427         }
428     }
429 
430     /// Create an OS string slice from this byte string.
431     ///
432     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
433     /// this returns a UTF-8 decoding error if this byte string is not valid
434     /// UTF-8. (For example, on Windows, file paths are allowed to be a
435     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
436     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
437     /// 16-bit integers.)
438     ///
439     /// # Examples
440     ///
441     /// Basic usage:
442     ///
443     /// ```
444     /// use bstr::{B, ByteSlice};
445     ///
446     /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
447     /// assert_eq!(os_str, "foo");
448     /// ```
449     #[cfg(feature = "std")]
450     #[inline]
to_os_str(&self) -> Result<&OsStr, Utf8Error>451     fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
452         #[cfg(unix)]
453         #[inline]
454         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
455             use std::os::unix::ffi::OsStrExt;
456 
457             Ok(OsStr::from_bytes(bytes))
458         }
459 
460         #[cfg(not(unix))]
461         #[inline]
462         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
463             bytes.to_str().map(OsStr::new)
464         }
465 
466         imp(self.as_bytes())
467     }
468 
469     /// Lossily create an OS string slice from this byte string.
470     ///
471     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
472     /// this will perform a UTF-8 check and lossily convert this byte string
473     /// into valid UTF-8 using the Unicode replacement codepoint.
474     ///
475     /// Note that this can prevent the correct roundtripping of file paths on
476     /// non-Unix systems such as Windows, where file paths are an arbitrary
477     /// sequence of 16-bit integers.
478     ///
479     /// # Examples
480     ///
481     /// Basic usage:
482     ///
483     /// ```
484     /// use bstr::ByteSlice;
485     ///
486     /// let os_str = b"foo\xFFbar".to_os_str_lossy();
487     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
488     /// ```
489     #[cfg(feature = "std")]
490     #[inline]
to_os_str_lossy(&self) -> Cow<OsStr>491     fn to_os_str_lossy(&self) -> Cow<OsStr> {
492         #[cfg(unix)]
493         #[inline]
494         fn imp(bytes: &[u8]) -> Cow<OsStr> {
495             use std::os::unix::ffi::OsStrExt;
496 
497             Cow::Borrowed(OsStr::from_bytes(bytes))
498         }
499 
500         #[cfg(not(unix))]
501         #[inline]
502         fn imp(bytes: &[u8]) -> Cow<OsStr> {
503             use std::ffi::OsString;
504 
505             match bytes.to_str_lossy() {
506                 Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
507                 Cow::Owned(x) => Cow::Owned(OsString::from(x)),
508             }
509         }
510 
511         imp(self.as_bytes())
512     }
513 
514     /// Create a path slice from this byte string.
515     ///
516     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
517     /// this returns a UTF-8 decoding error if this byte string is not valid
518     /// UTF-8. (For example, on Windows, file paths are allowed to be a
519     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
520     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
521     /// 16-bit integers.)
522     ///
523     /// # Examples
524     ///
525     /// Basic usage:
526     ///
527     /// ```
528     /// use bstr::ByteSlice;
529     ///
530     /// let path = b"foo".to_path().expect("should be valid UTF-8");
531     /// assert_eq!(path.as_os_str(), "foo");
532     /// ```
533     #[cfg(feature = "std")]
534     #[inline]
to_path(&self) -> Result<&Path, Utf8Error>535     fn to_path(&self) -> Result<&Path, Utf8Error> {
536         self.to_os_str().map(Path::new)
537     }
538 
539     /// Lossily create a path slice from this byte string.
540     ///
541     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
542     /// this will perform a UTF-8 check and lossily convert this byte string
543     /// into valid UTF-8 using the Unicode replacement codepoint.
544     ///
545     /// Note that this can prevent the correct roundtripping of file paths on
546     /// non-Unix systems such as Windows, where file paths are an arbitrary
547     /// sequence of 16-bit integers.
548     ///
549     /// # Examples
550     ///
551     /// Basic usage:
552     ///
553     /// ```
554     /// use bstr::ByteSlice;
555     ///
556     /// let bs = b"foo\xFFbar";
557     /// let path = bs.to_path_lossy();
558     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
559     /// ```
560     #[cfg(feature = "std")]
561     #[inline]
to_path_lossy(&self) -> Cow<Path>562     fn to_path_lossy(&self) -> Cow<Path> {
563         use std::path::PathBuf;
564 
565         match self.to_os_str_lossy() {
566             Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
567             Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
568         }
569     }
570 
571     /// Create a new byte string by repeating this byte string `n` times.
572     ///
573     /// # Panics
574     ///
575     /// This function panics if the capacity of the new byte string would
576     /// overflow.
577     ///
578     /// # Examples
579     ///
580     /// Basic usage:
581     ///
582     /// ```
583     /// use bstr::{B, ByteSlice};
584     ///
585     /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
586     /// assert_eq!(b"foo".repeatn(0), B(""));
587     /// ```
588     #[cfg(feature = "std")]
589     #[inline]
repeatn(&self, n: usize) -> Vec<u8>590     fn repeatn(&self, n: usize) -> Vec<u8> {
591         let bs = self.as_bytes();
592         let mut dst = vec![0; bs.len() * n];
593         for i in 0..n {
594             dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
595         }
596         dst
597     }
598 
599     /// Returns true if and only if this byte string contains the given needle.
600     ///
601     /// # Examples
602     ///
603     /// Basic usage:
604     ///
605     /// ```
606     /// use bstr::ByteSlice;
607     ///
608     /// assert!(b"foo bar".contains_str("foo"));
609     /// assert!(b"foo bar".contains_str("bar"));
610     /// assert!(!b"foo".contains_str("foobar"));
611     /// ```
612     #[inline]
contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool613     fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
614         self.find(needle).is_some()
615     }
616 
617     /// Returns true if and only if this byte string has the given prefix.
618     ///
619     /// # Examples
620     ///
621     /// Basic usage:
622     ///
623     /// ```
624     /// use bstr::ByteSlice;
625     ///
626     /// assert!(b"foo bar".starts_with_str("foo"));
627     /// assert!(!b"foo bar".starts_with_str("bar"));
628     /// assert!(!b"foo".starts_with_str("foobar"));
629     /// ```
630     #[inline]
starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool631     fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
632         self.as_bytes().starts_with(prefix.as_ref())
633     }
634 
635     /// Returns true if and only if this byte string has the given suffix.
636     ///
637     /// # Examples
638     ///
639     /// Basic usage:
640     ///
641     /// ```
642     /// use bstr::ByteSlice;
643     ///
644     /// assert!(b"foo bar".ends_with_str("bar"));
645     /// assert!(!b"foo bar".ends_with_str("foo"));
646     /// assert!(!b"bar".ends_with_str("foobar"));
647     /// ```
648     #[inline]
ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool649     fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
650         self.as_bytes().ends_with(suffix.as_ref())
651     }
652 
653     /// Returns the index of the first occurrence of the given needle.
654     ///
655     /// The needle may be any type that can be cheaply converted into a
656     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
657     ///
    /// Note that if you are searching for the same needle in many
659     /// different small haystacks, it may be faster to initialize a
660     /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
661     ///
662     /// # Complexity
663     ///
664     /// This routine is guaranteed to have worst case linear time complexity
665     /// with respect to both the needle and the haystack. That is, this runs
666     /// in `O(needle.len() + haystack.len())` time.
667     ///
668     /// This routine is also guaranteed to have worst case constant space
669     /// complexity.
670     ///
671     /// # Examples
672     ///
673     /// Basic usage:
674     ///
675     /// ```
676     /// use bstr::ByteSlice;
677     ///
678     /// let s = b"foo bar baz";
679     /// assert_eq!(Some(0), s.find("foo"));
680     /// assert_eq!(Some(4), s.find("bar"));
681     /// assert_eq!(None, s.find("quux"));
682     /// ```
683     #[inline]
find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>684     fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
685         Finder::new(needle.as_ref()).find(self.as_bytes())
686     }
687 
688     /// Returns the index of the last occurrence of the given needle.
689     ///
690     /// The needle may be any type that can be cheaply converted into a
691     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
692     ///
    /// Note that if you are searching for the same needle in many
694     /// different small haystacks, it may be faster to initialize a
695     /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
696     /// each search.
697     ///
698     /// # Complexity
699     ///
700     /// This routine is guaranteed to have worst case linear time complexity
701     /// with respect to both the needle and the haystack. That is, this runs
702     /// in `O(needle.len() + haystack.len())` time.
703     ///
704     /// This routine is also guaranteed to have worst case constant space
705     /// complexity.
706     ///
707     /// # Examples
708     ///
709     /// Basic usage:
710     ///
711     /// ```
712     /// use bstr::ByteSlice;
713     ///
714     /// let s = b"foo bar baz";
715     /// assert_eq!(Some(0), s.rfind("foo"));
716     /// assert_eq!(Some(4), s.rfind("bar"));
717     /// assert_eq!(Some(8), s.rfind("ba"));
718     /// assert_eq!(None, s.rfind("quux"));
719     /// ```
720     #[inline]
rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>721     fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
722         FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
723     }
724 
725     /// Returns an iterator of the non-overlapping occurrences of the given
726     /// needle. The iterator yields byte offset positions indicating the start
727     /// of each match.
728     ///
729     /// # Complexity
730     ///
731     /// This routine is guaranteed to have worst case linear time complexity
732     /// with respect to both the needle and the haystack. That is, this runs
733     /// in `O(needle.len() + haystack.len())` time.
734     ///
735     /// This routine is also guaranteed to have worst case constant space
736     /// complexity.
737     ///
738     /// # Examples
739     ///
740     /// Basic usage:
741     ///
742     /// ```
743     /// use bstr::ByteSlice;
744     ///
745     /// let s = b"foo bar foo foo quux foo";
746     /// let matches: Vec<usize> = s.find_iter("foo").collect();
747     /// assert_eq!(matches, vec![0, 8, 12, 21]);
748     /// ```
749     ///
750     /// An empty string matches at every position, including the position
751     /// immediately following the last byte:
752     ///
753     /// ```
754     /// use bstr::ByteSlice;
755     ///
756     /// let matches: Vec<usize> = b"foo".find_iter("").collect();
757     /// assert_eq!(matches, vec![0, 1, 2, 3]);
758     ///
759     /// let matches: Vec<usize> = b"".find_iter("").collect();
760     /// assert_eq!(matches, vec![0]);
761     /// ```
762     #[inline]
find_iter<'a, B: ?Sized + AsRef<[u8]>>( &'a self, needle: &'a B, ) -> Find<'a>763     fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
764         &'a self,
765         needle: &'a B,
766     ) -> Find<'a> {
767         Find::new(self.as_bytes(), needle.as_ref())
768     }
769 
770     /// Returns an iterator of the non-overlapping occurrences of the given
771     /// needle in reverse. The iterator yields byte offset positions indicating
772     /// the start of each match.
773     ///
774     /// # Complexity
775     ///
776     /// This routine is guaranteed to have worst case linear time complexity
777     /// with respect to both the needle and the haystack. That is, this runs
778     /// in `O(needle.len() + haystack.len())` time.
779     ///
780     /// This routine is also guaranteed to have worst case constant space
781     /// complexity.
782     ///
783     /// # Examples
784     ///
785     /// Basic usage:
786     ///
787     /// ```
788     /// use bstr::ByteSlice;
789     ///
790     /// let s = b"foo bar foo foo quux foo";
791     /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
792     /// assert_eq!(matches, vec![21, 12, 8, 0]);
793     /// ```
794     ///
795     /// An empty string matches at every position, including the position
796     /// immediately following the last byte:
797     ///
798     /// ```
799     /// use bstr::ByteSlice;
800     ///
801     /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
802     /// assert_eq!(matches, vec![3, 2, 1, 0]);
803     ///
804     /// let matches: Vec<usize> = b"".rfind_iter("").collect();
805     /// assert_eq!(matches, vec![0]);
806     /// ```
807     #[inline]
rfind_iter<'a, B: ?Sized + AsRef<[u8]>>( &'a self, needle: &'a B, ) -> FindReverse<'a>808     fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
809         &'a self,
810         needle: &'a B,
811     ) -> FindReverse<'a> {
812         FindReverse::new(self.as_bytes(), needle.as_ref())
813     }
814 
815     /// Returns the index of the first occurrence of the given byte. If the
816     /// byte does not occur in this byte string, then `None` is returned.
817     ///
818     /// # Examples
819     ///
820     /// Basic usage:
821     ///
822     /// ```
823     /// use bstr::ByteSlice;
824     ///
825     /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
826     /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
827     /// ```
828     #[inline]
find_byte(&self, byte: u8) -> Option<usize>829     fn find_byte(&self, byte: u8) -> Option<usize> {
830         memchr(byte, self.as_bytes())
831     }
832 
833     /// Returns the index of the last occurrence of the given byte. If the
834     /// byte does not occur in this byte string, then `None` is returned.
835     ///
836     /// # Examples
837     ///
838     /// Basic usage:
839     ///
840     /// ```
841     /// use bstr::ByteSlice;
842     ///
843     /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
844     /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
845     /// ```
846     #[inline]
rfind_byte(&self, byte: u8) -> Option<usize>847     fn rfind_byte(&self, byte: u8) -> Option<usize> {
848         memrchr(byte, self.as_bytes())
849     }
850 
851     /// Returns the index of the first occurrence of the given codepoint.
852     /// If the codepoint does not occur in this byte string, then `None` is
853     /// returned.
854     ///
855     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
856     /// then only explicit occurrences of that encoding will be found. Invalid
857     /// UTF-8 sequences will not be matched.
858     ///
859     /// # Examples
860     ///
861     /// Basic usage:
862     ///
863     /// ```
864     /// use bstr::{B, ByteSlice};
865     ///
866     /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
867     /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
868     /// assert_eq!(None, b"foo bar baz".find_char('y'));
869     /// ```
870     #[inline]
find_char(&self, ch: char) -> Option<usize>871     fn find_char(&self, ch: char) -> Option<usize> {
872         self.find(ch.encode_utf8(&mut [0; 4]))
873     }
874 
875     /// Returns the index of the last occurrence of the given codepoint.
876     /// If the codepoint does not occur in this byte string, then `None` is
877     /// returned.
878     ///
879     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
880     /// then only explicit occurrences of that encoding will be found. Invalid
881     /// UTF-8 sequences will not be matched.
882     ///
883     /// # Examples
884     ///
885     /// Basic usage:
886     ///
887     /// ```
888     /// use bstr::{B, ByteSlice};
889     ///
890     /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
891     /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
892     /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
893     /// ```
894     #[inline]
rfind_char(&self, ch: char) -> Option<usize>895     fn rfind_char(&self, ch: char) -> Option<usize> {
896         self.rfind(ch.encode_utf8(&mut [0; 4]))
897     }
898 
899     /// Returns the index of the first occurrence of any of the bytes in the
900     /// provided set.
901     ///
902     /// The `byteset` may be any type that can be cheaply converted into a
903     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
904     /// note that passing a `&str` which contains multibyte characters may not
905     /// behave as you expect: each byte in the `&str` is treated as an
906     /// individual member of the byte set.
907     ///
908     /// Note that order is irrelevant for the `byteset` parameter, and
909     /// duplicate bytes present in its body are ignored.
910     ///
911     /// # Complexity
912     ///
913     /// This routine is guaranteed to have worst case linear time complexity
914     /// with respect to both the set of bytes and the haystack. That is, this
915     /// runs in `O(byteset.len() + haystack.len())` time.
916     ///
917     /// This routine is also guaranteed to have worst case constant space
918     /// complexity.
919     ///
920     /// # Examples
921     ///
922     /// Basic usage:
923     ///
924     /// ```
925     /// use bstr::ByteSlice;
926     ///
927     /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
928     /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
929     /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
930     /// ```
931     #[inline]
find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>932     fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
933         byteset::find(self.as_bytes(), byteset.as_ref())
934     }
935 
936     /// Returns the index of the first occurrence of a byte that is not a member
937     /// of the provided set.
938     ///
939     /// The `byteset` may be any type that can be cheaply converted into a
940     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
941     /// note that passing a `&str` which contains multibyte characters may not
942     /// behave as you expect: each byte in the `&str` is treated as an
943     /// individual member of the byte set.
944     ///
945     /// Note that order is irrelevant for the `byteset` parameter, and
946     /// duplicate bytes present in its body are ignored.
947     ///
948     /// # Complexity
949     ///
950     /// This routine is guaranteed to have worst case linear time complexity
951     /// with respect to both the set of bytes and the haystack. That is, this
952     /// runs in `O(byteset.len() + haystack.len())` time.
953     ///
954     /// This routine is also guaranteed to have worst case constant space
955     /// complexity.
956     ///
957     /// # Examples
958     ///
959     /// Basic usage:
960     ///
961     /// ```
962     /// use bstr::ByteSlice;
963     ///
964     /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
965     /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
966     /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
967     /// ```
968     #[inline]
find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>969     fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
970         byteset::find_not(self.as_bytes(), byteset.as_ref())
971     }
972 
973     /// Returns the index of the last occurrence of any of the bytes in the
974     /// provided set.
975     ///
976     /// The `byteset` may be any type that can be cheaply converted into a
977     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
978     /// note that passing a `&str` which contains multibyte characters may not
979     /// behave as you expect: each byte in the `&str` is treated as an
980     /// individual member of the byte set.
981     ///
982     /// Note that order is irrelevant for the `byteset` parameter, and duplicate
983     /// bytes present in its body are ignored.
984     ///
985     /// # Complexity
986     ///
987     /// This routine is guaranteed to have worst case linear time complexity
988     /// with respect to both the set of bytes and the haystack. That is, this
989     /// runs in `O(byteset.len() + haystack.len())` time.
990     ///
991     /// This routine is also guaranteed to have worst case constant space
992     /// complexity.
993     ///
994     /// # Examples
995     ///
996     /// Basic usage:
997     ///
998     /// ```
999     /// use bstr::ByteSlice;
1000     ///
1001     /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1002     /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1003     /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1004     /// ```
1005     #[inline]
rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>1006     fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1007         byteset::rfind(self.as_bytes(), byteset.as_ref())
1008     }
1009 
1010     /// Returns the index of the last occurrence of a byte that is not a member
1011     /// of the provided set.
1012     ///
1013     /// The `byteset` may be any type that can be cheaply converted into a
1014     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1015     /// note that passing a `&str` which contains multibyte characters may not
1016     /// behave as you expect: each byte in the `&str` is treated as an
1017     /// individual member of the byte set.
1018     ///
1019     /// Note that order is irrelevant for the `byteset` parameter, and
1020     /// duplicate bytes present in its body are ignored.
1021     ///
1022     /// # Complexity
1023     ///
1024     /// This routine is guaranteed to have worst case linear time complexity
1025     /// with respect to both the set of bytes and the haystack. That is, this
1026     /// runs in `O(byteset.len() + haystack.len())` time.
1027     ///
1028     /// This routine is also guaranteed to have worst case constant space
1029     /// complexity.
1030     ///
1031     /// # Examples
1032     ///
1033     /// Basic usage:
1034     ///
1035     /// ```
1036     /// use bstr::ByteSlice;
1037     ///
1038     /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1039     /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1040     /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1041     /// ```
1042     #[inline]
rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>1043     fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1044         byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1045     }
1046 
1047     /// Returns an iterator over the fields in a byte string, separated by
1048     /// contiguous whitespace.
1049     ///
1050     /// # Example
1051     ///
1052     /// Basic usage:
1053     ///
1054     /// ```
1055     /// use bstr::{B, ByteSlice};
1056     ///
1057     /// let s = B("  foo\tbar\t\u{2003}\nquux   \n");
1058     /// let fields: Vec<&[u8]> = s.fields().collect();
1059     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1060     /// ```
1061     ///
1062     /// A byte string consisting of just whitespace yields no elements:
1063     ///
1064     /// ```
1065     /// use bstr::{B, ByteSlice};
1066     ///
1067     /// assert_eq!(0, B("  \n\t\u{2003}\n  \t").fields().count());
1068     /// ```
1069     #[inline]
fields(&self) -> Fields1070     fn fields(&self) -> Fields {
1071         Fields::new(self.as_bytes())
1072     }
1073 
1074     /// Returns an iterator over the fields in a byte string, separated by
1075     /// contiguous codepoints satisfying the given predicate.
1076     ///
1077     /// If this byte string is not valid UTF-8, then the given closure will
1078     /// be called with a Unicode replacement codepoint when invalid UTF-8
1079     /// bytes are seen.
1080     ///
1081     /// # Example
1082     ///
1083     /// Basic usage:
1084     ///
1085     /// ```
1086     /// use bstr::{B, ByteSlice};
1087     ///
1088     /// let s = b"123foo999999bar1quux123456";
1089     /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1090     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1091     /// ```
1092     ///
1093     /// A byte string consisting of all codepoints satisfying the predicate
1094     /// yields no elements:
1095     ///
1096     /// ```
1097     /// use bstr::ByteSlice;
1098     ///
1099     /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1100     /// ```
1101     #[inline]
fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F>1102     fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F> {
1103         FieldsWith::new(self.as_bytes(), f)
1104     }
1105 
1106     /// Returns an iterator over substrings of this byte string, separated
1107     /// by the given byte string. Each element yielded is guaranteed not to
1108     /// include the splitter substring.
1109     ///
1110     /// The splitter may be any type that can be cheaply converted into a
1111     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1112     ///
1113     /// # Examples
1114     ///
1115     /// Basic usage:
1116     ///
1117     /// ```
1118     /// use bstr::{B, ByteSlice};
1119     ///
1120     /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1121     /// assert_eq!(x, vec![
1122     ///     B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1123     /// ]);
1124     ///
1125     /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1126     /// assert_eq!(x, vec![b""]);
1127     ///
1128     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1129     /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1130     ///
1131     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1132     /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1133     /// ```
1134     ///
1135     /// If a string contains multiple contiguous separators, you will end up
1136     /// with empty strings yielded by the iterator:
1137     ///
1138     /// ```
1139     /// use bstr::{B, ByteSlice};
1140     ///
1141     /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1142     /// assert_eq!(x, vec![
1143     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1144     /// ]);
1145     ///
1146     /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1147     /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1148     /// ```
1149     ///
1150     /// Separators at the start or end of a string are neighbored by empty
1151     /// strings.
1152     ///
1153     /// ```
1154     /// use bstr::{B, ByteSlice};
1155     ///
1156     /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1157     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1158     /// ```
1159     ///
1160     /// When the empty string is used as a separator, it splits every **byte**
1161     /// in the byte string, along with the beginning and end of the byte
1162     /// string.
1163     ///
1164     /// ```
1165     /// use bstr::{B, ByteSlice};
1166     ///
1167     /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1168     /// assert_eq!(x, vec![
1169     ///     B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1170     /// ]);
1171     ///
1172     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1173     /// // may not be valid UTF-8!
1174     /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1175     /// assert_eq!(x, vec![
1176     ///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1177     /// ]);
1178     /// ```
1179     ///
1180     /// Contiguous separators, especially whitespace, can lead to possibly
1181     /// surprising behavior. For example, this code is correct:
1182     ///
1183     /// ```
1184     /// use bstr::{B, ByteSlice};
1185     ///
1186     /// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1187     /// assert_eq!(x, vec![
1188     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1189     /// ]);
1190     /// ```
1191     ///
1192     /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1193     /// [`fields`](#method.fields) instead.
1194     #[inline]
split_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &'a B, ) -> Split<'a>1195     fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
1196         &'a self,
1197         splitter: &'a B,
1198     ) -> Split<'a> {
1199         Split::new(self.as_bytes(), splitter.as_ref())
1200     }
1201 
1202     /// Returns an iterator over substrings of this byte string, separated by
1203     /// the given byte string, in reverse. Each element yielded is guaranteed
1204     /// not to include the splitter substring.
1205     ///
1206     /// The splitter may be any type that can be cheaply converted into a
1207     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1208     ///
1209     /// # Examples
1210     ///
1211     /// Basic usage:
1212     ///
1213     /// ```
1214     /// use bstr::{B, ByteSlice};
1215     ///
1216     /// let x: Vec<&[u8]> =
1217     ///     b"Mary had a little lamb".rsplit_str(" ").collect();
1218     /// assert_eq!(x, vec![
1219     ///     B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1220     /// ]);
1221     ///
1222     /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1223     /// assert_eq!(x, vec![b""]);
1224     ///
1225     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1226     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1227     ///
1228     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1229     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1230     /// ```
1231     ///
1232     /// If a string contains multiple contiguous separators, you will end up
1233     /// with empty strings yielded by the iterator:
1234     ///
1235     /// ```
1236     /// use bstr::{B, ByteSlice};
1237     ///
1238     /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1239     /// assert_eq!(x, vec![
1240     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1241     /// ]);
1242     ///
1243     /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1244     /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1245     /// ```
1246     ///
1247     /// Separators at the start or end of a string are neighbored by empty
1248     /// strings.
1249     ///
1250     /// ```
1251     /// use bstr::{B, ByteSlice};
1252     ///
1253     /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1254     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1255     /// ```
1256     ///
1257     /// When the empty string is used as a separator, it splits every **byte**
1258     /// in the byte string, along with the beginning and end of the byte
1259     /// string.
1260     ///
1261     /// ```
1262     /// use bstr::{B, ByteSlice};
1263     ///
1264     /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1265     /// assert_eq!(x, vec![
1266     ///     B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1267     /// ]);
1268     ///
1269     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1270     /// // may not be valid UTF-8!
1271     /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1272     /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1273     /// ```
1274     ///
1275     /// Contiguous separators, especially whitespace, can lead to possibly
1276     /// surprising behavior. For example, this code is correct:
1277     ///
1278     /// ```
1279     /// use bstr::{B, ByteSlice};
1280     ///
1281     /// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1282     /// assert_eq!(x, vec![
1283     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1284     /// ]);
1285     /// ```
1286     ///
1287     /// It does *not* give you `["a", "b", "c"]`.
1288     #[inline]
rsplit_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &'a B, ) -> SplitReverse<'a>1289     fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
1290         &'a self,
1291         splitter: &'a B,
1292     ) -> SplitReverse<'a> {
1293         SplitReverse::new(self.as_bytes(), splitter.as_ref())
1294     }
1295 
1296     /// Returns an iterator of at most `limit` substrings of this byte string,
1297     /// separated by the given byte string. If `limit` substrings are yielded,
1298     /// then the last substring will contain the remainder of this byte string.
1299     ///
    /// The splitter may be any type that can be cheaply converted into a
    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1302     ///
1303     /// # Examples
1304     ///
1305     /// Basic usage:
1306     ///
1307     /// ```
1308     /// use bstr::{B, ByteSlice};
1309     ///
1310     /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1311     /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1312     ///
1313     /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1314     /// assert_eq!(x, vec![b""]);
1315     ///
1316     /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1317     /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1318     ///
1319     /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1320     /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1321     ///
1322     /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1323     /// assert_eq!(x, vec![B("abcXdef")]);
1324     ///
1325     /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1326     /// assert_eq!(x, vec![B("abcdef")]);
1327     ///
1328     /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1329     /// assert!(x.is_empty());
1330     /// ```
1331     #[inline]
splitn_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, limit: usize, splitter: &'a B, ) -> SplitN<'a>1332     fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1333         &'a self,
1334         limit: usize,
1335         splitter: &'a B,
1336     ) -> SplitN<'a> {
1337         SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1338     }
1339 
1340     /// Returns an iterator of at most `limit` substrings of this byte string,
1341     /// separated by the given byte string, in reverse. If `limit` substrings
1342     /// are yielded, then the last substring will contain the remainder of this
1343     /// byte string.
1344     ///
    /// The splitter may be any type that can be cheaply converted into a
    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1347     ///
1348     /// # Examples
1349     ///
1350     /// Basic usage:
1351     ///
1352     /// ```
1353     /// use bstr::{B, ByteSlice};
1354     ///
1355     /// let x: Vec<_> =
1356     ///     b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1357     /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1358     ///
1359     /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1360     /// assert_eq!(x, vec![b""]);
1361     ///
1362     /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1363     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1364     ///
1365     /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1366     /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1367     ///
1368     /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1369     /// assert_eq!(x, vec![B("abcXdef")]);
1370     ///
1371     /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1372     /// assert_eq!(x, vec![B("abcdef")]);
1373     ///
1374     /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1375     /// assert!(x.is_empty());
1376     /// ```
1377     #[inline]
rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, limit: usize, splitter: &'a B, ) -> SplitNReverse<'a>1378     fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1379         &'a self,
1380         limit: usize,
1381         splitter: &'a B,
1382     ) -> SplitNReverse<'a> {
1383         SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1384     }
1385 
    /// Replace all matches of the given needle with the given replacement, and
    /// return the result as a new `Vec<u8>`.
1388     ///
1389     /// This routine is useful as a convenience. If you need to reuse an
1390     /// allocation, use [`replace_into`](#method.replace_into) instead.
1391     ///
1392     /// # Examples
1393     ///
1394     /// Basic usage:
1395     ///
1396     /// ```
1397     /// use bstr::ByteSlice;
1398     ///
1399     /// let s = b"this is old".replace("old", "new");
1400     /// assert_eq!(s, "this is new".as_bytes());
1401     /// ```
1402     ///
1403     /// When the pattern doesn't match:
1404     ///
1405     /// ```
1406     /// use bstr::ByteSlice;
1407     ///
1408     /// let s = b"this is old".replace("nada nada", "limonada");
1409     /// assert_eq!(s, "this is old".as_bytes());
1410     /// ```
1411     ///
1412     /// When the needle is an empty string:
1413     ///
1414     /// ```
1415     /// use bstr::ByteSlice;
1416     ///
1417     /// let s = b"foo".replace("", "Z");
1418     /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1419     /// ```
1420     #[cfg(feature = "std")]
1421     #[inline]
replace<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, ) -> Vec<u8>1422     fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1423         &self,
1424         needle: N,
1425         replacement: R,
1426     ) -> Vec<u8> {
1427         let mut dest = Vec::with_capacity(self.as_bytes().len());
1428         self.replace_into(needle, replacement, &mut dest);
1429         dest
1430     }
1431 
    /// Replace up to `limit` matches of the given needle with the given
    /// replacement, and return the result as a new `Vec<u8>`.
1434     ///
1435     /// This routine is useful as a convenience. If you need to reuse an
1436     /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1437     ///
1438     /// # Examples
1439     ///
1440     /// Basic usage:
1441     ///
1442     /// ```
1443     /// use bstr::ByteSlice;
1444     ///
1445     /// let s = b"foofoo".replacen("o", "z", 2);
1446     /// assert_eq!(s, "fzzfoo".as_bytes());
1447     /// ```
1448     ///
1449     /// When the pattern doesn't match:
1450     ///
1451     /// ```
1452     /// use bstr::ByteSlice;
1453     ///
1454     /// let s = b"foofoo".replacen("a", "z", 2);
1455     /// assert_eq!(s, "foofoo".as_bytes());
1456     /// ```
1457     ///
1458     /// When the needle is an empty string:
1459     ///
1460     /// ```
1461     /// use bstr::ByteSlice;
1462     ///
1463     /// let s = b"foo".replacen("", "Z", 2);
1464     /// assert_eq!(s, "ZfZoo".as_bytes());
1465     /// ```
1466     #[cfg(feature = "std")]
1467     #[inline]
replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, limit: usize, ) -> Vec<u8>1468     fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1469         &self,
1470         needle: N,
1471         replacement: R,
1472         limit: usize,
1473     ) -> Vec<u8> {
1474         let mut dest = Vec::with_capacity(self.as_bytes().len());
1475         self.replacen_into(needle, replacement, limit, &mut dest);
1476         dest
1477     }
1478 
1479     /// Replace all matches of the given needle with the given replacement,
1480     /// and write the result into the provided `Vec<u8>`.
1481     ///
1482     /// This does **not** clear `dest` before writing to it.
1483     ///
    /// This routine is useful for reusing an allocation. For a more convenient
1485     /// API, use [`replace`](#method.replace) instead.
1486     ///
1487     /// # Examples
1488     ///
1489     /// Basic usage:
1490     ///
1491     /// ```
1492     /// use bstr::ByteSlice;
1493     ///
1494     /// let s = b"this is old";
1495     ///
1496     /// let mut dest = vec![];
1497     /// s.replace_into("old", "new", &mut dest);
1498     /// assert_eq!(dest, "this is new".as_bytes());
1499     /// ```
1500     ///
1501     /// When the pattern doesn't match:
1502     ///
1503     /// ```
1504     /// use bstr::ByteSlice;
1505     ///
1506     /// let s = b"this is old";
1507     ///
1508     /// let mut dest = vec![];
1509     /// s.replace_into("nada nada", "limonada", &mut dest);
1510     /// assert_eq!(dest, "this is old".as_bytes());
1511     /// ```
1512     ///
1513     /// When the needle is an empty string:
1514     ///
1515     /// ```
1516     /// use bstr::ByteSlice;
1517     ///
1518     /// let s = b"foo";
1519     ///
1520     /// let mut dest = vec![];
1521     /// s.replace_into("", "Z", &mut dest);
1522     /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1523     /// ```
1524     #[cfg(feature = "std")]
1525     #[inline]
replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, dest: &mut Vec<u8>, )1526     fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1527         &self,
1528         needle: N,
1529         replacement: R,
1530         dest: &mut Vec<u8>,
1531     ) {
1532         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1533 
1534         let mut last = 0;
1535         for start in self.find_iter(needle) {
1536             dest.push_str(&self.as_bytes()[last..start]);
1537             dest.push_str(replacement);
1538             last = start + needle.len();
1539         }
1540         dest.push_str(&self.as_bytes()[last..]);
1541     }
1542 
1543     /// Replace up to `limit` matches of the given needle with the given
1544     /// replacement, and write the result into the provided `Vec<u8>`.
1545     ///
1546     /// This does **not** clear `dest` before writing to it.
1547     ///
    /// This routine is useful for reusing an allocation. For a more convenient
1549     /// API, use [`replacen`](#method.replacen) instead.
1550     ///
1551     /// # Examples
1552     ///
1553     /// Basic usage:
1554     ///
1555     /// ```
1556     /// use bstr::ByteSlice;
1557     ///
1558     /// let s = b"foofoo";
1559     ///
1560     /// let mut dest = vec![];
1561     /// s.replacen_into("o", "z", 2, &mut dest);
1562     /// assert_eq!(dest, "fzzfoo".as_bytes());
1563     /// ```
1564     ///
1565     /// When the pattern doesn't match:
1566     ///
1567     /// ```
1568     /// use bstr::ByteSlice;
1569     ///
1570     /// let s = b"foofoo";
1571     ///
1572     /// let mut dest = vec![];
1573     /// s.replacen_into("a", "z", 2, &mut dest);
1574     /// assert_eq!(dest, "foofoo".as_bytes());
1575     /// ```
1576     ///
1577     /// When the needle is an empty string:
1578     ///
1579     /// ```
1580     /// use bstr::ByteSlice;
1581     ///
1582     /// let s = b"foo";
1583     ///
1584     /// let mut dest = vec![];
1585     /// s.replacen_into("", "Z", 2, &mut dest);
1586     /// assert_eq!(dest, "ZfZoo".as_bytes());
1587     /// ```
1588     #[cfg(feature = "std")]
1589     #[inline]
replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, limit: usize, dest: &mut Vec<u8>, )1590     fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1591         &self,
1592         needle: N,
1593         replacement: R,
1594         limit: usize,
1595         dest: &mut Vec<u8>,
1596     ) {
1597         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1598 
1599         let mut last = 0;
1600         for start in self.find_iter(needle).take(limit) {
1601             dest.push_str(&self.as_bytes()[last..start]);
1602             dest.push_str(replacement);
1603             last = start + needle.len();
1604         }
1605         dest.push_str(&self.as_bytes()[last..]);
1606     }
1607 
1608     /// Returns an iterator over the bytes in this byte string.
1609     ///
1610     /// # Examples
1611     ///
1612     /// Basic usage:
1613     ///
1614     /// ```
1615     /// use bstr::ByteSlice;
1616     ///
1617     /// let bs = b"foobar";
1618     /// let bytes: Vec<u8> = bs.bytes().collect();
1619     /// assert_eq!(bytes, bs);
1620     /// ```
1621     #[inline]
bytes(&self) -> Bytes1622     fn bytes(&self) -> Bytes {
1623         Bytes { it: self.as_bytes().iter() }
1624     }
1625 
1626     /// Returns an iterator over the Unicode scalar values in this byte string.
1627     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1628     /// is yielded instead.
1629     ///
1630     /// # Examples
1631     ///
1632     /// Basic usage:
1633     ///
1634     /// ```
1635     /// use bstr::ByteSlice;
1636     ///
1637     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1638     /// let chars: Vec<char> = bs.chars().collect();
    /// assert_eq!(vec!['☃', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', 'a'], chars);
1640     /// ```
1641     ///
1642     /// Codepoints can also be iterated over in reverse:
1643     ///
1644     /// ```
1645     /// use bstr::ByteSlice;
1646     ///
1647     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1648     /// let chars: Vec<char> = bs.chars().rev().collect();
    /// assert_eq!(vec!['a', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', '☃'], chars);
1650     /// ```
1651     #[inline]
chars(&self) -> Chars1652     fn chars(&self) -> Chars {
1653         Chars::new(self.as_bytes())
1654     }
1655 
1656     /// Returns an iterator over the Unicode scalar values in this byte string
1657     /// along with their starting and ending byte index positions. If invalid
1658     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1659     /// instead.
1660     ///
1661     /// Note that this is slightly different from the `CharIndices` iterator
1662     /// provided by the standard library. Aside from working on possibly
1663     /// invalid UTF-8, this iterator provides both the corresponding starting
1664     /// and ending byte indices of each codepoint yielded. The ending position
1665     /// is necessary to slice the original byte string when invalid UTF-8 bytes
1666     /// are converted into a Unicode replacement codepoint, since a single
1667     /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1668     /// (inclusive).
1669     ///
1670     /// # Examples
1671     ///
1672     /// Basic usage:
1673     ///
1674     /// ```
1675     /// use bstr::ByteSlice;
1676     ///
1677     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1678     /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1679     /// assert_eq!(chars, vec![
1680     ///     (0, 3, '☃'),
1681     ///     (3, 4, '\u{FFFD}'),
1682     ///     (4, 8, '\u{1D783}'),
1683     ///     (8, 10, '\u{FFFD}'),
1684     ///     (10, 11, 'a'),
1685     /// ]);
1686     /// ```
1687     ///
1688     /// Codepoints can also be iterated over in reverse:
1689     ///
1690     /// ```
1691     /// use bstr::ByteSlice;
1692     ///
1693     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1694     /// let chars: Vec<(usize, usize, char)> = bs
1695     ///     .char_indices()
1696     ///     .rev()
1697     ///     .collect();
1698     /// assert_eq!(chars, vec![
1699     ///     (10, 11, 'a'),
1700     ///     (8, 10, '\u{FFFD}'),
1701     ///     (4, 8, '\u{1D783}'),
1702     ///     (3, 4, '\u{FFFD}'),
1703     ///     (0, 3, '☃'),
1704     /// ]);
1705     /// ```
1706     #[inline]
char_indices(&self) -> CharIndices1707     fn char_indices(&self) -> CharIndices {
1708         CharIndices::new(self.as_bytes())
1709     }
1710 
1711     /// Iterate over chunks of valid UTF-8.
1712     ///
1713     /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1714     /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1715     /// which are determined via the "substitution of maximal subparts"
1716     /// strategy described in the docs for the
1717     /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1718     /// method.
1719     ///
1720     /// # Examples
1721     ///
1722     /// This example shows how to gather all valid and invalid chunks from a
1723     /// byte slice:
1724     ///
1725     /// ```
1726     /// use bstr::{ByteSlice, Utf8Chunk};
1727     ///
1728     /// let bytes = b"foo\xFD\xFEbar\xFF";
1729     ///
1730     /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1731     /// for chunk in bytes.utf8_chunks() {
1732     ///     if !chunk.valid().is_empty() {
1733     ///         valid_chunks.push(chunk.valid());
1734     ///     }
1735     ///     if !chunk.invalid().is_empty() {
1736     ///         invalid_chunks.push(chunk.invalid());
1737     ///     }
1738     /// }
1739     ///
1740     /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1741     /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1742     /// ```
1743     #[inline]
utf8_chunks(&self) -> Utf8Chunks1744     fn utf8_chunks(&self) -> Utf8Chunks {
1745         Utf8Chunks { bytes: self.as_bytes() }
1746     }
1747 
1748     /// Returns an iterator over the grapheme clusters in this byte string.
1749     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1750     /// is yielded instead.
1751     ///
1752     /// # Examples
1753     ///
1754     /// This example shows how multiple codepoints can combine to form a
1755     /// single grapheme cluster:
1756     ///
1757     /// ```
1758     /// use bstr::ByteSlice;
1759     ///
1760     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1761     /// let graphemes: Vec<&str> = bs.graphemes().collect();
1762     /// assert_eq!(vec!["à̖", "\u{1F1FA}\u{1F1F8}"], graphemes);
1763     /// ```
1764     ///
1765     /// This shows that graphemes can be iterated over in reverse:
1766     ///
1767     /// ```
1768     /// use bstr::ByteSlice;
1769     ///
1770     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1771     /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1772     /// assert_eq!(vec!["\u{1F1FA}\u{1F1F8}", "à̖"], graphemes);
1773     /// ```
1774     #[cfg(feature = "unicode")]
1775     #[inline]
graphemes(&self) -> Graphemes1776     fn graphemes(&self) -> Graphemes {
1777         Graphemes::new(self.as_bytes())
1778     }
1779 
1780     /// Returns an iterator over the grapheme clusters in this byte string
1781     /// along with their starting and ending byte index positions. If invalid
1782     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1783     /// instead.
1784     ///
1785     /// # Examples
1786     ///
1787     /// This example shows how to get the byte offsets of each individual
1788     /// grapheme cluster:
1789     ///
1790     /// ```
1791     /// use bstr::ByteSlice;
1792     ///
1793     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1794     /// let graphemes: Vec<(usize, usize, &str)> =
1795     ///     bs.grapheme_indices().collect();
1796     /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "\u{1F1FA}\u{1F1F8}")], graphemes);
1797     /// ```
1798     ///
1799     /// This example shows what happens when invalid UTF-8 is encountered. Note
1800     /// that the offsets are valid indices into the original string, and do
1801     /// not necessarily correspond to the length of the `&str` returned!
1802     ///
1803     /// ```
1804     /// use bstr::{ByteSlice, ByteVec};
1805     ///
1806     /// let mut bytes = vec![];
1807     /// bytes.push_str("a\u{0300}\u{0316}");
1808     /// bytes.push(b'\xFF');
1809     /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1810     ///
1811     /// let graphemes: Vec<(usize, usize, &str)> =
1812     ///     bytes.grapheme_indices().collect();
1813     /// assert_eq!(
1814     ///     graphemes,
1815     ///     vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "\u{1F1FA}\u{1F1F8}")]
1816     /// );
1817     /// ```
1818     #[cfg(feature = "unicode")]
1819     #[inline]
grapheme_indices(&self) -> GraphemeIndices1820     fn grapheme_indices(&self) -> GraphemeIndices {
1821         GraphemeIndices::new(self.as_bytes())
1822     }
1823 
1824     /// Returns an iterator over the words in this byte string. If invalid
1825     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1826     /// instead.
1827     ///
1828     /// This is similar to
1829     /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1830     /// except it only returns elements that contain a "word" character. A word
1831     /// character is defined by UTS #18 (Annex C) to be the combination of the
1832     /// `Alphabetic` and `Join_Control` properties, along with the
1833     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1834     /// categories.
1835     ///
1836     /// Since words are made up of one or more codepoints, this iterator
1837     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1838     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1839     ///
1840     /// # Examples
1841     ///
1842     /// Basic usage:
1843     ///
1844     /// ```
1845     /// use bstr::ByteSlice;
1846     ///
1847     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1848     /// let words: Vec<&str> = bs.words().collect();
1849     /// assert_eq!(words, vec![
1850     ///     "The", "quick", "brown", "fox", "can't",
1851     ///     "jump", "32.3", "feet", "right",
1852     /// ]);
1853     /// ```
1854     #[cfg(feature = "unicode")]
1855     #[inline]
words(&self) -> Words1856     fn words(&self) -> Words {
1857         Words::new(self.as_bytes())
1858     }
1859 
1860     /// Returns an iterator over the words in this byte string along with
1861     /// their starting and ending byte index positions.
1862     ///
1863     /// This is similar to
1864     /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1865     /// except it only returns elements that contain a "word" character. A word
1866     /// character is defined by UTS #18 (Annex C) to be the combination of the
1867     /// `Alphabetic` and `Join_Control` properties, along with the
1868     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1869     /// categories.
1870     ///
1871     /// Since words are made up of one or more codepoints, this iterator
1872     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1873     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1874     ///
1875     /// # Examples
1876     ///
1877     /// This example shows how to get the byte offsets of each individual
1878     /// word:
1879     ///
1880     /// ```
1881     /// use bstr::ByteSlice;
1882     ///
1883     /// let bs = b"can't jump 32.3 feet";
1884     /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
1885     /// assert_eq!(words, vec![
1886     ///     (0, 5, "can't"),
1887     ///     (6, 10, "jump"),
1888     ///     (11, 15, "32.3"),
1889     ///     (16, 20, "feet"),
1890     /// ]);
1891     /// ```
1892     #[cfg(feature = "unicode")]
1893     #[inline]
word_indices(&self) -> WordIndices1894     fn word_indices(&self) -> WordIndices {
1895         WordIndices::new(self.as_bytes())
1896     }
1897 
1898     /// Returns an iterator over the words in this byte string, along with
1899     /// all breaks between the words. Concatenating all elements yielded by
1900     /// the iterator results in the original string (modulo Unicode replacement
1901     /// codepoint substitutions if invalid UTF-8 is encountered).
1902     ///
1903     /// Since words are made up of one or more codepoints, this iterator
1904     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1905     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1906     ///
1907     /// # Examples
1908     ///
1909     /// Basic usage:
1910     ///
1911     /// ```
1912     /// use bstr::ByteSlice;
1913     ///
1914     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1915     /// let words: Vec<&str> = bs.words_with_breaks().collect();
1916     /// assert_eq!(words, vec![
1917     ///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
1918     ///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
1919     ///     ",", " ", "right", "?",
1920     /// ]);
1921     /// ```
1922     #[cfg(feature = "unicode")]
1923     #[inline]
words_with_breaks(&self) -> WordsWithBreaks1924     fn words_with_breaks(&self) -> WordsWithBreaks {
1925         WordsWithBreaks::new(self.as_bytes())
1926     }
1927 
1928     /// Returns an iterator over the words and their byte offsets in this
1929     /// byte string, along with all breaks between the words. Concatenating
1930     /// all elements yielded by the iterator results in the original string
1931     /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
1932     /// encountered).
1933     ///
1934     /// Since words are made up of one or more codepoints, this iterator
1935     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1936     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1937     ///
1938     /// # Examples
1939     ///
1940     /// This example shows how to get the byte offsets of each individual
1941     /// word:
1942     ///
1943     /// ```
1944     /// use bstr::ByteSlice;
1945     ///
1946     /// let bs = b"can't jump 32.3 feet";
1947     /// let words: Vec<(usize, usize, &str)> =
1948     ///     bs.words_with_break_indices().collect();
1949     /// assert_eq!(words, vec![
1950     ///     (0, 5, "can't"),
1951     ///     (5, 6, " "),
1952     ///     (6, 10, "jump"),
1953     ///     (10, 11, " "),
1954     ///     (11, 15, "32.3"),
1955     ///     (15, 16, " "),
1956     ///     (16, 20, "feet"),
1957     /// ]);
1958     /// ```
1959     #[cfg(feature = "unicode")]
1960     #[inline]
words_with_break_indices(&self) -> WordsWithBreakIndices1961     fn words_with_break_indices(&self) -> WordsWithBreakIndices {
1962         WordsWithBreakIndices::new(self.as_bytes())
1963     }
1964 
1965     /// Returns an iterator over the sentences in this byte string.
1966     ///
1967     /// Typically, a sentence will include its trailing punctuation and
1968     /// whitespace. Concatenating all elements yielded by the iterator
1969     /// results in the original string (modulo Unicode replacement codepoint
1970     /// substitutions if invalid UTF-8 is encountered).
1971     ///
1972     /// Since sentences are made up of one or more codepoints, this iterator
1973     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1974     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1975     ///
1976     /// # Examples
1977     ///
1978     /// Basic usage:
1979     ///
1980     /// ```
1981     /// use bstr::ByteSlice;
1982     ///
1983     /// let bs = b"I want this. Not that. Right now.";
1984     /// let sentences: Vec<&str> = bs.sentences().collect();
1985     /// assert_eq!(sentences, vec![
1986     ///     "I want this. ",
1987     ///     "Not that. ",
1988     ///     "Right now.",
1989     /// ]);
1990     /// ```
1991     #[cfg(feature = "unicode")]
1992     #[inline]
sentences(&self) -> Sentences1993     fn sentences(&self) -> Sentences {
1994         Sentences::new(self.as_bytes())
1995     }
1996 
1997     /// Returns an iterator over the sentences in this byte string along with
1998     /// their starting and ending byte index positions.
1999     ///
2000     /// Typically, a sentence will include its trailing punctuation and
2001     /// whitespace. Concatenating all elements yielded by the iterator
2002     /// results in the original string (modulo Unicode replacement codepoint
2003     /// substitutions if invalid UTF-8 is encountered).
2004     ///
2005     /// Since sentences are made up of one or more codepoints, this iterator
2006     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2007     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2008     ///
2009     /// # Examples
2010     ///
2011     /// Basic usage:
2012     ///
2013     /// ```
2014     /// use bstr::ByteSlice;
2015     ///
2016     /// let bs = b"I want this. Not that. Right now.";
2017     /// let sentences: Vec<(usize, usize, &str)> =
2018     ///     bs.sentence_indices().collect();
2019     /// assert_eq!(sentences, vec![
2020     ///     (0, 13, "I want this. "),
2021     ///     (13, 23, "Not that. "),
2022     ///     (23, 33, "Right now."),
2023     /// ]);
2024     /// ```
2025     #[cfg(feature = "unicode")]
2026     #[inline]
sentence_indices(&self) -> SentenceIndices2027     fn sentence_indices(&self) -> SentenceIndices {
2028         SentenceIndices::new(self.as_bytes())
2029     }
2030 
2031     /// An iterator over all lines in a byte string, without their
2032     /// terminators.
2033     ///
2034     /// For this iterator, the only line terminators recognized are `\r\n` and
2035     /// `\n`.
2036     ///
2037     /// # Examples
2038     ///
2039     /// Basic usage:
2040     ///
2041     /// ```
2042     /// use bstr::{B, ByteSlice};
2043     ///
2044     /// let s = b"\
2045     /// foo
2046     ///
2047     /// bar\r
2048     /// baz
2049     ///
2050     ///
2051     /// quux";
2052     /// let lines: Vec<&[u8]> = s.lines().collect();
2053     /// assert_eq!(lines, vec![
2054     ///     B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2055     /// ]);
2056     /// ```
2057     #[inline]
lines(&self) -> Lines2058     fn lines(&self) -> Lines {
2059         Lines::new(self.as_bytes())
2060     }
2061 
2062     /// An iterator over all lines in a byte string, including their
2063     /// terminators.
2064     ///
2065     /// For this iterator, the only line terminator recognized is `\n`. (Since
2066     /// line terminators are included, this also handles `\r\n` line endings.)
2067     ///
2068     /// Line terminators are only included if they are present in the original
2069     /// byte string. For example, the last line in a byte string may not end
2070     /// with a line terminator.
2071     ///
2072     /// Concatenating all elements yielded by this iterator is guaranteed to
2073     /// yield the original byte string.
2074     ///
2075     /// # Examples
2076     ///
2077     /// Basic usage:
2078     ///
2079     /// ```
2080     /// use bstr::{B, ByteSlice};
2081     ///
2082     /// let s = b"\
2083     /// foo
2084     ///
2085     /// bar\r
2086     /// baz
2087     ///
2088     ///
2089     /// quux";
2090     /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2091     /// assert_eq!(lines, vec![
2092     ///     B("foo\n"),
2093     ///     B("\n"),
2094     ///     B("bar\r\n"),
2095     ///     B("baz\n"),
2096     ///     B("\n"),
2097     ///     B("\n"),
2098     ///     B("quux"),
2099     /// ]);
2100     /// ```
2101     #[inline]
lines_with_terminator(&self) -> LinesWithTerminator2102     fn lines_with_terminator(&self) -> LinesWithTerminator {
2103         LinesWithTerminator::new(self.as_bytes())
2104     }
2105 
2106     /// Return a byte string slice with leading and trailing whitespace
2107     /// removed.
2108     ///
2109     /// Whitespace is defined according to the terms of the `White_Space`
2110     /// Unicode property.
2111     ///
2112     /// # Examples
2113     ///
2114     /// Basic usage:
2115     ///
2116     /// ```
2117     /// use bstr::{B, ByteSlice};
2118     ///
2119     /// let s = B(" foo\tbar\t\u{2003}\n");
2120     /// assert_eq!(s.trim(), B("foo\tbar"));
2121     /// ```
2122     #[cfg(feature = "unicode")]
2123     #[inline]
trim(&self) -> &[u8]2124     fn trim(&self) -> &[u8] {
2125         self.trim_start().trim_end()
2126     }
2127 
2128     /// Return a byte string slice with leading whitespace removed.
2129     ///
2130     /// Whitespace is defined according to the terms of the `White_Space`
2131     /// Unicode property.
2132     ///
2133     /// # Examples
2134     ///
2135     /// Basic usage:
2136     ///
2137     /// ```
2138     /// use bstr::{B, ByteSlice};
2139     ///
2140     /// let s = B(" foo\tbar\t\u{2003}\n");
2141     /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2142     /// ```
2143     #[cfg(feature = "unicode")]
2144     #[inline]
trim_start(&self) -> &[u8]2145     fn trim_start(&self) -> &[u8] {
2146         let start = whitespace_len_fwd(self.as_bytes());
2147         &self.as_bytes()[start..]
2148     }
2149 
2150     /// Return a byte string slice with trailing whitespace removed.
2151     ///
2152     /// Whitespace is defined according to the terms of the `White_Space`
2153     /// Unicode property.
2154     ///
2155     /// # Examples
2156     ///
2157     /// Basic usage:
2158     ///
2159     /// ```
2160     /// use bstr::{B, ByteSlice};
2161     ///
2162     /// let s = B(" foo\tbar\t\u{2003}\n");
2163     /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2164     /// ```
2165     #[cfg(feature = "unicode")]
2166     #[inline]
trim_end(&self) -> &[u8]2167     fn trim_end(&self) -> &[u8] {
2168         let end = whitespace_len_rev(self.as_bytes());
2169         &self.as_bytes()[..end]
2170     }
2171 
2172     /// Return a byte string slice with leading and trailing characters
2173     /// satisfying the given predicate removed.
2174     ///
2175     /// # Examples
2176     ///
2177     /// Basic usage:
2178     ///
2179     /// ```
2180     /// use bstr::{B, ByteSlice};
2181     ///
2182     /// let s = b"123foo5bar789";
2183     /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2184     /// ```
2185     #[inline]
trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2186     fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2187         self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2188     }
2189 
2190     /// Return a byte string slice with leading characters satisfying the given
2191     /// predicate removed.
2192     ///
2193     /// # Examples
2194     ///
2195     /// Basic usage:
2196     ///
2197     /// ```
2198     /// use bstr::{B, ByteSlice};
2199     ///
2200     /// let s = b"123foo5bar789";
2201     /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2202     /// ```
2203     #[inline]
trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2204     fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2205         for (s, _, ch) in self.char_indices() {
2206             if !trim(ch) {
2207                 return &self.as_bytes()[s..];
2208             }
2209         }
2210         b""
2211     }
2212 
2213     /// Return a byte string slice with trailing characters satisfying the
2214     /// given predicate removed.
2215     ///
2216     /// # Examples
2217     ///
2218     /// Basic usage:
2219     ///
2220     /// ```
2221     /// use bstr::{B, ByteSlice};
2222     ///
2223     /// let s = b"123foo5bar789";
2224     /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2225     /// ```
2226     #[inline]
trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2227     fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2228         for (_, e, ch) in self.char_indices().rev() {
2229             if !trim(ch) {
2230                 return &self.as_bytes()[..e];
2231             }
2232         }
2233         b""
2234     }
2235 
2236     /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2237     /// byte string.
2238     ///
2239     /// In this case, lowercase is defined according to the `Lowercase` Unicode
2240     /// property.
2241     ///
2242     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2243     /// then it is written to the returned byte string unchanged.
2244     ///
2245     /// Note that some characters in this byte string may expand into multiple
2246     /// characters when changing the case, so the number of bytes in the
2247     /// returned byte string may not be equivalent to the number of bytes in
2248     /// this byte string.
2249     ///
2250     /// If you'd like to reuse an allocation for performance reasons, then use
2251     /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2252     ///
2253     /// # Examples
2254     ///
2255     /// Basic usage:
2256     ///
2257     /// ```
2258     /// use bstr::{B, ByteSlice};
2259     ///
2260     /// let s = B("HELLO Β");
2261     /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2262     /// ```
2263     ///
2264     /// Scripts without case are not changed:
2265     ///
2266     /// ```
2267     /// use bstr::{B, ByteSlice};
2268     ///
2269     /// let s = B("农历新年");
2270     /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2271     /// ```
2272     ///
2273     /// Invalid UTF-8 remains as is:
2274     ///
2275     /// ```
2276     /// use bstr::{B, ByteSlice};
2277     ///
2278     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2279     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2280     /// ```
2281     #[cfg(all(feature = "std", feature = "unicode"))]
2282     #[inline]
to_lowercase(&self) -> Vec<u8>2283     fn to_lowercase(&self) -> Vec<u8> {
2284         let mut buf = vec![];
2285         self.to_lowercase_into(&mut buf);
2286         buf
2287     }
2288 
2289     /// Writes the lowercase equivalent of this byte string into the given
2290     /// buffer. The buffer is not cleared before written to.
2291     ///
2292     /// In this case, lowercase is defined according to the `Lowercase`
2293     /// Unicode property.
2294     ///
2295     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2296     /// then it is written to the given buffer unchanged.
2297     ///
2298     /// Note that some characters in this byte string may expand into multiple
2299     /// characters when changing the case, so the number of bytes written to
2300     /// the given byte string may not be equivalent to the number of bytes in
2301     /// this byte string.
2302     ///
2303     /// If you don't need to amortize allocation and instead prefer
2304     /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2305     ///
2306     /// # Examples
2307     ///
2308     /// Basic usage:
2309     ///
2310     /// ```
2311     /// use bstr::{B, ByteSlice};
2312     ///
2313     /// let s = B("HELLO Β");
2314     ///
2315     /// let mut buf = vec![];
2316     /// s.to_lowercase_into(&mut buf);
2317     /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2318     /// ```
2319     ///
2320     /// Scripts without case are not changed:
2321     ///
2322     /// ```
2323     /// use bstr::{B, ByteSlice};
2324     ///
2325     /// let s = B("农历新年");
2326     ///
2327     /// let mut buf = vec![];
2328     /// s.to_lowercase_into(&mut buf);
2329     /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2330     /// ```
2331     ///
2332     /// Invalid UTF-8 remains as is:
2333     ///
2334     /// ```
2335     /// use bstr::{B, ByteSlice};
2336     ///
2337     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2338     ///
2339     /// let mut buf = vec![];
2340     /// s.to_lowercase_into(&mut buf);
2341     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2342     /// ```
2343     #[cfg(all(feature = "std", feature = "unicode"))]
2344     #[inline]
to_lowercase_into(&self, buf: &mut Vec<u8>)2345     fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
2346         // TODO: This is the best we can do given what std exposes I think.
2347         // If we roll our own case handling, then we might be able to do this
2348         // a bit faster. We shouldn't roll our own case handling unless we
2349         // need to, e.g., for doing caseless matching or case folding.
2350 
2351         // TODO(BUG): This doesn't handle any special casing rules.
2352 
2353         buf.reserve(self.as_bytes().len());
2354         for (s, e, ch) in self.char_indices() {
2355             if ch == '\u{FFFD}' {
2356                 buf.push_str(&self.as_bytes()[s..e]);
2357             } else if ch.is_ascii() {
2358                 buf.push_char(ch.to_ascii_lowercase());
2359             } else {
2360                 for upper in ch.to_lowercase() {
2361                     buf.push_char(upper);
2362                 }
2363             }
2364         }
2365     }
2366 
2367     /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2368     /// this byte string.
2369     ///
2370     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2371     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2372     /// In particular, the length of the byte string returned is always
2373     /// equivalent to the length of this byte string.
2374     ///
2375     /// If you'd like to reuse an allocation for performance reasons, then use
2376     /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2377     /// the conversion in place.
2378     ///
2379     /// # Examples
2380     ///
2381     /// Basic usage:
2382     ///
2383     /// ```
2384     /// use bstr::{B, ByteSlice};
2385     ///
2386     /// let s = B("HELLO Β");
2387     /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2388     /// ```
2389     ///
2390     /// Invalid UTF-8 remains as is:
2391     ///
2392     /// ```
2393     /// use bstr::{B, ByteSlice};
2394     ///
2395     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2396     /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2397     /// ```
2398     #[cfg(feature = "std")]
2399     #[inline]
to_ascii_lowercase(&self) -> Vec<u8>2400     fn to_ascii_lowercase(&self) -> Vec<u8> {
2401         self.as_bytes().to_ascii_lowercase()
2402     }
2403 
2404     /// Convert this byte string to its lowercase ASCII equivalent in place.
2405     ///
2406     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2407     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2408     ///
2409     /// If you don't need to do the conversion in
2410     /// place and instead prefer convenience, then use
2411     /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2412     ///
2413     /// # Examples
2414     ///
2415     /// Basic usage:
2416     ///
2417     /// ```
2418     /// use bstr::ByteSlice;
2419     ///
2420     /// let mut s = <Vec<u8>>::from("HELLO Β");
2421     /// s.make_ascii_lowercase();
2422     /// assert_eq!(s, "hello Β".as_bytes());
2423     /// ```
2424     ///
2425     /// Invalid UTF-8 remains as is:
2426     ///
2427     /// ```
2428     /// use bstr::{B, ByteSlice, ByteVec};
2429     ///
2430     /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2431     /// s.make_ascii_lowercase();
2432     /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2433     /// ```
2434     #[inline]
make_ascii_lowercase(&mut self)2435     fn make_ascii_lowercase(&mut self) {
2436         self.as_bytes_mut().make_ascii_lowercase();
2437     }
2438 
2439     /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2440     /// byte string.
2441     ///
2442     /// In this case, uppercase is defined according to the `Uppercase`
2443     /// Unicode property.
2444     ///
2445     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2446     /// then it is written to the returned byte string unchanged.
2447     ///
2448     /// Note that some characters in this byte string may expand into multiple
2449     /// characters when changing the case, so the number of bytes in the
2450     /// returned byte string may not be equivalent to the number of bytes in
2451     /// this byte string.
2452     ///
2453     /// If you'd like to reuse an allocation for performance reasons, then use
2454     /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2455     ///
2456     /// # Examples
2457     ///
2458     /// Basic usage:
2459     ///
2460     /// ```
2461     /// use bstr::{B, ByteSlice};
2462     ///
2463     /// let s = B("hello β");
2464     /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2465     /// ```
2466     ///
2467     /// Scripts without case are not changed:
2468     ///
2469     /// ```
2470     /// use bstr::{B, ByteSlice};
2471     ///
2472     /// let s = B("农历新年");
2473     /// assert_eq!(s.to_uppercase(), B("农历新年"));
2474     /// ```
2475     ///
2476     /// Invalid UTF-8 remains as is:
2477     ///
2478     /// ```
2479     /// use bstr::{B, ByteSlice};
2480     ///
2481     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2482     /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2483     /// ```
2484     #[cfg(all(feature = "std", feature = "unicode"))]
2485     #[inline]
to_uppercase(&self) -> Vec<u8>2486     fn to_uppercase(&self) -> Vec<u8> {
2487         let mut buf = vec![];
2488         self.to_uppercase_into(&mut buf);
2489         buf
2490     }
2491 
2492     /// Writes the uppercase equivalent of this byte string into the given
2493     /// buffer. The buffer is not cleared before written to.
2494     ///
2495     /// In this case, uppercase is defined according to the `Uppercase`
2496     /// Unicode property.
2497     ///
2498     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2499     /// then it is written to the given buffer unchanged.
2500     ///
2501     /// Note that some characters in this byte string may expand into multiple
2502     /// characters when changing the case, so the number of bytes written to
2503     /// the given byte string may not be equivalent to the number of bytes in
2504     /// this byte string.
2505     ///
2506     /// If you don't need to amortize allocation and instead prefer
2507     /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2508     ///
2509     /// # Examples
2510     ///
2511     /// Basic usage:
2512     ///
2513     /// ```
2514     /// use bstr::{B, ByteSlice};
2515     ///
2516     /// let s = B("hello β");
2517     ///
2518     /// let mut buf = vec![];
2519     /// s.to_uppercase_into(&mut buf);
2520     /// assert_eq!(buf, B("HELLO Β"));
2521     /// ```
2522     ///
2523     /// Scripts without case are not changed:
2524     ///
2525     /// ```
2526     /// use bstr::{B, ByteSlice};
2527     ///
2528     /// let s = B("农历新年");
2529     ///
2530     /// let mut buf = vec![];
2531     /// s.to_uppercase_into(&mut buf);
2532     /// assert_eq!(buf, B("农历新年"));
2533     /// ```
2534     ///
2535     /// Invalid UTF-8 remains as is:
2536     ///
2537     /// ```
2538     /// use bstr::{B, ByteSlice};
2539     ///
2540     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2541     ///
2542     /// let mut buf = vec![];
2543     /// s.to_uppercase_into(&mut buf);
2544     /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2545     /// ```
2546     #[cfg(all(feature = "std", feature = "unicode"))]
2547     #[inline]
to_uppercase_into(&self, buf: &mut Vec<u8>)2548     fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
2549         // TODO: This is the best we can do given what std exposes I think.
2550         // If we roll our own case handling, then we might be able to do this
2551         // a bit faster. We shouldn't roll our own case handling unless we
2552         // need to, e.g., for doing caseless matching or case folding.
2553         buf.reserve(self.as_bytes().len());
2554         for (s, e, ch) in self.char_indices() {
2555             if ch == '\u{FFFD}' {
2556                 buf.push_str(&self.as_bytes()[s..e]);
2557             } else if ch.is_ascii() {
2558                 buf.push_char(ch.to_ascii_uppercase());
2559             } else {
2560                 for upper in ch.to_uppercase() {
2561                     buf.push_char(upper);
2562                 }
2563             }
2564         }
2565     }
2566 
2567     /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2568     /// this byte string.
2569     ///
2570     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2571     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2572     /// In particular, the length of the byte string returned is always
2573     /// equivalent to the length of this byte string.
2574     ///
2575     /// If you'd like to reuse an allocation for performance reasons, then use
2576     /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2577     /// the conversion in place.
2578     ///
2579     /// # Examples
2580     ///
2581     /// Basic usage:
2582     ///
2583     /// ```
2584     /// use bstr::{B, ByteSlice};
2585     ///
2586     /// let s = B("hello β");
2587     /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2588     /// ```
2589     ///
2590     /// Invalid UTF-8 remains as is:
2591     ///
2592     /// ```
2593     /// use bstr::{B, ByteSlice};
2594     ///
2595     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2596     /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2597     /// ```
2598     #[cfg(feature = "std")]
2599     #[inline]
to_ascii_uppercase(&self) -> Vec<u8>2600     fn to_ascii_uppercase(&self) -> Vec<u8> {
2601         self.as_bytes().to_ascii_uppercase()
2602     }
2603 
2604     /// Convert this byte string to its uppercase ASCII equivalent in place.
2605     ///
2606     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2607     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2608     ///
2609     /// If you don't need to do the conversion in
2610     /// place and instead prefer convenience, then use
2611     /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2612     ///
2613     /// # Examples
2614     ///
2615     /// Basic usage:
2616     ///
2617     /// ```
2618     /// use bstr::{B, ByteSlice};
2619     ///
2620     /// let mut s = <Vec<u8>>::from("hello β");
2621     /// s.make_ascii_uppercase();
2622     /// assert_eq!(s, B("HELLO β"));
2623     /// ```
2624     ///
2625     /// Invalid UTF-8 remains as is:
2626     ///
2627     /// ```
2628     /// use bstr::{B, ByteSlice, ByteVec};
2629     ///
2630     /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2631     /// s.make_ascii_uppercase();
2632     /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2633     /// ```
2634     #[inline]
make_ascii_uppercase(&mut self)2635     fn make_ascii_uppercase(&mut self) {
2636         self.as_bytes_mut().make_ascii_uppercase();
2637     }
2638 
2639     /// Reverse the bytes in this string, in place.
2640     ///
2641     /// This is not necessarily a well formed operation! For example, if this
2642     /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
    /// string will likely result in invalid UTF-8 and otherwise nonsensical
2644     /// content.
2645     ///
2646     /// Note that this is equivalent to the generic `[u8]::reverse` method.
2647     /// This method is provided to permit callers to explicitly differentiate
2648     /// between reversing bytes, codepoints and graphemes.
2649     ///
2650     /// # Examples
2651     ///
2652     /// Basic usage:
2653     ///
2654     /// ```
2655     /// use bstr::ByteSlice;
2656     ///
2657     /// let mut s = <Vec<u8>>::from("hello");
2658     /// s.reverse_bytes();
2659     /// assert_eq!(s, "olleh".as_bytes());
2660     /// ```
2661     #[inline]
reverse_bytes(&mut self)2662     fn reverse_bytes(&mut self) {
2663         self.as_bytes_mut().reverse();
2664     }
2665 
2666     /// Reverse the codepoints in this string, in place.
2667     ///
2668     /// If this byte string is valid UTF-8, then its reversal by codepoint
2669     /// is also guaranteed to be valid UTF-8.
2670     ///
2671     /// This operation is equivalent to the following, but without allocating:
2672     ///
2673     /// ```
2674     /// use bstr::ByteSlice;
2675     ///
2676     /// let mut s = <Vec<u8>>::from("foo☃bar");
2677     ///
2678     /// let mut chars: Vec<char> = s.chars().collect();
2679     /// chars.reverse();
2680     ///
2681     /// let reversed: String = chars.into_iter().collect();
2682     /// assert_eq!(reversed, "rab☃oof");
2683     /// ```
2684     ///
2685     /// Note that this is not necessarily a well formed operation. For example,
2686     /// if this byte string contains grapheme clusters with more than one
2687     /// codepoint, then those grapheme clusters will not necessarily be
2688     /// preserved. If you'd like to preserve grapheme clusters, then use
2689     /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2690     ///
2691     /// # Examples
2692     ///
2693     /// Basic usage:
2694     ///
2695     /// ```
2696     /// use bstr::ByteSlice;
2697     ///
2698     /// let mut s = <Vec<u8>>::from("foo☃bar");
2699     /// s.reverse_chars();
2700     /// assert_eq!(s, "rab☃oof".as_bytes());
2701     /// ```
2702     ///
2703     /// This example shows that not all reversals lead to a well formed string.
2704     /// For example, in this case, combining marks are used to put accents over
2705     /// some letters, and those accent marks must appear after the codepoints
2706     /// they modify.
2707     ///
2708     /// ```
2709     /// use bstr::{B, ByteSlice};
2710     ///
2711     /// let mut s = <Vec<u8>>::from("résumé");
2712     /// s.reverse_chars();
2713     /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2714     /// ```
2715     ///
2716     /// A word of warning: the above example relies on the fact that
2717     /// `résumé` is in decomposed normal form, which means there are separate
2718     /// codepoints for the accents above `e`. If it is instead in composed
2719     /// normal form, then the example works:
2720     ///
2721     /// ```
2722     /// use bstr::{B, ByteSlice};
2723     ///
2724     /// let mut s = <Vec<u8>>::from("résumé");
2725     /// s.reverse_chars();
2726     /// assert_eq!(s, B("émusér"));
2727     /// ```
2728     ///
2729     /// The point here is to be cautious and not assume that just because
2730     /// `reverse_chars` works in one case, that it therefore works in all
2731     /// cases.
2732     #[inline]
reverse_chars(&mut self)2733     fn reverse_chars(&mut self) {
2734         let mut i = 0;
2735         loop {
2736             let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2737             if size == 0 {
2738                 break;
2739             }
2740             if size > 1 {
2741                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2742             }
2743             i += size;
2744         }
2745         self.reverse_bytes();
2746     }
2747 
2748     /// Reverse the graphemes in this string, in place.
2749     ///
2750     /// If this byte string is valid UTF-8, then its reversal by grapheme
2751     /// is also guaranteed to be valid UTF-8.
2752     ///
2753     /// This operation is equivalent to the following, but without allocating:
2754     ///
2755     /// ```
2756     /// use bstr::ByteSlice;
2757     ///
2758     /// let mut s = <Vec<u8>>::from("foo☃bar");
2759     ///
2760     /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2761     /// graphemes.reverse();
2762     ///
2763     /// let reversed = graphemes.concat();
2764     /// assert_eq!(reversed, "rab☃oof");
2765     /// ```
2766     ///
2767     /// # Examples
2768     ///
2769     /// Basic usage:
2770     ///
2771     /// ```
2772     /// use bstr::ByteSlice;
2773     ///
2774     /// let mut s = <Vec<u8>>::from("foo☃bar");
2775     /// s.reverse_graphemes();
2776     /// assert_eq!(s, "rab☃oof".as_bytes());
2777     /// ```
2778     ///
2779     /// This example shows how this correctly handles grapheme clusters,
2780     /// unlike `reverse_chars`.
2781     ///
2782     /// ```
2783     /// use bstr::ByteSlice;
2784     ///
2785     /// let mut s = <Vec<u8>>::from("résumé");
2786     /// s.reverse_graphemes();
2787     /// assert_eq!(s, "émusér".as_bytes());
2788     /// ```
2789     #[cfg(feature = "unicode")]
2790     #[inline]
reverse_graphemes(&mut self)2791     fn reverse_graphemes(&mut self) {
2792         use unicode::decode_grapheme;
2793 
2794         let mut i = 0;
2795         loop {
2796             let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
2797             if size == 0 {
2798                 break;
2799             }
2800             if size > 1 {
2801                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2802             }
2803             i += size;
2804         }
2805         self.reverse_bytes();
2806     }
2807 
2808     /// Returns true if and only if every byte in this byte string is ASCII.
2809     ///
2810     /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2811     /// an ASCII codepoint if and only if it is in the inclusive range
2812     /// `[0, 127]`.
2813     ///
2814     /// # Examples
2815     ///
2816     /// Basic usage:
2817     ///
2818     /// ```
2819     /// use bstr::{B, ByteSlice};
2820     ///
2821     /// assert!(B("abc").is_ascii());
2822     /// assert!(!B("☃βツ").is_ascii());
2823     /// assert!(!B(b"\xFF").is_ascii());
2824     /// ```
2825     #[inline]
is_ascii(&self) -> bool2826     fn is_ascii(&self) -> bool {
2827         ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2828     }
2829 
2830     /// Returns true if and only if the entire byte string is valid UTF-8.
2831     ///
2832     /// If you need location information about where a byte string's first
2833     /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2834     ///
2835     /// # Examples
2836     ///
2837     /// Basic usage:
2838     ///
2839     /// ```
2840     /// use bstr::{B, ByteSlice};
2841     ///
2842     /// assert!(B("abc").is_utf8());
2843     /// assert!(B("☃βツ").is_utf8());
2844     /// // invalid bytes
2845     /// assert!(!B(b"abc\xFF").is_utf8());
2846     /// // surrogate encoding
2847     /// assert!(!B(b"\xED\xA0\x80").is_utf8());
2848     /// // incomplete sequence
2849     /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
2850     /// // overlong sequence
2851     /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
2852     /// ```
2853     #[inline]
is_utf8(&self) -> bool2854     fn is_utf8(&self) -> bool {
2855         utf8::validate(self.as_bytes()).is_ok()
2856     }
2857 
2858     /// Returns the last byte in this byte string, if it's non-empty. If this
2859     /// byte string is empty, this returns `None`.
2860     ///
2861     /// Note that this is like the generic `[u8]::last`, except this returns
2862     /// the byte by value instead of a reference to the byte.
2863     ///
2864     /// # Examples
2865     ///
2866     /// Basic usage:
2867     ///
2868     /// ```
2869     /// use bstr::ByteSlice;
2870     ///
2871     /// assert_eq!(Some(b'z'), b"baz".last_byte());
2872     /// assert_eq!(None, b"".last_byte());
2873     /// ```
2874     #[inline]
last_byte(&self) -> Option<u8>2875     fn last_byte(&self) -> Option<u8> {
2876         let bytes = self.as_bytes();
2877         bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
2878     }
2879 
2880     /// Returns the index of the first non-ASCII byte in this byte string (if
2881     /// any such indices exist). Specifically, it returns the index of the
2882     /// first byte with a value greater than or equal to `0x80`.
2883     ///
2884     /// # Examples
2885     ///
2886     /// Basic usage:
2887     ///
2888     /// ```
2889     /// use bstr::{ByteSlice, B};
2890     ///
2891     /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
2892     /// assert_eq!(None, b"abcde".find_non_ascii_byte());
    /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
2894     /// ```
2895     #[inline]
find_non_ascii_byte(&self) -> Option<usize>2896     fn find_non_ascii_byte(&self) -> Option<usize> {
2897         let index = ascii::first_non_ascii_byte(self.as_bytes());
2898         if index == self.as_bytes().len() {
2899             None
2900         } else {
2901             Some(index)
2902         }
2903     }
2904 
2905     /// Copies elements from one part of the slice to another part of itself,
2906     /// where the parts may be overlapping.
2907     ///
2908     /// `src` is the range within this byte string to copy from, while `dest`
2909     /// is the starting index of the range within this byte string to copy to.
2910     /// The length indicated by `src` must be less than or equal to the number
2911     /// of bytes from `dest` to the end of the byte string.
2912     ///
2913     /// # Panics
2914     ///
2915     /// Panics if either range is out of bounds, or if `src` is too big to fit
2916     /// into `dest`, or if the end of `src` is before the start.
2917     ///
2918     /// # Examples
2919     ///
2920     /// Copying four bytes within a byte string:
2921     ///
2922     /// ```
2923     /// use bstr::{B, ByteSlice};
2924     ///
2925     /// let mut buf = *b"Hello, World!";
2926     /// let s = &mut buf;
2927     /// s.copy_within_str(1..5, 8);
2928     /// assert_eq!(s, B("Hello, Wello!"));
2929     /// ```
2930     #[inline]
copy_within_str<R>(&mut self, src: R, dest: usize) where R: ops::RangeBounds<usize>,2931     fn copy_within_str<R>(&mut self, src: R, dest: usize)
2932     where
2933         R: ops::RangeBounds<usize>,
2934     {
2935         // TODO: Deprecate this once slice::copy_within stabilizes.
2936         let src_start = match src.start_bound() {
2937             ops::Bound::Included(&n) => n,
2938             ops::Bound::Excluded(&n) => {
2939                 n.checked_add(1).expect("attempted to index slice beyond max")
2940             }
2941             ops::Bound::Unbounded => 0,
2942         };
2943         let src_end = match src.end_bound() {
2944             ops::Bound::Included(&n) => {
2945                 n.checked_add(1).expect("attempted to index slice beyond max")
2946             }
2947             ops::Bound::Excluded(&n) => n,
2948             ops::Bound::Unbounded => self.as_bytes().len(),
2949         };
2950         assert!(src_start <= src_end, "src end is before src start");
2951         assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
2952         let count = src_end - src_start;
2953         assert!(
2954             dest <= self.as_bytes().len() - count,
2955             "dest is out of bounds",
2956         );
2957 
2958         // SAFETY: This is safe because we use ptr::copy to handle overlapping
2959         // copies, and is also safe because we've checked all the bounds above.
2960         // Finally, we are only dealing with u8 data, which is Copy, which
2961         // means we can copy without worrying about ownership/destructors.
2962         unsafe {
2963             ptr::copy(
2964                 self.as_bytes().get_unchecked(src_start),
2965                 self.as_bytes_mut().get_unchecked_mut(dest),
2966                 count,
2967             );
2968         }
2969     }
2970 }
2971 
2972 /// A single substring searcher fixed to a particular needle.
2973 ///
2974 /// The purpose of this type is to permit callers to construct a substring
2975 /// searcher that can be used to search haystacks without the overhead of
2976 /// constructing the searcher in the first place. This is a somewhat niche
2977 /// concern when it's necessary to re-use the same needle to search multiple
2978 /// different haystacks with as little overhead as possible. In general, using
2979 /// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
2980 /// or
2981 /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
2982 /// is good enough, but `Finder` is useful when you can meaningfully observe
2983 /// searcher construction time in a profile.
2984 ///
2985 /// When the `std` feature is enabled, then this type has an `into_owned`
2986 /// version which permits building a `Finder` that is not connected to the
2987 /// lifetime of its needle.
2988 #[derive(Clone, Debug)]
2989 pub struct Finder<'a> {
2990     searcher: TwoWay<'a>,
2991 }
2992 
2993 impl<'a> Finder<'a> {
2994     /// Create a new finder for the given needle.
2995     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a>2996     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
2997         Finder { searcher: TwoWay::forward(needle.as_ref()) }
2998     }
2999 
3000     /// Convert this finder into its owned variant, such that it no longer
3001     /// borrows the needle.
3002     ///
3003     /// If this is already an owned finder, then this is a no-op. Otherwise,
3004     /// this copies the needle.
3005     ///
3006     /// This is only available when the `std` feature is enabled.
3007     #[cfg(feature = "std")]
3008     #[inline]
into_owned(self) -> Finder<'static>3009     pub fn into_owned(self) -> Finder<'static> {
3010         Finder { searcher: self.searcher.into_owned() }
3011     }
3012 
3013     /// Returns the needle that this finder searches for.
3014     ///
3015     /// Note that the lifetime of the needle returned is tied to the lifetime
3016     /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3017     /// finder's needle can be either borrowed or owned, so the lifetime of the
3018     /// needle returned must necessarily be the shorter of the two.
3019     #[inline]
needle(&self) -> &[u8]3020     pub fn needle(&self) -> &[u8] {
3021         self.searcher.needle()
3022     }
3023 
3024     /// Returns the index of the first occurrence of this needle in the given
3025     /// haystack.
3026     ///
3027     /// The haystack may be any type that can be cheaply converted into a
3028     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3029     ///
3030     /// # Complexity
3031     ///
3032     /// This routine is guaranteed to have worst case linear time complexity
3033     /// with respect to both the needle and the haystack. That is, this runs
3034     /// in `O(needle.len() + haystack.len())` time.
3035     ///
3036     /// This routine is also guaranteed to have worst case constant space
3037     /// complexity.
3038     ///
3039     /// # Examples
3040     ///
3041     /// Basic usage:
3042     ///
3043     /// ```
3044     /// use bstr::Finder;
3045     ///
3046     /// let haystack = "foo bar baz";
3047     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3048     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3049     /// assert_eq!(None, Finder::new("quux").find(haystack));
3050     /// ```
3051     #[inline]
find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>3052     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3053         self.searcher.find(haystack.as_ref())
3054     }
3055 }
3056 
3057 /// A single substring reverse searcher fixed to a particular needle.
3058 ///
3059 /// The purpose of this type is to permit callers to construct a substring
3060 /// searcher that can be used to search haystacks without the overhead of
3061 /// constructing the searcher in the first place. This is a somewhat niche
3062 /// concern when it's necessary to re-use the same needle to search multiple
3063 /// different haystacks with as little overhead as possible. In general, using
3064 /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3065 /// or
3066 /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3067 /// is good enough, but `FinderReverse` is useful when you can meaningfully
3068 /// observe searcher construction time in a profile.
3069 ///
3070 /// When the `std` feature is enabled, then this type has an `into_owned`
3071 /// version which permits building a `FinderReverse` that is not connected to
3072 /// the lifetime of its needle.
3073 #[derive(Clone, Debug)]
3074 pub struct FinderReverse<'a> {
3075     searcher: TwoWay<'a>,
3076 }
3077 
3078 impl<'a> FinderReverse<'a> {
3079     /// Create a new reverse finder for the given needle.
3080     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a>3081     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3082         FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) }
3083     }
3084 
3085     /// Convert this finder into its owned variant, such that it no longer
3086     /// borrows the needle.
3087     ///
3088     /// If this is already an owned finder, then this is a no-op. Otherwise,
3089     /// this copies the needle.
3090     ///
3091     /// This is only available when the `std` feature is enabled.
3092     #[cfg(feature = "std")]
3093     #[inline]
into_owned(self) -> FinderReverse<'static>3094     pub fn into_owned(self) -> FinderReverse<'static> {
3095         FinderReverse { searcher: self.searcher.into_owned() }
3096     }
3097 
3098     /// Returns the needle that this finder searches for.
3099     ///
3100     /// Note that the lifetime of the needle returned is tied to the lifetime
3101     /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3102     /// a finder's needle can be either borrowed or owned, so the lifetime of
3103     /// the needle returned must necessarily be the shorter of the two.
3104     #[inline]
needle(&self) -> &[u8]3105     pub fn needle(&self) -> &[u8] {
3106         self.searcher.needle()
3107     }
3108 
3109     /// Returns the index of the last occurrence of this needle in the given
3110     /// haystack.
3111     ///
3112     /// The haystack may be any type that can be cheaply converted into a
3113     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3114     ///
3115     /// # Complexity
3116     ///
3117     /// This routine is guaranteed to have worst case linear time complexity
3118     /// with respect to both the needle and the haystack. That is, this runs
3119     /// in `O(needle.len() + haystack.len())` time.
3120     ///
3121     /// This routine is also guaranteed to have worst case constant space
3122     /// complexity.
3123     ///
3124     /// # Examples
3125     ///
3126     /// Basic usage:
3127     ///
3128     /// ```
3129     /// use bstr::FinderReverse;
3130     ///
3131     /// let haystack = "foo bar baz";
3132     /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3133     /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3134     /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3135     /// ```
3136     #[inline]
rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>3137     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3138         self.searcher.rfind(haystack.as_ref())
3139     }
3140 }
3141 
3142 /// An iterator over non-overlapping substring matches.
3143 ///
3144 /// Matches are reported by the byte offset at which they begin.
3145 ///
3146 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3147 /// byte string being looked for.
3148 #[derive(Debug)]
3149 pub struct Find<'a> {
3150     haystack: &'a [u8],
3151     prestate: PrefilterState,
3152     searcher: TwoWay<'a>,
3153     pos: usize,
3154 }
3155 
3156 impl<'a> Find<'a> {
new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a>3157     fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
3158         let searcher = TwoWay::forward(needle);
3159         let prestate = searcher.prefilter_state();
3160         Find { haystack, prestate, searcher, pos: 0 }
3161     }
3162 }
3163 
3164 impl<'a> Iterator for Find<'a> {
3165     type Item = usize;
3166 
3167     #[inline]
next(&mut self) -> Option<usize>3168     fn next(&mut self) -> Option<usize> {
3169         if self.pos > self.haystack.len() {
3170             return None;
3171         }
3172         let result = self
3173             .searcher
3174             .find_with(&mut self.prestate, &self.haystack[self.pos..]);
3175         match result {
3176             None => None,
3177             Some(i) => {
3178                 let pos = self.pos + i;
3179                 self.pos = pos + cmp::max(1, self.searcher.needle().len());
3180                 Some(pos)
3181             }
3182         }
3183     }
3184 }
3185 
3186 /// An iterator over non-overlapping substring matches in reverse.
3187 ///
3188 /// Matches are reported by the byte offset at which they begin.
3189 ///
3190 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3191 /// byte string being looked for.
3192 #[derive(Debug)]
3193 pub struct FindReverse<'a> {
3194     haystack: &'a [u8],
3195     prestate: PrefilterState,
3196     searcher: TwoWay<'a>,
3197     /// When searching with an empty needle, this gets set to `None` after
3198     /// we've yielded the last element at `0`.
3199     pos: Option<usize>,
3200 }
3201 
3202 impl<'a> FindReverse<'a> {
new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a>3203     fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
3204         let searcher = TwoWay::reverse(needle);
3205         let prestate = searcher.prefilter_state();
3206         let pos = Some(haystack.len());
3207         FindReverse { haystack, prestate, searcher, pos }
3208     }
3209 
haystack(&self) -> &'a [u8]3210     fn haystack(&self) -> &'a [u8] {
3211         self.haystack
3212     }
3213 
needle(&self) -> &[u8]3214     fn needle(&self) -> &[u8] {
3215         self.searcher.needle()
3216     }
3217 }
3218 
3219 impl<'a> Iterator for FindReverse<'a> {
3220     type Item = usize;
3221 
3222     #[inline]
next(&mut self) -> Option<usize>3223     fn next(&mut self) -> Option<usize> {
3224         let pos = match self.pos {
3225             None => return None,
3226             Some(pos) => pos,
3227         };
3228         let result = self
3229             .searcher
3230             .rfind_with(&mut self.prestate, &self.haystack[..pos]);
3231         match result {
3232             None => None,
3233             Some(i) => {
3234                 if pos == i {
3235                     self.pos = pos.checked_sub(1);
3236                 } else {
3237                     self.pos = Some(i);
3238                 }
3239                 Some(i)
3240             }
3241         }
3242     }
3243 }
3244 
/// An iterator that yields each byte of a byte string by value.
///
/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    /// The underlying slice iterator that this type wraps.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Returns the bytes that have not been yielded yet as a subslice of
    /// the original data.
    ///
    /// The returned slice has the same lifetime as the original slice, so
    /// the iterator can continue to be used while it exists.
    #[inline]
    pub fn as_slice(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.it.as_slice();
        remaining
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        // The inner iterator yields `&u8`; hand the byte out by value.
        self.it.next().cloned()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        // The inner iterator yields `&u8`; hand the byte out by value.
        self.it.next_back().cloned()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3292 
3293 /// An iterator over the fields in a byte string, separated by whitespace.
3294 ///
3295 /// This iterator splits on contiguous runs of whitespace, such that the fields
3296 /// in `foo\t\t\n  \nbar` are `foo` and `bar`.
3297 ///
3298 /// `'a` is the lifetime of the byte string being split.
3299 #[derive(Debug)]
3300 pub struct Fields<'a> {
3301     it: FieldsWith<'a, fn(char) -> bool>,
3302 }
3303 
3304 impl<'a> Fields<'a> {
new(bytes: &'a [u8]) -> Fields<'a>3305     fn new(bytes: &'a [u8]) -> Fields<'a> {
3306         Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
3307     }
3308 }
3309 
3310 impl<'a> Iterator for Fields<'a> {
3311     type Item = &'a [u8];
3312 
3313     #[inline]
next(&mut self) -> Option<&'a [u8]>3314     fn next(&mut self) -> Option<&'a [u8]> {
3315         self.it.next()
3316     }
3317 }
3318 
/// An iterator over fields in the byte string, separated by a predicate over
/// codepoints.
///
/// This iterator splits a byte string based on its predicate function such
/// that the elements returned are separated by contiguous runs of codepoints
/// for which the predicate returns true.
///
/// `'a` is the lifetime of the byte string being split, while `F` is the type
/// of the predicate, i.e., `FnMut(char) -> bool`.
#[derive(Debug)]
pub struct FieldsWith<'a, F> {
    /// The predicate. Codepoints for which it returns true act as separators.
    f: F,
    /// The byte string being split. Yielded fields are sub-slices of it.
    bytes: &'a [u8],
    /// Codepoint iterator over `bytes`. It yields `(start, end, char)`
    /// triples whose offsets are used to slice fields out of `bytes`.
    chars: CharIndices<'a>,
}
3334 
3335 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F>3336     fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3337         FieldsWith { f, bytes, chars: bytes.char_indices() }
3338     }
3339 }
3340 
3341 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3342     type Item = &'a [u8];
3343 
3344     #[inline]
next(&mut self) -> Option<&'a [u8]>3345     fn next(&mut self) -> Option<&'a [u8]> {
3346         let (start, mut end);
3347         loop {
3348             match self.chars.next() {
3349                 None => return None,
3350                 Some((s, e, ch)) => {
3351                     if !(self.f)(ch) {
3352                         start = s;
3353                         end = e;
3354                         break;
3355                     }
3356                 }
3357             }
3358         }
3359         while let Some((_, e, ch)) = self.chars.next() {
3360             if (self.f)(ch) {
3361                 break;
3362             }
3363             end = e;
3364         }
3365         Some(&self.bytes[start..end])
3366     }
3367 }
3368 
/// An iterator over substrings in a byte string, split by a separator.
///
/// `'a` is the lifetime of the byte string being split.
#[derive(Debug)]
pub struct Split<'a> {
    /// Finds successive occurrences of the splitter in the haystack.
    finder: Find<'a>,
    /// The end position of the previous match of our splitter. The element
    /// we yield corresponds to the substring starting at `last` up to the
    /// beginning of the next match of the splitter.
    last: usize,
    /// Only set once the final element has been yielded. A corner case here
    /// is when a splitter is matched at the end of the haystack. At that
    /// point, we still need to yield an empty string following it.
    done: bool,
}
3384 
3385 impl<'a> Split<'a> {
new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a>3386     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
3387         let finder = haystack.find_iter(splitter);
3388         Split { finder, last: 0, done: false }
3389     }
3390 }
3391 
3392 impl<'a> Iterator for Split<'a> {
3393     type Item = &'a [u8];
3394 
3395     #[inline]
next(&mut self) -> Option<&'a [u8]>3396     fn next(&mut self) -> Option<&'a [u8]> {
3397         let haystack = self.finder.haystack;
3398         match self.finder.next() {
3399             Some(start) => {
3400                 let next = &haystack[self.last..start];
3401                 self.last = start + self.finder.searcher.needle().len();
3402                 Some(next)
3403             }
3404             None => {
3405                 if self.last >= haystack.len() {
3406                     if !self.done {
3407                         self.done = true;
3408                         Some(b"")
3409                     } else {
3410                         None
3411                     }
3412                 } else {
3413                     let s = &haystack[self.last..];
3414                     self.last = haystack.len();
3415                     self.done = true;
3416                     Some(s)
3417                 }
3418             }
3419         }
3420     }
3421 }
3422 
/// An iterator over substrings in a byte string, split by a separator, in
/// reverse.
///
/// `'a` is the lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitReverse<'a> {
    /// Finds successive occurrences of the splitter in the haystack, in
    /// reverse.
    finder: FindReverse<'a>,
    /// The start position of the previous match of our splitter (initially
    /// the length of the haystack). The element we yield corresponds to the
    /// substring starting at the end of the next match of the splitter up to
    /// `last`.
    last: usize,
    /// Only set once the final element has been yielded. A corner case here
    /// is when a splitter is matched at the beginning of the haystack. At
    /// that point, we still need to yield an empty string preceding it.
    done: bool,
}
3440 
3441 impl<'a> SplitReverse<'a> {
new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a>3442     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
3443         let finder = haystack.rfind_iter(splitter);
3444         SplitReverse { finder, last: haystack.len(), done: false }
3445     }
3446 }
3447 
3448 impl<'a> Iterator for SplitReverse<'a> {
3449     type Item = &'a [u8];
3450 
3451     #[inline]
next(&mut self) -> Option<&'a [u8]>3452     fn next(&mut self) -> Option<&'a [u8]> {
3453         let haystack = self.finder.haystack();
3454         match self.finder.next() {
3455             Some(start) => {
3456                 let nlen = self.finder.needle().len();
3457                 let next = &haystack[start + nlen..self.last];
3458                 self.last = start;
3459                 Some(next)
3460             }
3461             None => {
3462                 if self.last == 0 {
3463                     if !self.done {
3464                         self.done = true;
3465                         Some(b"")
3466                     } else {
3467                         None
3468                     }
3469                 } else {
3470                     let s = &haystack[..self.last];
3471                     self.last = 0;
3472                     self.done = true;
3473                     Some(s)
3474                 }
3475             }
3476         }
3477     }
3478 }
3479 
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator.
///
/// `'a` is the lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitN<'a> {
    /// The underlying unbounded split iterator.
    split: Split<'a>,
    /// The maximum number of elements to yield. If this many elements are
    /// reached, the last one is the entire unsplit remainder of the haystack.
    limit: usize,
    /// The number of times `next` has been called so far.
    count: usize,
}
3491 
3492 impl<'a> SplitN<'a> {
new( haystack: &'a [u8], splitter: &'a [u8], limit: usize, ) -> SplitN<'a>3493     fn new(
3494         haystack: &'a [u8],
3495         splitter: &'a [u8],
3496         limit: usize,
3497     ) -> SplitN<'a> {
3498         let split = haystack.split_str(splitter);
3499         SplitN { split, limit, count: 0 }
3500     }
3501 }
3502 
3503 impl<'a> Iterator for SplitN<'a> {
3504     type Item = &'a [u8];
3505 
3506     #[inline]
next(&mut self) -> Option<&'a [u8]>3507     fn next(&mut self) -> Option<&'a [u8]> {
3508         self.count += 1;
3509         if self.count > self.limit || self.split.done {
3510             None
3511         } else if self.count == self.limit {
3512             Some(&self.split.finder.haystack[self.split.last..])
3513         } else {
3514             self.split.next()
3515         }
3516     }
3517 }
3518 
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator, in reverse.
///
/// `'a` is the lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitNReverse<'a> {
    /// The underlying unbounded reverse split iterator.
    split: SplitReverse<'a>,
    /// The maximum number of elements to yield. If this many elements are
    /// reached, the last one is the entire unsplit beginning of the haystack.
    limit: usize,
    /// The number of times `next` has been called so far.
    count: usize,
}
3530 
3531 impl<'a> SplitNReverse<'a> {
new( haystack: &'a [u8], splitter: &'a [u8], limit: usize, ) -> SplitNReverse<'a>3532     fn new(
3533         haystack: &'a [u8],
3534         splitter: &'a [u8],
3535         limit: usize,
3536     ) -> SplitNReverse<'a> {
3537         let split = haystack.rsplit_str(splitter);
3538         SplitNReverse { split, limit, count: 0 }
3539     }
3540 }
3541 
3542 impl<'a> Iterator for SplitNReverse<'a> {
3543     type Item = &'a [u8];
3544 
3545     #[inline]
next(&mut self) -> Option<&'a [u8]>3546     fn next(&mut self) -> Option<&'a [u8]> {
3547         self.count += 1;
3548         if self.count > self.limit || self.split.done {
3549             None
3550         } else if self.count == self.limit {
3551             Some(&self.split.finder.haystack()[..self.split.last])
3552         } else {
3553             self.split.next()
3554         }
3555     }
3556 }
3557 
/// An iterator over all lines in a byte string, without their terminators.
///
/// For this iterator, the only line terminators recognized are `\r\n` and
/// `\n`.
///
/// `'a` is the lifetime of the byte string being iterated over.
pub struct Lines<'a> {
    /// The terminator-preserving line iterator. Each line it yields has its
    /// trailing `\n` or `\r\n` stripped before being returned.
    it: LinesWithTerminator<'a>,
}
3567 
3568 impl<'a> Lines<'a> {
new(bytes: &'a [u8]) -> Lines<'a>3569     fn new(bytes: &'a [u8]) -> Lines<'a> {
3570         Lines { it: LinesWithTerminator::new(bytes) }
3571     }
3572 }
3573 
3574 impl<'a> Iterator for Lines<'a> {
3575     type Item = &'a [u8];
3576 
3577     #[inline]
next(&mut self) -> Option<&'a [u8]>3578     fn next(&mut self) -> Option<&'a [u8]> {
3579         let mut line = self.it.next()?;
3580         if line.last_byte() == Some(b'\n') {
3581             line = &line[..line.len() - 1];
3582             if line.last_byte() == Some(b'\r') {
3583                 line = &line[..line.len() - 1];
3584             }
3585         }
3586         Some(line)
3587     }
3588 }
3589 
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
/// line terminators are included, this also handles `\r\n` line endings.)
///
/// Line terminators are only included if they are present in the original
/// byte string. For example, the last line in a byte string may not end with
/// a line terminator.
///
/// Concatenating all elements yielded by this iterator is guaranteed to yield
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
#[derive(Debug)]
pub struct LinesWithTerminator<'a> {
    /// The portion of the byte string that has not been yielded yet.
    bytes: &'a [u8],
}
3606 
3607 impl<'a> LinesWithTerminator<'a> {
new(bytes: &'a [u8]) -> LinesWithTerminator<'a>3608     fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3609         LinesWithTerminator { bytes }
3610     }
3611 }
3612 
3613 impl<'a> Iterator for LinesWithTerminator<'a> {
3614     type Item = &'a [u8];
3615 
3616     #[inline]
next(&mut self) -> Option<&'a [u8]>3617     fn next(&mut self) -> Option<&'a [u8]> {
3618         match self.bytes.find_byte(b'\n') {
3619             None if self.bytes.is_empty() => None,
3620             None => {
3621                 let line = self.bytes;
3622                 self.bytes = b"";
3623                 Some(line)
3624             }
3625             Some(end) => {
3626                 let line = &self.bytes[..end + 1];
3627                 self.bytes = &self.bytes[end + 1..];
3628                 Some(line)
3629             }
3630         }
3631     }
3632 }
3633 
#[cfg(test)]
mod tests {
    use ext_slice::{ByteSlice, B};
    use tests::LOSSY_TESTS;

    // For every `(expected, input)` pair in `LOSSY_TESTS`, checks that
    // `to_str_lossy`, `to_str_lossy_into` and std's `String::from_utf8_lossy`
    // all produce exactly the expected bytes.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let got = B(input).to_str_lossy();
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut got = String::new();
            B(input).to_str_lossy_into(&mut got);
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy_into",
            );

            // Cross-check the expectation itself against the standard
            // library's lossy conversion.
            let got = String::from_utf8_lossy(input);
            assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
        }
    }

    // Must panic: presumably because copying the 2-byte range `0..2` to
    // offset 5 would run past the end of the 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail1() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(0..2, 5);
    }

    // Must panic: the source range `3..2` has its start after its end.
    #[test]
    #[should_panic]
    fn copy_within_fail2() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(3..2, 0);
    }

    // Must panic: the source range `5..7` ends past the 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail3() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(5..7, 0);
    }

    // Must panic: presumably because destination offset 6 leaves no room for
    // even the single byte in `0..1` within the 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail4() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(0..1, 6);
    }
}
3696