1 //! Word splitting functionality.
2 //!
3 //! To wrap text into lines, long words sometimes need to be split
4 //! across lines. The [`WordSplitter`] trait defines this
5 //! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.
7 
/// Strategy for locating the places inside a word where it may be
/// broken across lines.
///
/// When the textwrap crate is built with the `hyphenation` Cargo
/// feature, the `hyphenation::Standard` struct implements
/// `WordSplitter` as well, which gives you language-aware
/// hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")]
/// {
///     use hyphenation::{Language, Load, Standard};
///     use textwrap::{wrap, Options};
///
///     let text = "Oxidation is the loss of electrons.";
///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
///     let options = Options::new(8).splitter(dictionary);
///     assert_eq!(wrap(text, &options), vec!["Oxida-",
///                                           "tion is",
///                                           "the loss",
///                                           "of elec-",
///                                           "trons."]);
/// }
/// ```
///
/// The documentation of the [hyphenation] crate has more details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
pub trait WordSplitter
where
    Self: std::fmt::Debug,
{
    /// Compute every index at which `word` may be split.
    ///
    /// Each returned index must lie in the range `0..word.len()` and
    /// should refer to the position right _after_ the split point,
    /// i.e., the index following the `-` when splitting on hyphens.
    /// With this convention, `word.split_at(idx)` yields two
    /// well-formed pieces.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
    /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
    /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
    /// ```
    fn split_points(&self, word: &str) -> Vec<usize>;
}
53 
54 impl<S: WordSplitter + ?Sized> WordSplitter for Box<S> {
split_points(&self, word: &str) -> Vec<usize>55     fn split_points(&self, word: &str) -> Vec<usize> {
56         use std::ops::Deref;
57         self.deref().split_points(word)
58     }
59 }
60 
61 impl<T: ?Sized + WordSplitter> WordSplitter for &T {
split_points(&self, word: &str) -> Vec<usize>62     fn split_points(&self, word: &str) -> Vec<usize> {
63         (*self).split_points(word)
64     }
65 }
66 
67 /// Use this as a [`Options.splitter`] to avoid any kind of
68 /// hyphenation:
69 ///
70 /// ```
71 /// use textwrap::{wrap, NoHyphenation, Options};
72 ///
73 /// let options = Options::new(8).splitter(NoHyphenation);
74 /// assert_eq!(wrap("foo bar-baz", &options),
75 ///            vec!["foo", "bar-baz"]);
76 /// ```
77 ///
78 /// [`Options.splitter`]: super::Options::splitter
79 #[derive(Clone, Copy, Debug)]
80 pub struct NoHyphenation;
81 
82 /// `NoHyphenation` implements `WordSplitter` by not splitting the
83 /// word at all.
84 impl WordSplitter for NoHyphenation {
split_points(&self, _: &str) -> Vec<usize>85     fn split_points(&self, _: &str) -> Vec<usize> {
86         Vec::new()
87     }
88 }
89 
90 /// Simple and default way to split words: splitting on existing
91 /// hyphens only.
92 ///
93 /// You probably don't need to use this type since it's already used
94 /// by default by [`Options::new`](super::Options::new).
95 #[derive(Clone, Copy, Debug)]
96 pub struct HyphenSplitter;
97 
98 /// `HyphenSplitter` is the default `WordSplitter` used by
99 /// [`Options::new`](super::Options::new). It will split words on any
100 /// existing hyphens in the word.
101 ///
102 /// It will only use hyphens that are surrounded by alphanumeric
103 /// characters, which prevents a word like `"--foo-bar"` from being
104 /// split into `"--"` and `"foo-bar"`.
105 impl WordSplitter for HyphenSplitter {
split_points(&self, word: &str) -> Vec<usize>106     fn split_points(&self, word: &str) -> Vec<usize> {
107         let mut splits = Vec::new();
108 
109         for (idx, _) in word.match_indices('-') {
110             // We only use hyphens that are surrounded by alphanumeric
111             // characters. This is to avoid splitting on repeated hyphens,
112             // such as those found in --foo-bar.
113             let prev = word[..idx].chars().next_back();
114             let next = word[idx + 1..].chars().next();
115 
116             if prev.filter(|ch| ch.is_alphanumeric()).is_some()
117                 && next.filter(|ch| ch.is_alphanumeric()).is_some()
118             {
119                 splits.push(idx + 1); // +1 due to width of '-'.
120             }
121         }
122 
123         splits
124     }
125 }
126 
/// Language-specific hyphenation backed by a dictionary with patterns
/// from the [hyphenation] crate. The split points are the `breaks`
/// that the dictionary reports for the word.
///
/// **Note:** This implementation only exists when the `hyphenation`
/// Cargo feature is enabled.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[cfg(feature = "hyphenation")]
impl WordSplitter for hyphenation::Standard {
    fn split_points(&self, word: &str) -> Vec<usize> {
        use hyphenation::Hyphenator;

        let hyphenated = self.hyphenate(word);
        hyphenated.breaks
    }
}
141