1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! Determine displayed width of `char` and `str` types according to
12 //! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
13 //! rules.
14 //!
15 //! ```rust
16 //! extern crate unicode_width;
17 //!
18 //! use unicode_width::UnicodeWidthStr;
19 //!
20 //! fn main() {
21 //!     let teststr = "Hello, world!";
22 //!     let width = UnicodeWidthStr::width(teststr);
23 //!     println!("{}", teststr);
24 //!     println!("The above string is {} columns wide.", width);
25 //!     let width = teststr.width_cjk();
26 //!     println!("The above string is {} columns wide (CJK).", width);
27 //! }
28 //! ```
29 //!
30 //! # features
31 //!
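//! unicode-width supports a `no_std` feature, intended to eliminate the
//! dependence on std and use equivalent functions from core instead.
//! Note: this copy of the crate declares `#![no_std]` and then links `std`
//! unconditionally (see the `ANDROID` comment on `extern crate std`), so
//! std is always a dependency here.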
34 //!
35 //! # crates.io
36 //!
37 //! You can use this package in your project by adding the following
38 //! to your `Cargo.toml`:
39 //!
40 //! ```toml
41 //! [dependencies]
42 //! unicode-width = "0.1.5"
43 //! ```
44 
45 #![deny(missing_docs, unsafe_code)]
46 #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
47        html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
48 
49 #![cfg_attr(feature = "bench", feature(test))]
50 #![no_std]
51 
52 // ANDROID: Unconditionally use std to allow building as a dylib.
53 #[macro_use]
54 extern crate std;
55 
56 #[cfg(feature = "bench")]
57 extern crate test;
58 
59 use tables::charwidth as cw;
60 pub use tables::UNICODE_VERSION;
61 
62 use core::ops::Add;
63 
64 mod tables;
65 
66 #[cfg(test)]
67 mod tests;
68 
/// Displayed-width queries for a single Unicode character.
pub trait UnicodeWidthChar {
    /// Reports how many columns this character occupies when displayed.
    ///
    /// The result is `None` for control characters, with the single
    /// exception of `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// are counted as 1 column wide. That matches the recommendation for
    /// non-CJK contexts, and for contexts that cannot be reliably determined.
    fn width(self) -> Option<usize>;

    /// Reports how many columns this character occupies when displayed in a
    /// CJK context.
    ///
    /// The result is `None` for control characters, with the single
    /// exception of `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// are counted as 2 columns wide. That matches the recommendation for
    /// CJK contexts.
    fn width_cjk(self) -> Option<usize>;
}
89 
90 impl UnicodeWidthChar for char {
91     #[inline]
width(self) -> Option<usize>92     fn width(self) -> Option<usize> { cw::width(self, false) }
93 
94     #[inline]
width_cjk(self) -> Option<usize>95     fn width_cjk(self) -> Option<usize> { cw::width(self, true) }
96 }
97 
/// Methods for determining displayed width of Unicode strings.
pub trait UnicodeWidthStr {
    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    // The receiver lifetime is elided: it is not tied to the return value,
    // so naming it adds nothing and the `str` impl already omits it.
    fn width(&self) -> usize;

    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(&self) -> usize;
}
120 
121 impl UnicodeWidthStr for str {
122     #[inline]
width(&self) -> usize123     fn width(&self) -> usize {
124         self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add)
125     }
126 
127     #[inline]
width_cjk(&self) -> usize128     fn width_cjk(&self) -> usize {
129         self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add)
130     }
131 }
132