1 use core::fmt::Debug;
2 use core::hash::Hash;
3 use core::mem::size_of;
4 
5 use byteorder::{ByteOrder, NativeEndian};
6 
7 #[cfg(feature = "std")]
8 pub use self::std::*;
9 
10 #[cfg(feature = "std")]
11 mod std {
12     use byteorder::ByteOrder;
13     use core::mem::size_of;
14     use error::{Error, Result};
15 
16     use super::StateID;
17 
18     /// Check that the premultiplication of the given state identifier can
19     /// fit into the representation indicated by `S`. If it cannot, or if it
20     /// overflows `usize` itself, then an error is returned.
premultiply_overflow_error<S: StateID>( last_state: S, alphabet_len: usize, ) -> Result<()>21     pub fn premultiply_overflow_error<S: StateID>(
22         last_state: S,
23         alphabet_len: usize,
24     ) -> Result<()> {
25         let requested = match last_state.to_usize().checked_mul(alphabet_len) {
26             Some(requested) => requested,
27             None => return Err(Error::premultiply_overflow(0, 0)),
28         };
29         if requested > S::max_id() {
30             return Err(Error::premultiply_overflow(S::max_id(), requested));
31         }
32         Ok(())
33     }
34 
35     /// Allocate the next sequential identifier for a fresh state given
36     /// the previously constructed state identified by `current`. If the
37     /// next sequential identifier would overflow `usize` or the chosen
38     /// representation indicated by `S`, then an error is returned.
next_state_id<S: StateID>(current: S) -> Result<S>39     pub fn next_state_id<S: StateID>(current: S) -> Result<S> {
40         let next = match current.to_usize().checked_add(1) {
41             Some(next) => next,
42             None => return Err(Error::state_id_overflow(::std::usize::MAX)),
43         };
44         if next > S::max_id() {
45             return Err(Error::state_id_overflow(S::max_id()));
46         }
47         Ok(S::from_usize(next))
48     }
49 
50     /// Convert the given `usize` to the chosen state identifier
51     /// representation. If the given value cannot fit in the chosen
52     /// representation, then an error is returned.
usize_to_state_id<S: StateID>(value: usize) -> Result<S>53     pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
54         if value > S::max_id() {
55             Err(Error::state_id_overflow(S::max_id()))
56         } else {
57             Ok(S::from_usize(value))
58         }
59     }
60 
61     /// Write the given identifier to the given slice of bytes using the
62     /// specified endianness. The given slice must have length at least
63     /// `size_of::<S>()`.
64     ///
65     /// The given state identifier representation must have size 1, 2, 4 or 8.
write_state_id_bytes<E: ByteOrder, S: StateID>( slice: &mut [u8], id: S, )66     pub fn write_state_id_bytes<E: ByteOrder, S: StateID>(
67         slice: &mut [u8],
68         id: S,
69     ) {
70         assert!(
71             1 == size_of::<S>()
72                 || 2 == size_of::<S>()
73                 || 4 == size_of::<S>()
74                 || 8 == size_of::<S>()
75         );
76 
77         match size_of::<S>() {
78             1 => slice[0] = id.to_usize() as u8,
79             2 => E::write_u16(slice, id.to_usize() as u16),
80             4 => E::write_u32(slice, id.to_usize() as u32),
81             8 => E::write_u64(slice, id.to_usize() as u64),
82             _ => unreachable!(),
83         }
84     }
85 }
86 
87 /// Return the unique identifier for a DFA's dead state in the chosen
88 /// representation indicated by `S`.
dead_id<S: StateID>() -> S89 pub fn dead_id<S: StateID>() -> S {
90     S::from_usize(0)
91 }
92 
93 /// A trait describing the representation of a DFA's state identifier.
94 ///
95 /// The purpose of this trait is to safely express both the possible state
96 /// identifier representations that can be used in a DFA and to convert between
97 /// state identifier representations and types that can be used to efficiently
98 /// index memory (such as `usize`).
99 ///
100 /// In general, one should not need to implement this trait explicitly. In
101 /// particular, this crate provides implementations for `u8`, `u16`, `u32`,
102 /// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can
103 /// represent all corresponding values in a `usize`.)
104 ///
105 /// # Safety
106 ///
107 /// This trait is unsafe because the correctness of its implementations may be
108 /// relied upon by other unsafe code. For example, one possible way to
109 /// implement this trait incorrectly would be to return a maximum identifier
110 /// in `max_id` that is greater than the real maximum identifier. This will
111 /// likely result in wrap-on-overflow semantics in release mode, which can in
112 /// turn produce incorrect state identifiers. Those state identifiers may then
113 /// in turn access out-of-bounds memory in a DFA's search routine, where bounds
114 /// checks are explicitly elided for performance reasons.
115 pub unsafe trait StateID:
116     Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
117 {
118     /// Convert from a `usize` to this implementation's representation.
119     ///
120     /// Implementors may assume that `n <= Self::max_id`. That is, implementors
121     /// do not need to check whether `n` can fit inside this implementation's
122     /// representation.
from_usize(n: usize) -> Self123     fn from_usize(n: usize) -> Self;
124 
125     /// Convert this implementation's representation to a `usize`.
126     ///
127     /// Implementors must not return a `usize` value greater than
128     /// `Self::max_id` and must not permit overflow when converting between the
129     /// implementor's representation and `usize`. In general, the preferred
130     /// way for implementors to achieve this is to simply not provide
131     /// implementations of `StateID` that cannot fit into the target platform's
132     /// `usize`.
to_usize(self) -> usize133     fn to_usize(self) -> usize;
134 
135     /// Return the maximum state identifier supported by this representation.
136     ///
137     /// Implementors must return a correct bound. Doing otherwise may result
138     /// in memory unsafety.
max_id() -> usize139     fn max_id() -> usize;
140 
141     /// Read a single state identifier from the given slice of bytes in native
142     /// endian format.
143     ///
144     /// Implementors may assume that the given slice has length at least
145     /// `size_of::<Self>()`.
read_bytes(slice: &[u8]) -> Self146     fn read_bytes(slice: &[u8]) -> Self;
147 
148     /// Write this state identifier to the given slice of bytes in native
149     /// endian format.
150     ///
151     /// Implementors may assume that the given slice has length at least
152     /// `size_of::<Self>()`.
write_bytes(self, slice: &mut [u8])153     fn write_bytes(self, slice: &mut [u8]);
154 }
155 
156 unsafe impl StateID for usize {
157     #[inline]
from_usize(n: usize) -> usize158     fn from_usize(n: usize) -> usize {
159         n
160     }
161 
162     #[inline]
to_usize(self) -> usize163     fn to_usize(self) -> usize {
164         self
165     }
166 
167     #[inline]
max_id() -> usize168     fn max_id() -> usize {
169         ::core::usize::MAX
170     }
171 
172     #[inline]
read_bytes(slice: &[u8]) -> Self173     fn read_bytes(slice: &[u8]) -> Self {
174         NativeEndian::read_uint(slice, size_of::<usize>()) as usize
175     }
176 
177     #[inline]
write_bytes(self, slice: &mut [u8])178     fn write_bytes(self, slice: &mut [u8]) {
179         NativeEndian::write_uint(slice, self as u64, size_of::<usize>())
180     }
181 }
182 
183 unsafe impl StateID for u8 {
184     #[inline]
from_usize(n: usize) -> u8185     fn from_usize(n: usize) -> u8 {
186         n as u8
187     }
188 
189     #[inline]
to_usize(self) -> usize190     fn to_usize(self) -> usize {
191         self as usize
192     }
193 
194     #[inline]
max_id() -> usize195     fn max_id() -> usize {
196         ::core::u8::MAX as usize
197     }
198 
199     #[inline]
read_bytes(slice: &[u8]) -> Self200     fn read_bytes(slice: &[u8]) -> Self {
201         slice[0]
202     }
203 
204     #[inline]
write_bytes(self, slice: &mut [u8])205     fn write_bytes(self, slice: &mut [u8]) {
206         slice[0] = self;
207     }
208 }
209 
210 unsafe impl StateID for u16 {
211     #[inline]
from_usize(n: usize) -> u16212     fn from_usize(n: usize) -> u16 {
213         n as u16
214     }
215 
216     #[inline]
to_usize(self) -> usize217     fn to_usize(self) -> usize {
218         self as usize
219     }
220 
221     #[inline]
max_id() -> usize222     fn max_id() -> usize {
223         ::core::u16::MAX as usize
224     }
225 
226     #[inline]
read_bytes(slice: &[u8]) -> Self227     fn read_bytes(slice: &[u8]) -> Self {
228         NativeEndian::read_u16(slice)
229     }
230 
231     #[inline]
write_bytes(self, slice: &mut [u8])232     fn write_bytes(self, slice: &mut [u8]) {
233         NativeEndian::write_u16(slice, self)
234     }
235 }
236 
237 #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
238 unsafe impl StateID for u32 {
239     #[inline]
from_usize(n: usize) -> u32240     fn from_usize(n: usize) -> u32 {
241         n as u32
242     }
243 
244     #[inline]
to_usize(self) -> usize245     fn to_usize(self) -> usize {
246         self as usize
247     }
248 
249     #[inline]
max_id() -> usize250     fn max_id() -> usize {
251         ::core::u32::MAX as usize
252     }
253 
254     #[inline]
read_bytes(slice: &[u8]) -> Self255     fn read_bytes(slice: &[u8]) -> Self {
256         NativeEndian::read_u32(slice)
257     }
258 
259     #[inline]
write_bytes(self, slice: &mut [u8])260     fn write_bytes(self, slice: &mut [u8]) {
261         NativeEndian::write_u32(slice, self)
262     }
263 }
264 
265 #[cfg(target_pointer_width = "64")]
266 unsafe impl StateID for u64 {
267     #[inline]
from_usize(n: usize) -> u64268     fn from_usize(n: usize) -> u64 {
269         n as u64
270     }
271 
272     #[inline]
to_usize(self) -> usize273     fn to_usize(self) -> usize {
274         self as usize
275     }
276 
277     #[inline]
max_id() -> usize278     fn max_id() -> usize {
279         ::core::u64::MAX as usize
280     }
281 
282     #[inline]
read_bytes(slice: &[u8]) -> Self283     fn read_bytes(slice: &[u8]) -> Self {
284         NativeEndian::read_u64(slice)
285     }
286 
287     #[inline]
write_bytes(self, slice: &mut [u8])288     fn write_bytes(self, slice: &mut [u8]) {
289         NativeEndian::write_u64(slice, self)
290     }
291 }
292