// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might
// start winning with a hypothetical memchr4 (or greater). This technique might
// also be good for exposing searches over ranges of bytes, but that departs
// from the standard memchr API, so it's not clear whether we actually want
// that or not.
//
// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind
// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means
// I don't see a way of effectively using PCMPISTRI unless there's some fast
// way to replace zero bytes with a byte that is not a needle byte.
11 
12 use core::arch::x86_64::*;
13 use core::mem::size_of;
14 
15 use x86::sse2;
16 
/// Width in bytes of one 128-bit SSE vector; the main search loop advances
/// through the haystack in steps of this size.
const VECTOR_SIZE: usize = size_of::<__m128i>();

/// Control word for `_mm_cmpestri`: operate on unsigned bytes, report a match
/// when a haystack byte equals any needle byte, leave the result mask
/// un-negated, and return the index of the least significant (first) match.
const CONTROL_ANY: i32 = _SIDD_LEAST_SIGNIFICANT
    | _SIDD_POSITIVE_POLARITY
    | _SIDD_CMP_EQUAL_ANY
    | _SIDD_UBYTE_OPS;
23 
24 #[target_feature(enable = "sse4.2")]
memchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8] ) -> Option<usize>25 pub unsafe fn memchr3(
26     n1: u8, n2: u8, n3: u8,
27     haystack: &[u8]
28 ) -> Option<usize> {
29     let vn1 = _mm_set1_epi8(n1 as i8);
30     let vn2 = _mm_set1_epi8(n2 as i8);
31     let vn3 = _mm_set1_epi8(n3 as i8);
32     let vn = _mm_setr_epi8(
33         n1 as i8, n2 as i8, n3 as i8, 0,
34         0, 0, 0, 0,
35         0, 0, 0, 0,
36         0, 0, 0, 0,
37     );
38     let len = haystack.len();
39     let start_ptr = haystack.as_ptr();
40     let end_ptr = haystack[haystack.len()..].as_ptr();
41     let mut ptr = start_ptr;
42 
43     if haystack.len() < VECTOR_SIZE {
44         while ptr < end_ptr {
45             if *ptr == n1 || *ptr == n2 || *ptr == n3 {
46                 return Some(sub(ptr, start_ptr));
47             }
48             ptr = ptr.offset(1);
49         }
50         return None;
51     }
52     while ptr <= end_ptr.sub(VECTOR_SIZE) {
53         let chunk = _mm_loadu_si128(ptr as *const __m128i);
54         let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY);
55         if res < 16 {
56             return Some(sub(ptr, start_ptr) + res as usize);
57         }
58         ptr = ptr.add(VECTOR_SIZE);
59     }
60     if ptr < end_ptr {
61         debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
62         ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
63         debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
64 
65         return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
66     }
67     None
68 }
69 
/// Returns how many bytes lie between `b` and `a`, computed as `a - b` on the
/// pointers' addresses.
///
/// Callers are expected to pass `a >= b`; debug builds assert this.
fn sub(a: *const u8, b: *const u8) -> usize {
    debug_assert!(b <= a);
    let (upper, lower) = (a as usize, b as usize);
    upper - lower
}
76