1 /* compare258_sse.c -- SSE4.2 version of compare258
2  *
3  * Copyright (C) 2013 Intel Corporation. All rights reserved.
4  * Authors:
5  *  Wajdi Feghali   <wajdi.k.feghali@intel.com>
6  *  Jim Guilford    <james.guilford@intel.com>
7  *  Vinodh Gopal    <vinodh.gopal@intel.com>
8  *     Erdinc Ozturk   <erdinc.ozturk@intel.com>
9  *  Jim Kukunas     <james.t.kukunas@linux.intel.com>
10  *
11  * Portions are Copyright (C) 2016 12Sided Technology, LLC.
12  * Author:
13  *  Phil Vachon     <pvachon@12sidedtech.com>
14  *
15  * For conditions of distribution and use, see copyright notice in zlib.h
16  */
17 
18 #include "../../zbuild.h"
19 #include "../../zutil.h"
20 
21 #ifdef X86_SSE42_CMP_STR
22 
#include <immintrin.h>
#include <string.h>
#ifdef _MSC_VER
#  include <nmmintrin.h>
#endif
27 
28 /* UNALIGNED_OK, SSE4.2 intrinsic comparison */
/* Count how many leading bytes of src0 and src1 are equal, examining at most 256 bytes.
 *
 * The buffers are compared in 16-byte chunks with unaligned loads; the loop body is
 * unrolled twice, so 32 bytes are handled per iteration. A full 16 bytes is always loaded
 * from both pointers for every chunk that is examined, so up to 256 bytes may be read
 * from each input.
 *
 * Returns the offset of the first differing byte (0..255), or 256 if no difference was
 * found in the first 256 bytes.
 */
static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    /* Unsigned bytes, element-wise equality, negated result: a set result bit marks a
     * byte that differs, so the returned index is the position of the first mismatch.
     * A block-scope enum constant is used instead of a macro so that it remains a
     * compile-time constant for the intrinsics without leaking into code that is
     * included later in this file. */
    enum { cmp_mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY };
    uint32_t len = 0;

    do {
        __m128i xmm_src0, xmm_src1;
        uint32_t ret;

        /* First 16-byte chunk of this iteration. */
        xmm_src0 = _mm_loadu_si128((const __m128i *)src0);
        xmm_src1 = _mm_loadu_si128((const __m128i *)src1);
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        /* The carry flag is set when at least one byte in the chunk differs. */
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;

        /* Second 16-byte chunk of this iteration. */
        xmm_src0 = _mm_loadu_si128((const __m128i *)src0);
        xmm_src1 = _mm_loadu_si128((const __m128i *)src1);
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;
    } while (len < 256);

    return 256;
}
56 
/* Count how many leading bytes of src0 and src1 are equal, examining at most 258 bytes.
 *
 * The first two bytes are checked together as one 16-bit value; if they match, the
 * remaining bytes are handed to compare256_unaligned_sse4_static().
 *
 * Returns 0 if the first bytes differ, 1 if only the second bytes differ, otherwise
 * 2 plus the result of the 256-byte comparison starting at offset 2.
 */
static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    uint16_t head0, head1;

    /* Load the leading two bytes with a fixed-size memcpy instead of dereferencing the
     * byte pointers as uint16_t *: this keeps the single 16-bit comparison while
     * avoiding the aliasing/alignment problems of the pointer cast and keeping the
     * inputs const. */
    memcpy(&head0, src0, sizeof(head0));
    memcpy(&head1, src1, sizeof(head1));

    if (head0 != head1) {
        /* Exactly one of the two bytes (or both) differs: the match length is 1 when
         * the first byte is still equal, 0 otherwise. */
        return (*src0 == *src1);
    }

    return compare256_unaligned_sse4_static(src0 + 2, src1 + 2) + 2;
}
63 
compare258_unaligned_sse4(const unsigned char * src0,const unsigned char * src1)64 Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
65     return compare258_unaligned_sse4_static(src0, src1);
66 }
67 
68 #define LONGEST_MATCH   longest_match_unaligned_sse4
69 #define COMPARE256      compare256_unaligned_sse4_static
70 #define COMPARE258      compare258_unaligned_sse4_static
71 
72 #include "match_tpl.h"
73 
74 #endif
75