1 /*
2  * AVX2 optimized hash slide, based on Intel's slide_sse implementation
3  *
4  * Copyright (C) 2017 Intel Corporation
5  * Authors:
6  *   Arjan van de Ven   <arjan@linux.intel.com>
7  *   Jim Kukunas        <james.t.kukunas@linux.intel.com>
8  *   Mika T. Lindqvist  <postmaster@raasu.org>
9  *
10  * For conditions of distribution and use, see copyright notice in zlib.h
11  */
12 #include "../../zbuild.h"
13 #include "../../deflate.h"
14 
15 #include <immintrin.h>
16 
slide_hash_avx2(deflate_state * s)17 Z_INTERNAL void slide_hash_avx2(deflate_state *s) {
18     Pos *p;
19     unsigned n;
20     uint16_t wsize = (uint16_t)s->w_size;
21     const __m256i zmm_wsize = _mm256_set1_epi16((short)wsize);
22 
23     n = HASH_SIZE;
24     p = &s->head[n] - 16;
25     do {
26         __m256i value, result;
27 
28         value = _mm256_loadu_si256((__m256i *)p);
29         result= _mm256_subs_epu16(value, zmm_wsize);
30         _mm256_storeu_si256((__m256i *)p, result);
31         p -= 16;
32         n -= 16;
33     } while (n > 0);
34 
35     n = wsize;
36     p = &s->prev[n] - 16;
37     do {
38         __m256i value, result;
39 
40         value = _mm256_loadu_si256((__m256i *)p);
41         result= _mm256_subs_epu16(value, zmm_wsize);
42         _mm256_storeu_si256((__m256i *)p, result);
43 
44         p -= 16;
45         n -= 16;
46     } while (n > 0);
47 }
48