1 /* Copyright 2018 The Chromium Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the Chromium source repository LICENSE file.
4 */
5 #ifndef __SLIDE_HASH__NEON__
6 #define __SLIDE_HASH__NEON__
7
8 #include "deflate.h"
9 #include <arm_neon.h>
10
neon_slide_hash_update(Posf * hash,const uInt hash_size,const ush w_size)11 inline static void ZLIB_INTERNAL neon_slide_hash_update(Posf *hash,
12 const uInt hash_size,
13 const ush w_size)
14 {
15 /* NEON 'Q' registers allow to store 128 bits, so we can load 8x16-bits
16 * values. For further details, check:
17 * ARM DHT 0002A, section 1.3.2 NEON Registers.
18 */
19 const size_t chunk = sizeof(uint16x8_t) / sizeof(uint16_t);
20 /* Unrolling the operation yielded a compression performance boost in both
21 * ARMv7 (from 11.7% to 13.4%) and ARMv8 (from 3.7% to 7.5%) for HTML4
22 * content. For full benchmarking data, check: http://crbug.com/863257.
23 */
24 const size_t stride = 2*chunk;
25 const uint16x8_t v = vdupq_n_u16(w_size);
26
27 for (Posf *end = hash + hash_size; hash != end; hash += stride) {
28 uint16x8_t m_low = vld1q_u16(hash);
29 uint16x8_t m_high = vld1q_u16(hash + chunk);
30
31 /* The first 'q' in vqsubq_u16 makes these subtracts saturate to zero,
32 * replacing the ternary operator expression in the original code:
33 * (m >= wsize ? m - wsize : NIL).
34 */
35 m_low = vqsubq_u16(m_low, v);
36 m_high = vqsubq_u16(m_high, v);
37
38 vst1q_u16(hash, m_low);
39 vst1q_u16(hash + chunk, m_high);
40 }
41 }
42
43
neon_slide_hash(Posf * head,Posf * prev,const unsigned short w_size,const uInt hash_size)44 inline static void ZLIB_INTERNAL neon_slide_hash(Posf *head, Posf *prev,
45 const unsigned short w_size,
46 const uInt hash_size)
47 {
48 /*
49 * SIMD implementation for hash table rebase assumes:
50 * 1. hash chain offset (Pos) is 2 bytes.
51 * 2. hash table size is multiple of 32 bytes.
52 * #1 should be true as Pos is defined as "ush"
53 * #2 should be true as hash_bits are greater than 7
54 */
55 const size_t size = hash_size * sizeof(head[0]);
56 Assert(sizeof(Pos) == 2, "Wrong Pos size.");
57 Assert((size % sizeof(uint16x8_t) * 2) == 0, "Hash table size error.");
58
59 neon_slide_hash_update(head, hash_size, w_size);
60 #ifndef FASTEST
61 neon_slide_hash_update(prev, w_size, w_size);
62 #endif
63 }
64
65 #endif
66