1;
2;  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
; TODO(johannkoenig): Add the necessary include guards to vpx_config.asm.
; vpx_config.asm is not guarded, so it cannot be included twice. Because this
; file is used in conjunction with x86_abi_support.asm or x86inc.asm, this file
; must be included after those files.
15
; Advance %1 by the size in bytes of 8 tran_low_t elements.
;   %1: register to advance (updated in place).
; A tran_low_t element is 4 bytes in the high bit depth configuration and
; 2 bytes otherwise, so the step is 8 * 4 or 8 * 2 bytes.
; The update is done with add, so the flags are modified.
%macro INCREMENT_TRAN_LOW 1
%if CONFIG_VP9_HIGHBITDEPTH
  add %1, 8 * 4
%else
  add %1, 8 * 2
%endif
%endmacro
24
; Advance %1 by the size in bytes of %2 tran_low_t elements.
;   %1: register to advance (updated in place).
;   %2: number of elements (not bytes); it is scaled by 4 in the high bit
;       depth configuration and by 2 otherwise.
; %2 is parenthesized before scaling so that an expression argument is
; multiplied as a whole, matching how the element offset is handled by the
; LOAD/STORE macros in this file.
; The update is done with lea, which leaves the flags untouched.
%macro INCREMENT_ELEMENTS_TRAN_LOW 2
%if CONFIG_VP9_HIGHBITDEPTH
  lea %1, [%1 + (%2) * 4]
%else
  lea %1, [%1 + (%2) * 2]
%endif
%endmacro
33
; Load tran_low_t values from [%2 + %3] into m%1 as 16-bit words.
;   %1: index of the destination register (m%1).
;   %2: base address.
;   %3: offset from %2 in elements, not bytes.
; Low bit depth configuration: tran_low_t is already 16 bits wide, so one
; register's worth of data is loaded as is.
; High bit depth configuration: tran_low_t is 32 bits wide, so two consecutive
; 16-byte groups are read and packed down to 16 bits with signed saturation
; (packssdw). The first group ends up in the low half of m%1 and the second
; group in the high half.
; NOTE(review): the +16 stride assumes 16-byte registers, and mova presumably
; requires aligned addresses -- confirm against the including file.
%macro LOAD_TRAN_LOW 3
%if CONFIG_VP9_HIGHBITDEPTH
  mova      m%1, [%2 + (%3) * 4 +  0]
  packssdw  m%1, [%2 + (%3) * 4 + 16]
%else
  mova      m%1, [%2 + (%3) * 2]
%endif
%endmacro
47
; Store the 16-bit words held in m%1 to [%2 + %3] as tran_low_t values.
;   %1: index of the source register (m%1).
;   %2: base address.
;   %3: offset from %2 in elements, not bytes.
;   %4: index of a scratch register; receives the per-word sign mask.
;   %5: index of a scratch register; receives the widened low half.
;   %6: optional index of a scratch register; receives the widened high half.
; Low bit depth configuration: tran_low_t is 16 bits wide, so m%1 is stored
; directly and none of the scratch registers are touched (%4 and %5 must
; still be supplied).
; High bit depth configuration: tran_low_t is 32 bits wide, so every word is
; sign extended to 32 bits and the result is written as two 16-byte stores.
;   - With 5 arguments the high half is widened in place, so m%1 is corrupted.
;   - With 6 arguments the high half is widened in m%6, so m%1 is preserved.
%macro STORE_TRAN_LOW 5-6
%if CONFIG_VP9_HIGHBITDEPTH
  ; m%4 = 0xffff in every word lane where m%1 is negative, 0 elsewhere.
  pxor       m%4, m%4
  pcmpgtw    m%4, m%1

  ; Low half: interleave the value words with their sign words.
  mova       m%5, m%1
  punpcklwd  m%5, m%4
  mova       [%2 + (%3) * 4 +  0], m%5

  ; High half: same interleave, in m%6 when provided, otherwise in m%1.
  %if %0 == 6
  mova       m%6, m%1
  punpckhwd  m%6, m%4
  mova       [%2 + (%3) * 4 + 16], m%6
  %else
  punpckhwd  m%1, m%4
  mova       [%2 + (%3) * 4 + 16], m%1
  %endif
%else
  mova       [%2 + (%3) * 2], m%1
%endif
%endmacro
80
; Write the contents of m%1 to [%2 + %3], covering the same tran_low_t
; elements in either configuration. The caller is expected to have zeroed
; m%1 beforehand; this macro does not clear it.
;   %1: index of the register holding zeros (m%1); left unchanged.
;   %2: base address.
;   %3: offset from %2 in elements, not bytes.
; Low bit depth configuration: one store of m%1.
; High bit depth configuration: elements are twice as wide, so m%1 is stored
; twice, to two consecutive 16-byte locations.
%macro STORE_ZERO_TRAN_LOW 3
%if CONFIG_VP9_HIGHBITDEPTH
  mova  [%2 + (%3) * 4], m%1
  mova  [%2 + (%3) * 4 + 16], m%1
%else
  mova  [%2 + (%3) * 2], m%1
%endif
%endmacro
91