1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12%include "vpx_config.asm"
13
14; 32/64 bit compatibility macros
15;
16; In general, we make the source use 64 bit syntax, then twiddle with it using
17; the preprocessor to get the 32 bit syntax on 32 bit platforms.
18;
; ABI_IS_32BIT
; 1 for the 32 bit output formats listed here, 0 for every other format.
%ifidn __OUTPUT_FORMAT__,win32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,aout
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define ABI_IS_32BIT 1
%else
  ; Anything not listed above is treated as a 64 bit ABI.
  %define ABI_IS_32BIT 0
%endif
30
; On 32 bit ABIs only: map the 64 bit spellings used by the sources onto their
; 32 bit equivalents.
%if ABI_IS_32BIT
  ; General purpose registers.
  %define rax eax
  %define rcx ecx
  %define rdx edx
  %define rbx ebx
  %define rsp esp
  %define rbp ebp
  %define rsi esi
  %define rdi edi

  ; A plain mov stands in for movsxd.
  %define movsxd mov

  ; movq <dst>, <src>
  ; Emits movd when either operand is one of the eight 32 bit general purpose
  ; registers tested below; otherwise emits the movq instruction unchanged.
  ; The final movq relies on the preprocessor not re-expanding a macro inside
  ; its own body.
  %macro movq 2
    ; destination is a general purpose register
    %ifidn %1,eax
      movd %1,%2
    %elifidn %1,ecx
      movd %1,%2
    %elifidn %1,edx
      movd %1,%2
    %elifidn %1,ebx
      movd %1,%2
    %elifidn %1,esp
      movd %1,%2
    %elifidn %1,ebp
      movd %1,%2
    %elifidn %1,esi
      movd %1,%2
    %elifidn %1,edi
      movd %1,%2
    ; source is a general purpose register
    %elifidn %2,eax
      movd %1,%2
    %elifidn %2,ecx
      movd %1,%2
    %elifidn %2,edx
      movd %1,%2
    %elifidn %2,ebx
      movd %1,%2
    %elifidn %2,esp
      movd %1,%2
    %elifidn %2,ebp
      movd %1,%2
    %elifidn %2,esi
      movd %1,%2
    %elifidn %2,edi
      movd %1,%2
    %else
      movq %1,%2
    %endif
  %endmacro
%endif
79
80
; LIBVPX_YASM_WIN64
; 1 when the output format is 64 bit Windows, 0 otherwise. Both spellings of
; the format name (win64 and x64) that can be given on the Yasm command line
; are recognized.
%ifidn __OUTPUT_FORMAT__,x64
  %define LIBVPX_YASM_WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
  %define LIBVPX_YASM_WIN64 1
%else
  %define LIBVPX_YASM_WIN64 0
%endif
91
; sym(x)
; Expands to the name under which the C symbol x is visible for the target ABI.
;
; The ELF formats and 64 bit Windows use the name unchanged. Every other
; output format gets a leading underscore, as required for symbols with C
; linkage by ABIs such as MS COFF and Darwin MACH-O.
;
%if LIBVPX_YASM_WIN64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elfx32
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf32
  %define sym(x) x
%else
  %define sym(x) _ %+ x
%endif
109
;  PRIVATE
;  Attribute appended to a global symbol declaration to hide that symbol for
;  the target ABI. It expands to nothing unless CHROMIUM is defined.
;
;  Chromium doesn't like exported global symbols due to symbol clashing with
;  plugins among other things.
;
;  Requires Chromium's patched copy of yasm:
;    http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
;    http://www.tortall.net/projects/yasm/ticket/236
;
%ifndef CHROMIUM
  %define PRIVATE
%else
  %if LIBVPX_YASM_WIN64
    ; No hiding attribute is emitted for 64 bit Windows.
    %define PRIVATE
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define PRIVATE :hidden
  %elifidn __OUTPUT_FORMAT__,elf64
    %define PRIVATE :hidden
  %elifidn __OUTPUT_FORMAT__,elf32
    %define PRIVATE :hidden
  %else
    %define PRIVATE :private_extern
  %endif
%endif
136
; arg(x)
; Return the address specification of argument number x (counted from 0).
;
; x is parenthesized in every expansion, so an expression such as arg(n+1)
; scales the whole index rather than only its first term.
;
%if ABI_IS_32BIT
  ; Slots are 4 bytes wide, starting at ebp+8.
  ; NOTE(review): presumably assumes a standard push ebp / mov ebp, esp frame.
  %define arg(x) [ebp+8+4*(x)]
%else
  ; 64 bit ABI passes arguments in registers. This is a workaround to get up
  ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
  %if LIBVPX_YASM_WIN64
    ; Slots are 8 bytes wide, starting at rbp+16 and growing upwards.
    %define arg(x) [rbp+16+8*(x)]
  %else
    ; Slots are 8 bytes wide, starting at rbp-8 and growing downwards.
    %define arg(x) [rbp-8-8*(x)]
  %endif
%endif
151
; REG_SZ_BYTES, REG_SZ_BITS
; Size of a register for the target ABI, in bytes and in bits.
%if ABI_IS_32BIT=0
  %define REG_SZ_BITS  64
  %define REG_SZ_BYTES 8
%else
  %define REG_SZ_BITS  32
  %define REG_SZ_BYTES 4
%endif
161
162
; ALIGN_STACK <alignment> <register>
; This macro aligns the stack to the given alignment (in bytes). The mask
; arithmetic only works when <alignment> is a power of two. The stack is left
; such that the previous value of the stack pointer is on top of the stack
; (ie, the inverse of this macro is 'pop rsp').
; This macro uses one temporary register, which is not preserved, and thus
; must be specified as an argument.
; <alignment> may be a constant expression; it is parenthesized wherever it is
; used so that operator precedence cannot change the computed mask or offset.
%macro ALIGN_STACK 2
    mov         %2, rsp                             ; keep the incoming stack pointer
    and         rsp, -(%1)                          ; round rsp down to the alignment
    lea         rsp, [rsp - ((%1) - REG_SZ_BYTES)]  ; leave room so rsp is aligned again after the push
    push        %2                                  ; saved stack pointer ends up on top
%endmacro
175
176
;
; MASM-style operand size annotations.
;
; The Microsoft assembler tries to impose a certain amount of type safety in
; its register usage. YASM doesn't recognize these directives, so they are
; defined (case-insensitively) as empty macros to stay as compatible as
; possible with the original inline assembler this code was ported from.
;
%idefine MMWORD
%idefine XMMWORD
%idefine PTR
186
; PIC macros
;
; GET_GOT <reg>     Saves <reg> on the stack and prepares it as the base for
;                   position independent data access; (re)defines GLOBAL and
;                   RESTORE_GOT accordingly. Empty on 64 bit ABIs.
; GLOBAL(x)         Address expression for the data symbol x.
; RESTORE_GOT       Pops the register saved by GET_GOT.
; WRT_PLT           Suffix that routes a symbol reference through the PLT on
;                   the ELF formats.
; HIDDEN_DATA(x)    Declaration of the data symbol x, with a hiding attribute
;                   where one is configured below.
;
%if ABI_IS_32BIT
  %if CONFIG_PIC=1
    %ifidn __OUTPUT_FORMAT__,elf32
      %define WRT_PLT wrt ..plt
      %define GET_GOT_SAVE_ARG 1
      %macro GET_GOT 1
        extern _GLOBAL_OFFSET_TABLE_
        push %1
        call %%fetch_ret_addr
        %%ret_addr:
        jmp %%got_ready
        %%fetch_ret_addr:
        ; the return address on top of the stack is the address of ret_addr
        mov %1, [esp]
        add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%ret_addr wrt ..gotpc
        ret
        %%got_ready:
        %undef GLOBAL
        %define GLOBAL(x) x + %1 wrt ..gotoff
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %elifidn __OUTPUT_FORMAT__,macho32
      %define GET_GOT_SAVE_ARG 1
      %macro GET_GOT 1
        push %1
        call %%pc_anchor
        %%pc_anchor:
        ; pop the return address, i.e. the address of pc_anchor
        pop  %1
        %undef GLOBAL
        %define GLOBAL(x) x + %1 - %%pc_anchor
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %endif
  %endif

  %ifndef CHROMIUM
    %define HIDDEN_DATA(x) x
  %else
    %ifidn __OUTPUT_FORMAT__,macho32
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %endif
%else
  ; 64 bit: data is addressed with rel, so GET_GOT has nothing to do.
  %define GLOBAL(x) rel x
  %macro GET_GOT 1
  %endmacro

  %ifidn __OUTPUT_FORMAT__,macho64
    %ifndef CHROMIUM
      %define HIDDEN_DATA(x) x
    %else
      %define HIDDEN_DATA(x) x:private_extern
    %endif
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define HIDDEN_DATA(x) x:data hidden
    %define WRT_PLT wrt ..plt
  %elifidn __OUTPUT_FORMAT__,elf64
    %define HIDDEN_DATA(x) x:data hidden
    %define WRT_PLT wrt ..plt
  %else
    %define HIDDEN_DATA(x) x
  %endif
%endif
; Fallbacks: make sure GET_GOT, GLOBAL, RESTORE_GOT and WRT_PLT all exist.
; Whatever has not been defined yet becomes a no-op (GLOBAL(x) becomes x).
%ifndef WRT_PLT
  %define WRT_PLT
%endif
%ifndef RESTORE_GOT
  %define RESTORE_GOT
%endif
%ifnmacro GET_GOT
  %define GLOBAL(x) x
  %macro GET_GOT 1
  %endmacro
%endif
265
; SHADOW_ARGS_TO_STACK <argc>
; Copies the first <argc> arguments to the stack slots that arg() addresses.
; UNSHADOW_ARGS
; Resets rsp to rbp, discarding anything pushed by SHADOW_ARGS_TO_STACK.
; Both expand to nothing on 32 bit ABIs.
%if ABI_IS_32BIT
  %define UNSHADOW_ARGS
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
%else
  %define UNSHADOW_ARGS mov rsp, rbp
  %if LIBVPX_YASM_WIN64
    ; Win64: store the four register arguments into their arg() slots.
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      %if %1 > 0
        mov arg(0),rcx
        %if %1 > 1
          mov arg(1),rdx
          %if %1 > 2
            mov arg(2),r8
            %if %1 > 3
              mov arg(3),r9
            %endif
          %endif
        %endif
      %endif
    %endmacro
  %else
    ; Other 64 bit ABIs: push the six register arguments in order, then copy
    ; any further arguments from [rbp+16] upwards. The copy goes through rax,
    ; so rax is clobbered when argc is greater than 6.
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      %if %1 >= 1
        push rdi
      %endif
      %if %1 >= 2
        push rsi
      %endif
      %if %1 >= 3
        push rdx
      %endif
      %if %1 >= 4
        push rcx
      %endif
      %if %1 >= 5
        push r8
      %endif
      %if %1 >= 6
        push r9
      %endif
      %if %1 > 6
        %assign off 16
        %assign i %1-6
        %rep i
          mov rax,[rbp+off]
          push rax
          %assign off off+8
        %endrep
      %endif
    %endmacro
  %endif
%endif
319
; Win64 ABI requires that XMM6:XMM15 are callee saved
; SAVE_XMM n, [u]
; store registers 6-n on the stack; n must be in the range 6-15, anything else
; is rejected at assembly time.
; if u is specified, use unaligned movs.
; Win64 ABI requires 16 byte stack alignment, but the call then pushes an 8
; byte return address. Typically we follow this up with 'push rbp' -
; re-aligning the stack - but in some cases this is not done and unaligned
; movs must be used.
; RESTORE_XMM
; reload the registers stored by SAVE_XMM and release their stack space. It
; uses the register count and mov flavour recorded by the most recent SAVE_XMM.
; On every other target both macros expand to nothing.
%if LIBVPX_YASM_WIN64
%macro SAVE_XMM 1-2 a
  %if %1 < 6
    %error Only xmm registers 6-15 must be preserved
  %elif %1 > 15
    %error SAVE_XMM argument must be in the range 6-15
  %else
    %assign last_xmm %1
    %define movxmm movdq %+ %2
    %assign xmm_stack_space ((last_xmm - 5) * 16)
    sub rsp, xmm_stack_space
    %assign i 6
    %rep (last_xmm - 5)
      movxmm [rsp + ((i - 6) * 16)], xmm %+ i
      %assign i i+1
    %endrep
  %endif
%endmacro
%macro RESTORE_XMM 0
  %ifndef last_xmm
    %error RESTORE_XMM must be paired with SAVE_XMM n
  %else
    %assign i last_xmm
    %rep (last_xmm - 5)
      movxmm xmm %+ i, [rsp +((i - 6) * 16)]
      %assign i i-1
    %endrep
    add rsp, xmm_stack_space
    ; there are a couple functions which return from multiple places.
    ; otherwise, we could uncomment these:
    ; %undef last_xmm
    ; %undef xmm_stack_space
    ; %undef movxmm
  %endif
%endmacro
%else
%macro SAVE_XMM 1-2
%endmacro
%macro RESTORE_XMM 0
%endmacro
%endif
366
; SECTION_RODATA
; Switches to the section used for read-only data on the target format.
;
; .rodata seems to be an elf-ism, as it doesn't work on OSX, so the MACH-O
; formats use .text and aout uses .data instead.
;
%ifidn __OUTPUT_FORMAT__,aout
  %define SECTION_RODATA section .data
%elifidn __OUTPUT_FORMAT__,macho32
  %macro SECTION_RODATA 0
  section .text
  %endmacro
%elifidn __OUTPUT_FORMAT__,macho64
  %define SECTION_RODATA section .text
%else
  %define SECTION_RODATA section .rodata
%endif
382
383
; Tell GNU ld that we don't require an executable stack. This applies to the
; ELF output formats only; assembly then continues in .text.
%ifidn __OUTPUT_FORMAT__,elfx32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf64
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%endif
395
; LIBVPX_RAND
; Name of the C random number function used by the postproc routines. It is
; only declared when one of the postproc configurations is enabled.
; On Android platforms lrand48 is used, because rand() was not available
; prior to the Android L release.
%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1
  %ifndef __ANDROID__
    extern sym(rand)
    %define LIBVPX_RAND rand
  %else
    extern sym(lrand48)
    %define LIBVPX_RAND lrand48
  %endif
%endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
407