#!/usr/bin/env perl
# Copyright (c) 2018, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
# that header for details on how to use this.
#
# For convenience, this file is linked into libcrypto, where consuming builds
# already support architecture-specific sources. The static linker should drop
# this code in non-test binaries. This includes a shared library build of
# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
# used.
#
# References:
#
# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017

use strict;

# Usage: <script> [flavour] output
#
# If the first argument contains a '.', it is treated as the output file name
# and no flavour is passed on to the translator.
my $flavour = shift;
my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 conventions are selected by an assembler flavour of nasm, masm or
# mingw64, or by an output file ending in ".asm".
my $win64 = 0;
$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl, first next to this script and then in
# ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT is piped through the translator. Fail loudly if
# the translator cannot be started instead of printing into a dead handle.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# @inp is the registers used for function inputs, in order.
my @inp = $win64 ? ("%rcx", "%rdx", "%r8", "%r9") :
                   ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");

# @caller_state is the list of registers that the callee must preserve for the
# caller. This must match the definition of CallerState in abi_test.h.
my @caller_state = ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");
if ($win64) {
  @caller_state = ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14",
                   "%r15", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
                   "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15");
}

# $caller_state_size is the size of CallerState, in bytes. Registers whose
# names start with "%r" contribute 8 bytes and "%xmm" registers 16 bytes.
my $caller_state_size = 0;
foreach (@caller_state) {
  if (/^%r/) {
    $caller_state_size += 8;
  } elsif (/^%xmm/) {
    $caller_state_size += 16;
  } else {
    die "unknown register $_";
  }
}

# load_caller_state returns code which loads a CallerState structure at
# $off($reg) into the respective registers. No other registers are touched, but
# $reg may not be a register in CallerState. $cb is an optional callback to
# add extra lines after each movq or movdqa. $cb is passed the offset, relative
# to $reg, and name of each register.
sub load_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  for my $saved (@caller_state) {
    # General-purpose registers take an 8-byte movq slot; XMM registers take a
    # 16-byte movdqa slot.
    my ($insn, $width) = $saved =~ /^%r/   ? ("movq", 8)
                       : $saved =~ /^%xmm/ ? ("movdqa", 16)
                       : die "unknown register $saved";
    $asm .= "\t$insn\t$off($reg), $saved\n";
    # The callback sees the offset of the slot that was just read.
    $asm .= $cb->($off, $saved) if (defined($cb));
    $off += $width;
  }
  return $asm;
}

# store_caller_state is the mirror image of load_caller_state: the generated
# code writes the current register values out to the CallerState structure at
# $off($reg). $cb, if given, is invoked the same way.
sub store_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  for my $saved (@caller_state) {
    my ($insn, $width) = $saved =~ /^%r/   ? ("movq", 8)
                       : $saved =~ /^%xmm/ ? ("movdqa", 16)
                       : die "unknown register $saved";
    $asm .= "\t$insn\t$saved, $off($reg)\n";
    # The callback sees the offset of the slot that was just written.
    $asm .= $cb->($off, $saved) if (defined($cb));
    $off += $width;
  }
  return $asm;
}

# $max_params is the largest number of parameters abi_test_trampoline can
# forward to the function under test.
my $max_params = 10;

# On Windows the caller reserves stack slots for the register-based parameters
# as well, so those slots are skipped when copying and all $max_params slots
# are allocated. SysV only needs slots for the parameters that overflow the
# registers.
my ($stack_params_skip, $num_stack_params) =
    $win64 ? (scalar(@inp), $max_params)
           : (0, $max_params - scalar(@inp));

# Name the trampoline's own arguments after the registers they arrive in.
# (@inp has only four entries on Win64, so $unwind is left undefined there.)
my ($func, $state, $argv, $argc, $unwind) = @inp;
my $code = <<____;
.text

# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
# with |argv|, then saves the callee-saved registers into |state|. It returns
# the result of |func|. If |unwind| is non-zero, this function triggers unwind
# instrumentation.
# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
#                              const uint64_t *argv, size_t argc,
#                              int unwind);
.type	abi_test_trampoline, \@abi-omnipotent
.globl	abi_test_trampoline
.align	16
abi_test_trampoline:
.Labi_test_trampoline_seh_begin:
.cfi_startproc
	# Stack layout:
	#   8 bytes - align
	#   $caller_state_size bytes - saved caller registers
	#   8 bytes - scratch space
	#   8 bytes - saved copy of \$unwind (SysV-only)
	#   8 bytes - saved copy of \$state
	#   8 bytes - saved copy of \$func
	#   8 bytes - if needed for stack alignment
	#   8*$num_stack_params bytes - parameters for \$func
____

# Total frame size: alignment slot, saved caller state, three 8-byte slots
# (scratch, state, func) and the outgoing parameter area. SysV additionally
# needs a slot for the saved copy of $unwind.
my $stack_alloc_size = 8 + $caller_state_size + 8*3 + 8*$num_stack_params;
$stack_alloc_size += 8 unless $win64;

# Both ABIs require a 16-byte-aligned stack at the call. The call instruction
# that entered the trampoline already pushed 8 bytes, so the allocation must
# be 8 mod 16; pad with one extra parameter slot when it is not.
if ($stack_alloc_size % 16 != 8) {
  $num_stack_params += 1;
  $stack_alloc_size += 8;
}

# Offsets, relative to %rsp after the allocation, of each region of the frame.
my $stack_params_offset = 8 * $stack_params_skip;
my $func_offset = 8 * $num_stack_params;
my $state_offset = $func_offset + 8;

# On Win64, unwind is already in memory, in the caller's frame. On SysV it
# arrives in a register and gets its own slot, which pushes the scratch slot
# up by 8.
my ($unwind_offset, $scratch_offset) =
    $win64 ? ($stack_alloc_size + 5*8, $state_offset + 8)
           : ($state_offset + 8,       $state_offset + 16);
my $caller_state_offset = $scratch_offset + 8;

$code .= <<____;
	subq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	$stack_alloc_size
.Labi_test_trampoline_seh_prolog_alloc:
____
$code .= <<____ if (!$win64);
	movq	$unwind, $unwind_offset(%rsp)
____
# Store our caller's state.
# This is needed because we modify it ourselves, and
# also to isolate the test infrastructure from the function under test failing
# to save some register.
#
# The callback records each register's %rsp-relative save slot in
# %reg_offsets (keyed by register name without the leading '%') for the Win64
# SEH tables below. It also emits a .cfi_offset directive using the slot
# offset minus ($stack_alloc_size + 8), plus a per-register prolog label.
my %reg_offsets;
$code .= store_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  $reg = substr($reg, 1);
  $reg_offsets{$reg} = $off;
  $off -= $stack_alloc_size + 8;
  return <<____;
.cfi_offset	$reg, $off
.Labi_test_trampoline_seh_prolog_$reg:
____
});
$code .= <<____;
.Labi_test_trampoline_seh_prolog_end:
____

# Load the caller-supplied CallerState into the callee-saved registers.
$code .= load_caller_state(0, $state);
$code .= <<____;
	# Stash \$func and \$state, so they are available after the call returns.
	movq	$func, $func_offset(%rsp)
	movq	$state, $state_offset(%rsp)

	# Load parameters. Note this will clobber \$argv and \$argc, so we can
	# only use non-parameter volatile registers. There are three, and they
	# are the same between SysV and Win64: %rax, %r10, and %r11.
	movq	$argv, %r10
	movq	$argc, %r11
____
# Fill the register parameters in order, stopping as soon as argc runs out.
foreach (@inp) {
  $code .= <<____;
	dec	%r11
	js	.Largs_done
	movq	(%r10), $_
	addq	\$8, %r10
____
}
$code .= <<____;
	leaq	$stack_params_offset(%rsp), %rax
.Largs_loop:
	dec	%r11
	js	.Largs_done

	# This block should be:
	#    movq (%r10), %rtmp
	#    movq %rtmp, (%rax)
	# There are no spare registers available, so we spill into the scratch
	# space.
	movq	%r11, $scratch_offset(%rsp)
	movq	(%r10), %r11
	movq	%r11, (%rax)
	movq	$scratch_offset(%rsp), %r11

	addq	\$8, %r10
	addq	\$8, %rax
	jmp	.Largs_loop

.Largs_done:
	movq	$func_offset(%rsp), %rax
	movq	$unwind_offset(%rsp), %r10
	testq	%r10, %r10
	jz	.Lno_unwind

	# Set the trap flag.
	pushfq
	orq	\$0x100, 0(%rsp)
	popfq

	# Run an instruction to trigger a breakpoint immediately before the
	# call.
	nop
.globl	abi_test_unwind_start
abi_test_unwind_start:

	call	*%rax
.globl	abi_test_unwind_return
abi_test_unwind_return:

	# Clear the trap flag. Note this assumes the trap flag was clear on
	# entry. We do not support instrumenting an unwind-instrumented
	# |abi_test_trampoline|.
	pushfq
	andq	\$-0x101, 0(%rsp)	# -0x101 is ~0x100
	popfq
.globl	abi_test_unwind_stop
abi_test_unwind_stop:

	jmp	.Lcall_done

.Lno_unwind:
	call	*%rax

.Lcall_done:
	# Store what \$func did our state, so our caller can check.
	movq  $state_offset(%rsp), $state
____
$code .= store_caller_state(0, $state);

# Restore our caller's state.
$code .= load_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  $reg = substr($reg, 1);
  return ".cfi_restore\t$reg\n";
});
$code .= <<____;
	addq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	-$stack_alloc_size

	# %rax already contains \$func's return value, unmodified.
	ret
.cfi_endproc
.Labi_test_trampoline_seh_end:
.size	abi_test_trampoline,.-abi_test_trampoline
____

# abi_test_clobber_* zeros the corresponding register. These are used to test
# the ABI-testing framework.
foreach ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
  $code .= <<____;
.type	abi_test_clobber_r$_, \@abi-omnipotent
.globl	abi_test_clobber_r$_
.align	16
abi_test_clobber_r$_:
	xorq	%r$_, %r$_
	ret
.size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
____
}

foreach (0..15) {
  $code .= <<____;
.type	abi_test_clobber_xmm$_, \@abi-omnipotent
.globl	abi_test_clobber_xmm$_
.align	16
abi_test_clobber_xmm$_:
	pxor	%xmm$_, %xmm$_
	ret
.size	abi_test_clobber_xmm$_,.-abi_test_clobber_xmm$_
____
}

# Note: the .type directive for abi_test_get_and_clear_direction_flag below
# must name that function. It previously named abi_test_set_direction_flag, a
# copy-paste error that attached the function type to the wrong symbol.
$code .= <<____;
# abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
# register in unwind metadata.
# void abi_test_bad_unwind_wrong_register(void);
.type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
.globl	abi_test_bad_unwind_wrong_register
.align	16
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.Labi_test_bad_unwind_wrong_register_seh_begin:
	pushq	%r12
.cfi_push	%r13	# This should be %r12
.Labi_test_bad_unwind_wrong_register_seh_push_r13:
	# Windows evaluates epilogs directly in the unwinder, rather than using
	# unwind codes. Add a nop so there is one non-epilog point (immediately
	# before the nop) where the unwinder can observe the mistake.
	nop
	popq	%r12
.cfi_pop	%r12
	ret
.Labi_test_bad_unwind_wrong_register_seh_end:
.cfi_endproc
.size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register

# abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
# storage space for a saved register, breaking unwind.
# void abi_test_bad_unwind_temporary(void);
.type	abi_test_bad_unwind_temporary, \@abi-omnipotent
.globl	abi_test_bad_unwind_temporary
.align	16
abi_test_bad_unwind_temporary:
.cfi_startproc
.Labi_test_bad_unwind_temporary_seh_begin:
	pushq	%r12
.cfi_push	%r12
.Labi_test_bad_unwind_temporary_seh_push_r12:

	movq	%r12, %rax
	inc	%rax
	movq	%rax, (%rsp)
	# Unwinding from here is incorrect. Although %r12 itself has not been
	# changed, the unwind codes say to look in (%rsp) instead.

	movq	%r12, (%rsp)
	# Unwinding is now fixed.

	popq	%r12
.cfi_pop	%r12
	ret
.Labi_test_bad_unwind_temporary_seh_end:
.cfi_endproc
.size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary

# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
# was previously set, it returns one. Otherwise, it returns zero.
# int abi_test_get_and_clear_direction_flag(void);
.type	abi_test_get_and_clear_direction_flag, \@abi-omnipotent
.globl	abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
	pushfq
	popq	%rax
	andq	\$0x400, %rax
	shrq	\$10, %rax
	cld
	ret
.size	abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag

# abi_test_set_direction_flag sets the direction flag.
# void abi_test_set_direction_flag(void);
.type	abi_test_set_direction_flag, \@abi-omnipotent
.globl	abi_test_set_direction_flag
abi_test_set_direction_flag:
	std
	ret
.size	abi_test_set_direction_flag,.-abi_test_set_direction_flag
____

if ($win64) {
  $code .= <<____;
# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
# prolog, but the epilog does not match Win64's rules, breaking unwind during
# the epilog.
# void abi_test_bad_unwind_epilog(void);
.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
.globl	abi_test_bad_unwind_epilog
.align	16
abi_test_bad_unwind_epilog:
.Labi_test_bad_unwind_epilog_seh_begin:
	pushq	%r12
.Labi_test_bad_unwind_epilog_seh_push_r12:

	nop

	# The epilog should begin here, but the nop makes it invalid.
	popq	%r12
	nop
	ret
.Labi_test_bad_unwind_epilog_seh_end:
.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
____

  # Add unwind metadata for SEH.
  #
  # TODO(davidben): This is all manual right now. Once we've added SEH tests,
  # add support for emitting these in x86_64-xlate.pl, probably based on MASM
  # and Yasm's unwind directives, and unify with CFI. (Sadly, NASM does not
  # support these directives.) Then push that upstream to replace the
  # error-prone and non-standard custom handlers.

  # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
  my $UWOP_PUSH_NONVOL = 0;
  my $UWOP_ALLOC_LARGE = 1;
  my $UWOP_ALLOC_SMALL = 2;
  my $UWOP_SAVE_NONVOL = 4;
  my $UWOP_SAVE_XMM128 = 8;

  my %UWOP_REG_NUMBER = (rax => 0, rcx => 1, rdx => 2, rbx => 3, rsp => 4,
                         rbp => 5, rsi => 6, rdi => 7,
                         map(("r$_" => $_), (8..15)));

  # Emit one two-slot unwind code per saved register, walking the registers in
  # the reverse of the order in which the prolog stored them.
  my $unwind_codes = "";
  my $num_slots = 0;
  foreach my $saved (reverse @caller_state) {
    # Strip the leading '%' into a copy rather than assigning to the loop
    # variable, which may alias the elements of @caller_state.
    my $reg = substr($saved, 1);
    die "unknown register $reg" unless exists($reg_offsets{$reg});
    if ($reg =~ /^r/) {
      die "unknown register $reg" unless exists($UWOP_REG_NUMBER{$reg});
      my $info = $UWOP_SAVE_NONVOL | ($UWOP_REG_NUMBER{$reg} << 4);
      # UWOP_SAVE_NONVOL records the save offset scaled by 8.
      my $value = $reg_offsets{$reg} / 8;
      $unwind_codes .= <<____;
	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
	.byte	$info
	.value	$value
____
      $num_slots += 2;
    } elsif ($reg =~ /^xmm/) {
      my $info = $UWOP_SAVE_XMM128 | (substr($reg, 3) << 4);
      # UWOP_SAVE_XMM128 records the save offset scaled by 16.
      my $value = $reg_offsets{$reg} / 16;
      $unwind_codes .= <<____;
	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
	.byte	$info
	.value	$value
____
      $num_slots += 2;
    } else {
      die "unknown register $reg";
    }
  }

  if ($stack_alloc_size <= 128) {
    # UWOP_ALLOC_SMALL encodes (size - 8) / 8 in the operation info nibble.
    my $info = $UWOP_ALLOC_SMALL | ((($stack_alloc_size - 8) / 8) << 4);
    $unwind_codes .= <<____;
	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
	.byte	$info
____
    $num_slots++;
  } else {
    # The two-slot form of UWOP_ALLOC_LARGE stores size / 8 in a 16-bit slot,
    # so it can describe at most 512K - 8 bytes. Larger allocations would need
    # the three-slot form, which is not implemented here.
    die "stack allocation needs three unwind slots" if ($stack_alloc_size > 512 * 1024 - 8);
    my $info = $UWOP_ALLOC_LARGE;
    my $value = $stack_alloc_size / 8;
    $unwind_codes .= <<____;
	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
	.byte	$info
	.value	$value
____
    $num_slots += 2;
  }

  $code .= <<____;
.section	.pdata
.align	4
	# https://docs.microsoft.com/en-us/cpp/build/struct-runtime-function?view=vs-2017
	.rva	.Labi_test_trampoline_seh_begin
	.rva	.Labi_test_trampoline_seh_end
	.rva	.Labi_test_trampoline_seh_info

	.rva	.Labi_test_bad_unwind_wrong_register_seh_begin
	.rva	.Labi_test_bad_unwind_wrong_register_seh_end
	.rva	.Labi_test_bad_unwind_wrong_register_seh_info

	.rva	.Labi_test_bad_unwind_temporary_seh_begin
	.rva	.Labi_test_bad_unwind_temporary_seh_end
	.rva	.Labi_test_bad_unwind_temporary_seh_info

	.rva	.Labi_test_bad_unwind_epilog_seh_begin
	.rva	.Labi_test_bad_unwind_epilog_seh_end
	.rva	.Labi_test_bad_unwind_epilog_seh_info

.section	.xdata
.align	8
.Labi_test_trampoline_seh_info:
	# https://docs.microsoft.com/en-us/cpp/build/struct-unwind-info?view=vs-2017
	.byte	1	# version 1, no flags
	.byte	.Labi_test_trampoline_seh_prolog_end-.Labi_test_trampoline_seh_begin
	.byte	$num_slots
	.byte	0	# no frame register
$unwind_codes

.align	8
.Labi_test_bad_unwind_wrong_register_seh_info:
	.byte	1	# version 1, no flags
	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
	.byte	1	# one slot
	.byte	0	# no frame register

	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r13} << 4)]}

.align	8
.Labi_test_bad_unwind_temporary_seh_info:
	.byte	1	# version 1, no flags
	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
	.byte	1	# one slot
	.byte	0	# no frame register

	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}

.align	8
.Labi_test_bad_unwind_epilog_seh_info:
	.byte	1	# version 1, no flags
	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
	.byte	1	# one slot
	.byte	0	# no frame register

	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
____
}

print $code;
# STDOUT is a pipe to the translator, so close() also waits for it to exit and
# reports failure if it exited non-zero. Propagate that instead of silently
# leaving a truncated or missing output file behind.
close STDOUT
    or die "error closing STDOUT: " . ($! || "child exited with status $?");