//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Functions that implement common sequences in function prologues and
// epilogues, used to save code size. Illustrative usage sketches for these
// routines appear in the comments at the end of this file.

// Begin a global function: place it in .text, align it for packet fetch, and
// define its label.
  .macro FUNCTION_BEGIN name
  .text
  .globl \name
  .type \name, @function
  .falign
\name:
  .endm

// End a function by recording its size.
  .macro FUNCTION_END name
  .size \name, . - \name
  .endm

// End the symbol \name0 and begin \name1 at the same address, so that
// execution falls straight through from one entry point into the next.
  .macro FALLTHROUGH_TAIL_CALL name0 name1
  .size \name0, . - \name0
  .globl \name1
  .type \name1, @function
  .falign
\name1:
  .endm


// Save r25:24 at fp+#-8 and r27:26 at fp+#-16.

// The compiler knows that the __save_* functions clobber LR. No other
// registers should be used without informing the compiler.

// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.

FUNCTION_BEGIN __save_r24_through_r27
    memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
  {
    memd(fp+#-8) = r25:24
    jumpr lr
  }
FUNCTION_END __save_r24_through_r25


// For each of the *_before_tailcall functions, jumpr lr executes in parallel
// with deallocframe. The jumpr therefore uses the old value of lr (the return
// address of the call into this routine), which is where these functions need
// to return, while deallocframe reloads lr with the value the caller needs
// going into its tail call.

FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
    r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
  {
    r25:24 = memd(fp+#-8)
    deallocframe
    jumpr lr
  }
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall


// Here we use the extra load bandwidth to restore LR early, allowing the
// return to occur in parallel with the deallocframe.

FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
  {
    lr = memw(fp+#4)
    r27:26 = memd(fp+#-16)
  }
  {
    r25:24 = memd(fp+#-8)
    deallocframe
    jumpr lr
  }
FUNCTION_END __restore_r24_through_r27_and_deallocframe


// Here the load bandwidth is already maximized: the packet pairs the r25:24
// reload with deallocframe (which itself reloads lr:fp), leaving no slot to
// restore LR early, so the return issues in the following packet.

FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
  {
    r25:24 = memd(fp+#-8)
    deallocframe
  }
    jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe
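
// Illustrative usage sketches. Everything below is commentary only; none of it
// is assembled into the library.

// A hedged sketch of a prologue that might call the save routines above. The
// frame size (#16) and the unpacketized sequence are assumptions made for
// illustration, not taken from any particular compiler's output. The call
// clobbers LR, and the frame must cover the slots at fp+#-8 and fp+#-16.
//
//         allocframe(#16)
//         call __save_r24_through_r27      // or __save_r24_through_r25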
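
// A hedged sketch of an epilogue that ends in a tail call; the callee name
// tail_callee is hypothetical. On return from the call, LR again holds this
// function's own return address (restored by deallocframe), which the tail
// callee will eventually return through.
//
//         call __restore_r24_through_r27_and_deallocframe_before_tailcall
//         jump tail_callee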
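
// A hedged sketch of a normal epilogue. Because these routines reload LR from
// the frame and return through it, they are reached with a jump as the
// function's final transfer rather than with a call.
//
//         jump __restore_r24_through_r27_and_deallocframe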