# D30V parallel optimization test
# assemble with "-O"
#
# The file is a sequence of small instruction groups (mostly pairs).
# Where a group carries a comment, the comment states whether the group
# is expected to be packed in parallel or kept serial, and why.

	.text
start:
	# two register-only instructions with no register in common
	abs	r1,r2
	abs	r3,r4

	# two flag-only instructions with no flag in common
	notfg	f0,f4
	notfg	f1,f2

	# one register instruction and one flag instruction
	abs	r1,r2
	notfg	f1,f2

# both change C flag
	add	r1,r2,r3
	notfg	C,f0

# one uses and one changes C flag
	add	r1,r2,r3
	notfg	f0,C

	# branch (bra/bsr) grouped with abs, branch first and branch last
	bra	.
	abs	r1,r2

	abs	r1,r2
	bra	.

	bsr	.
	abs	r1,r2

	# three instructions in this group: two abs followed by bsr
	abs	r1,r2
	abs	r1,r2
	bsr	.

	# ldb/stb in all four orders: ldb+stb, stb+ldb, ldb+ldb, stb+stb
	ldb	r1,@(r2,r3)
	stb	r7,@(r8,r9)

	stb	r7,@(r8,r9)
	ldb	r1,@(r2,r3)

	ldb	r7,@(r8,r9)
	ldb	r1,@(r2,r3)

	stb	r7,@(r8,r9)
	stb	r1,@(r2,r3)

	# r3 is the first operand of add and also appears in the stw address
	add	r3, r3, r6
	stw	r2, @(r3, 0)

# should be serial because of conditional execution
	# cmple names f0 here; jmp suffixes covered: /tx /fx /xt /xf /tt /tf
	cmple	f0,r4,r5
	jmp/tx	0x0

	cmple	f0,r4,r5
	jmp/fx	0x0

	cmple	f0,r4,r5
	jmp/xt	0x0

	cmple	f0,r4,r5
	jmp/xf	0x0

	cmple	f0,r4,r5
	jmp/tt	0x0

	cmple	f0,r4,r5
	jmp/tf	0x0

	# same again with cmple naming f1, for the /tx and /xt suffixes
	cmple	f1,r4,r5
	jmp/tx	0x0

	cmple	f1,r4,r5
	jmp/xt	0x0

	# serial because of the r4 dependency
	add	r4, r0, 1
	cmple	f0, r4, r5

	# parallel
	add	r4, r0, 1
	cmple	f0, r3, r5

	# serial because ld2w loads r5
	ld2w	r4,@(r0,r6)
	adds	r5,r19,r20

	# serial because ld2w loads r5
	ld2w	r4,@(r0,r6)
	adds	r3,r5,r20

	# parallel even though ld2w uses r6 and adds changes it
	ld2w	r4,@(r0,r6)
	adds	r6,r19,r20

	# parallel
	ld2w	r4,@(r0,r6)
	adds	r7,r19,r20

	# parallel
	ld2w	r4,@(r0,r6)
	adds	r7,r0,r20

	# parallel even though st2w uses r5 and adds modifies it
	st2w	r4,@(r0,r6)
	adds	r5,r19,r20

	# parallel, both use but don't modify r5
	st2w	r4,@(r0,r6)
	adds	r3,r5,r20

	# parallel even though st2w uses r6 and adds changes it
	st2w	r4,@(r0,r6)
	adds	r6,r19,r20

	# parallel
	st2w	r4,@(r0,r6)
	adds	r7,r19,r20

	# parallel
	st2w	r4,@(r0,r6)
	adds	r7,r0,r20

# test memory dependencies

	# always serial because one could overwrite the other
	st2w	r10,@(r3,r4)
	st2w	r40,@(r43,r44)
# memory dependency cases, continued: a store followed by a load, then two loads

	# always serial
	stw	r1,@(r2,r3)
	ldw	r41,@(r42,r43)

	# reads can happen in parallel but the current architecture
	# doesn't support it
	ldw	r1,@(r2,r3)
	ldb	r41,@(r42,r43)

# test post increment and decrement dependencies

	# serial
	ldw	r4,@(r6+,r11)
	adds	r9,r6,2

	# parallel, modification to r6 happens last
	adds	r9,r6,2
	ldw	r4,@(r6-,r11)

	# serial
	stw	r4,@(r6-,r11)
	adds	r9,r6,2

	# parallel
	ldw	r4,@(r6,r11)
	adds	r9,r6,2

	# parallel
	adds	r9,r6,2
	ldw	r4,@(r6,r11)

# if the first instruction is a jmp, don't parallelize
	jmp	0
	abs	r1,r2

	jsr	0
	abs	r1,r2

	.align 3

	# same check with bra and bsr as the first instruction
	bra	0
	abs	r1,r2

	bsr	0
	abs	r1,r2

# Explicitly prohibited from parallel execution.
# The labels are here to prevent instruction pairs
# from being merged with following pairs.

label1:
	st2w	r2, @(r2, r3)
	addhlll	r4, r5, r6
label2:
	st4hb	r8, @(r8, r9)
	subhllh	r10, r11, r12
label3:
	ld2w	r14, @(r14, r15)
	mulhxhl	r16, r17, r18
label4:
	ldw	r19, @(r20, r21)
	mulx2h	r22, r23, r24
label5:
	ldh	r25, @(r26, r27)
	mul2h	r28, r29, r30

# Insertion of NOPs required to prevent pipeline clashes.
# NOTE(review): some lines below join two instructions with '<-' or '->';
# per the gas D30V syntax these presumably request an explicit sequential
# ordering of the two halves -- confirm against the assembler manual.

label6:
	# three instructions in this group: mul, mulhxll, add
	mul	r1,r2,r3
	mulhxll	r4,r5,r6
	add	r7, r8, r9
label7:

	# mul followed by ldw on separate lines
	mul	r2,r3,r4
	ldw	r5, @(r6,r0)

	# ldw and mul joined on one line with '<-'
	ldw	r10, @(r11, r0)	<- mul r7,r8,r9

	# mul and ldw joined on one line with '->'
	mul	r12,r13,r14	-> ldw r15, @(r16, r0)

	# mac1 followed by ldw on separate lines
	mac1	r2,r3,r4
	ldw	r5, @(r6,r0)

	# ldw and mac0 joined with '<-', then one more ldw on its own line
	ldw	r10, @(r11, r0)	<- mac0 r7,r8,r9
	ldw	r10, @(r11, r0)
