/*
 * Video pixel operations: a flat listing of instruction forms, one
 * statement per line, grouped by mnemonic. Each group is preceded by the
 * syntax template(s) it exercises, written as a comment.
 *
 * NOTE(review): the mnemonics (ALIGNn, BYTEOPxx, SAA, BYTEPACK,
 * BYTEUNPACK, DISALGNEXCPT) look like the Blackfin instruction set and the
 * listing reads like assembler syntax coverage; nothing here checks
 * results at run time -- confirm against the harness that consumes it.
 * NOTE(review): the "(b)" tag in the templates is carried over unchanged;
 * presumably it refers to an instruction-length legend in the programming
 * reference -- confirm.
 */

/* MY_LABEL2 is declared external; it is not referenced in the portion visible here. */
.EXTERN MY_LABEL2;
.section .text;

//
//13 VIDEO PIXEL OPERATIONS
//

/* ---- ALIGN8: same/different source registers, then a sweep over R0-R7 ---- */
//Dreg = ALIGN8 ( Dreg, Dreg ) ; /* overlay 1 byte (b) */
R0 = ALIGN8(R0, R0);
R0 = ALIGN8(R0, R1);
R0 = ALIGN8(R1, R0);
R0 = ALIGN8(R1, R1);
R0 = ALIGN8(R1, R2);
R3 = ALIGN8(R4, R5);
R6 = ALIGN8(R7, R0);
R1 = ALIGN8(R2, R3);
R4 = ALIGN8(R5, R6);
R7 = ALIGN8(R0, R1);
R2 = ALIGN8(R3, R4);
R5 = ALIGN8(R6, R7);

/* ---- ALIGN16: same register combinations as the ALIGN8 group ---- */
//Dreg = ALIGN16 ( Dreg, Dreg ) ; /* overlay 2 bytes (b) */
R0 = ALIGN16(R0, R0);
R0 = ALIGN16(R0, R1);
R0 = ALIGN16(R1, R0);
R0 = ALIGN16(R1, R1);
R0 = ALIGN16(R1, R2);
R3 = ALIGN16(R4, R5);
R6 = ALIGN16(R7, R0);
R1 = ALIGN16(R2, R3);
R4 = ALIGN16(R5, R6);
R7 = ALIGN16(R0, R1);
R2 = ALIGN16(R3, R4);
R5 = ALIGN16(R6, R7);

/* ---- ALIGN24: same register combinations as the ALIGN8 group ---- */
//Dreg = ALIGN24 ( Dreg, Dreg ) ; /* overlay 3 bytes (b) */
R0 = ALIGN24(R0, R0);
R0 = ALIGN24(R0, R1);
R0 = ALIGN24(R1, R0);
R0 = ALIGN24(R1, R1);
R0 = ALIGN24(R1, R2);
R3 = ALIGN24(R4, R5);
R6 = ALIGN24(R7, R0);
R1 = ALIGN24(R2, R3);
R4 = ALIGN24(R5, R6);
R7 = ALIGN24(R0, R1);
R2 = ALIGN24(R3, R4);
R5 = ALIGN24(R6, R7);

/* ---- DISALGNEXCPT: takes no operands ---- */
DISALGNEXCPT ; /* (b) */

/* ---- BYTEOP3P: every (LO|HI) x (forward|R) option, both pair orders ---- */
/* forward byte order operands */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (LO) ; /* sum into low bytes (b) */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (HI) ; /* sum into high bytes (b) */
/* reverse byte order operands */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (LO, R) ; /* sum into low bytes (b) */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (HI, R) ; /* sum into high bytes (b) */

r0 = byteop3p (r1:0, r3:2) (lo) ;
r1 = byteop3p (r1:0, r3:2) (hi) ;
r2 = byteop3p (r1:0, r3:2) (lo, r) ;
r3 = byteop3p (r1:0, r3:2) (hi, r) ;
r4 = byteop3p (r3:2, r1:0) (lo) ;
r5 = byteop3p (r3:2, r1:0) (hi) ;
r6 = byteop3p (r3:2, r1:0) (lo, r) ;
r7 = byteop3p (r3:2, r1:0) (hi, r) ;

/* ---- Dual accumulator half-sum: destination pairs R0/R1 .. R6/R7 ---- */
//Dreg = A1.L + A1.H, Dreg = A0.L + A0.H ; /* (b) */

R0 = A1.L + A1.H, R1= A0.L + A0.H ;
R2 = A1.L + A1.H, R3= A0.L + A0.H ;
R4 = A1.L + A1.H, R5= A0.L + A0.H ;
R6 = A1.L + A1.H, R7= A0.L + A0.H ;

/* ---- BYTEOP16P: mixed-case mnemonic spellings, both pair orders ---- */
/* forward byte order operands */
//( Dreg, Dreg ) = BYTEOP16P ( Dreg_pair, Dreg_pair ) ; /* (b) */
(r7,r0) = BYTEOP16P ( r3:2,r1:0 ) ;
(r1,r2) = byteop16p (r3:2,r1:0) ;
(r0,r1) = BYTEOP16P ( r3:2,r1:0 ) ;
(r2,r3) = byteop16p (r3:2,r1:0) ;
(r7,r0) = BYTEOP16P (r1:0, r3:2) ;
(r1,r2) = byteop16p (r1:0,r3:2) ;
(r0,r1) = BYTEOP16P (r1:0, r3:2) ;
(r2,r3) = byteop16p (r1:0,r3:2) ;

/* reverse byte order operands */
//( Dreg, Dreg ) = BYTEOP16P ( Dreg_pair, Dreg_pair ) (R); /* (b) */
(r7,r0) = BYTEOP16P ( r3:2,r1:0 )(r) ;
(r1,r2) = byteop16p (r3:2,r1:0)(r) ;
(r0,r1) = BYTEOP16P ( r3:2,r1:0 )(r) ;
(r2,r3) = byteop16p (r3:2,r1:0)(r) ;
(r7,r0) = BYTEOP16P (r1:0, r3:2)(r) ;
(r1,r2) = byteop16p (r1:0,r3:2)(r) ;
(r0,r1) = BYTEOP16P (r1:0, r3:2)(r) ;
(r2,r3) = byteop16p (r1:0,r3:2)(r) ;

/* ---- BYTEOP1P: no option, (R), (T), (T,R); both pair orders ---- */
/* forward byte order operands */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) ; /* (b) */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (T) ; /* truncated (b)*/
/* reverse byte order operands */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (R) ; /* (b) */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (T, R) ; /* truncated (b) */

r3 = byteop1p (r1:0, r3:2) ;
r3 = byteop1p (r1:0, r3:2) (r) ;
r3 = byteop1p (r1:0, r3:2) (t) ;
r3 = byteop1p (r1:0, r3:2) (t,r) ;

r0 = byteop1p (r3:2,r1:0);
r1 = byteop1p (r3:2,r1:0)(r) ;
r2 = byteop1p (r3:2,r1:0)(t) ;
r3 = byteop1p (r3:2,r1:0)(t,r) ;

/* ---- BYTEOP2P: RNDL/RNDH/TL/TH, each forward and with R ---- */
/* forward byte order operands */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDL) ;
/* round into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDH) ;
/* round into high bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TL) ;
/* truncate into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TH) ;
/* truncate into high bytes (b) */
/* reverse byte order operands */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDL, R) ;
/* round into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDH, R) ;
/* round into high bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TL, R) ;
/* truncate into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TH, R) ;
/* truncate into high bytes (b) */

/* fixed destination r3, all eight option forms */
r3 = byteop2p (r1:0, r3:2) (rndl) ;
r3 = byteop2p (r1:0, r3:2) (rndh) ;
r3 = byteop2p (r1:0, r3:2) (tl) ;
r3 = byteop2p (r1:0, r3:2) (th) ;
r3 = byteop2p (r1:0, r3:2) (rndl, r) ;
r3 = byteop2p (r1:0, r3:2) (rndh, r) ;
r3 = byteop2p (r1:0, r3:2) (tl, r) ;
r3 = byteop2p (r1:0, r3:2) (th, r) ;

/* destinations r0-r7, sources r1:0 and r3:2 */
r0 = byteop2p (r1:0, r3:2) (rndl) ;
r1 = byteop2p (r1:0, r3:2) (rndh) ;
r2 = byteop2p (r1:0, r3:2) (tl) ;
r3 = byteop2p (r1:0, r3:2) (th) ;
r4 = byteop2p (r1:0, r3:2) (rndl, r) ;
r5 = byteop2p (r1:0, r3:2) (rndh, r) ;
r6 = byteop2p (r1:0, r3:2) (tl, r) ;
r7 = byteop2p (r1:0, r3:2) (th, r) ;

/* destinations r0-r7, the same pair r3:2 used for both sources */
r0 = byteop2p (r3:2, r3:2) (rndl) ;
r1 = byteop2p (r3:2, r3:2) (rndh) ;
r2 = byteop2p (r3:2, r3:2) (tl) ;
r3 = byteop2p (r3:2, r3:2) (th) ;
r4 = byteop2p (r3:2, r3:2) (rndl, r) ;
r5 = byteop2p (r3:2, r3:2) (rndh, r) ;
r6 = byteop2p (r3:2, r3:2) (tl, r) ;
r7 = byteop2p (r3:2, r3:2) (th, r) ;

/* ---- BYTEPACK ---- */
//Dreg = BYTEPACK ( Dreg, Dreg ) ; /* (b) */
r0 = bytepack (r0,r0) ;
r1 = bytepack (r2,r3) ;
r4 = bytepack (r5,r6) ;
r7 = bytepack (r0,r1) ;
r2 = bytepack (r3,r4) ;
r5 = bytepack (r6,r7) ;

/* ---- BYTEOP16M: all four source-pair combinations, forward and (r) ---- */
/* forward byte order operands */
//(Dreg, Dreg) = BYTEOP16M (Dreg_pair, Dreg_pair) ; /* (b) */
/* reverse byte order operands */
//(Dreg, Dreg) = BYTEOP16M (Dreg_pair, Dreg_pair) (R) ; /* (b) */

/* sources r3:2, r1:0 */
(r1,r2)= byteop16m (r3:2,r1:0) ;
(r1,r2)= byteop16m (r3:2,r1:0) (r) ;
(r0,r1)= byteop16m (r3:2,r1:0) ;
(r2,r3)= byteop16m (r3:2,r1:0) (r) ;
(r3,r5)= byteop16m (r3:2,r1:0) ;
(r6,r7)= byteop16m (r3:2,r1:0) (r) ;

/* sources r1:0, r1:0 */
(r1,r2)= byteop16m (r1:0,r1:0) ;
(r1,r2)= byteop16m (r1:0,r1:0) (r) ;
(r0,r1)= byteop16m (r1:0,r1:0) ;
(r2,r3)= byteop16m (r1:0,r1:0) (r) ;
(r3,r5)= byteop16m (r1:0,r1:0) ;
(r6,r7)= byteop16m (r1:0,r1:0) (r) ;

/* sources r1:0, r3:2 */
(r1,r2)= byteop16m (r1:0,r3:2) ;
(r1,r2)= byteop16m (r1:0,r3:2) (r) ;
(r0,r1)= byteop16m (r1:0,r3:2) ;
(r2,r3)= byteop16m (r1:0,r3:2) (r) ;
(r3,r5)= byteop16m (r1:0,r3:2) ;
(r6,r7)= byteop16m (r1:0,r3:2) (r) ;

/* sources r3:2, r3:2 */
(r1,r2)= byteop16m (r3:2,r3:2) ;
(r1,r2)= byteop16m (r3:2,r3:2) (r) ;
(r0,r1)= byteop16m (r3:2,r3:2) ;
(r2,r3)= byteop16m (r3:2,r3:2) (r) ;
(r3,r5)= byteop16m (r3:2,r3:2) ;
(r6,r7)= byteop16m (r3:2,r3:2) (r) ;

/* ---- SAA: stand-alone and issued in parallel (||) with two loads ---- */
//SAA (Dreg_pair, Dreg_pair) ; /* forward byte order operands (b) */
//SAA (Dreg_pair, Dreg_pair) (R) ; /* reverse byte order operands (b) */

saa(r1:0, r3:2) || r0 = [i0++] || r2 = [i1++] ; /* parallel fill instructions */
saa (r1:0, r3:2) (R) || r1 = [i0++] || r3 = [i1++] ; /* reverse, parallel fill instructions */
saa (r1:0, r3:2) ; /* last SAA in a loop, no more fill required */

/* ---- BYTEUNPACK: both source pairs, forward and (R) ---- */
//( Dreg , Dreg ) = BYTEUNPACK Dreg_pair ; /* (b) */
//( Dreg , Dreg ) = BYTEUNPACK Dreg_pair (R) ; /* reverse source order (b) */

(r6,r5) = byteunpack r1:0 ; /* non-reversing sources */
(r6,r5) = byteunpack r1:0 (R) ; /* reversing sources case */
(r6,r5) = byteunpack r3:2 ; /* non-reversing sources */
(r6,r5) = byteunpack r3:2 (R) ; /* reversing sources case */
(r0,r1) = byteunpack r1:0 ; /* non-reversing sources */
(r2,r3) = byteunpack r1:0 (R) ; /* reversing sources case */
(r4,r5) = byteunpack r3:2 ; /* non-reversing sources */
(r6,r7) = byteunpack r3:2 (R) ; /* reversing sources case */