
/*
 * Mesa 3-D graphics library
 * Version: 5.1
 *
 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * 3Dnow assembly code by Holger Waechtler
 */

#ifdef USE_3DNOW_ASM

#include "assyntax.h"
#include "matypes.h"
#include "norm_args.h"

    SEG_TEXT

/* M(i): element i (4-byte floats) of the matrix addressed by ECX. */
#define M(i)    REGOFF(i * 4, ECX)
/* STRIDE: dword at byte offset 12 of the input vector addressed by ESI.
 * NOTE(review): presumably the stride field of GLvector4f - confirm the
 * offset against matypes.h. */
#define STRIDE  REGOFF(12, ESI)


/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Arguments are fetched from the stack through ARG_MAT, ARG_SCALE, ARG_IN,
 * ARG_LENGTHS and ARG_DEST (norm_args.h); those macros are relative to
 * FRAME_OFFSET, which must equal the number of bytes pushed so far.
 *
 * Pass 1 (G3TN_transform): for each of in->count input normals (x0,x1,x2)
 * store to dest, 16 bytes apart:
 *      r0 = x0*m0 + x1*m1 + x2*m2
 *      r1 = x0*m4 + x1*m5 + x2*m6
 *      r2 = x0*m8 + x1*m9 + x2*m10          (m = mat->inv)
 * Pass 2: rescale every stored result in place, either by the matching
 * entry of the lengths array (G3TN_norm_w_lengths) or, when lengths is
 * NULL, by a PFRSQRT estimate of 1/sqrt(r0*r0+r1*r1+r2*r2) refined with
 * PFRSQIT1/PFRCPIT2 (G3TN_norm).
 *
 * Registers: EAX = output cursor (dest->start), EDX = input cursor
 *            (in->start), ECX = mat->inv, ESI = in, EDI = lengths,
 *            EBP = remaining normals.  EDI/ESI/EBP are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TN_end) )

    /* EBP still holds in->count here, so no reload is needed. */
    FEMMS

    PUSH_L     ( EBP )                  /* save counter & pointers for */
    PUSH_L     ( EAX )                  /* the normalize pass          */
    PUSH_L     ( EDX )

    /* three more dwords are on the stack: keep the ARG_* macros in sync */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )            /* m1              | m0              */
    MOVQ       ( M(4), MM4 )            /* m5              | m4              */

    MOVD       ( M(2), MM5 )            /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2              */

    MOVQ       ( M(8), MM6 )            /* m9              | m8              */
    MOVD       ( M(10), MM7 )           /*                 | m10             */

    /*
     * NOTE(review): scale is folded into the matrix only when no lengths
     * array is supplied, i.e. on the path that renormalizes each result
     * afterwards; the precomputed-lengths path never reads ARG_SCALE.
     * Confirm this matches the C reference implementation.
     */
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                    */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale           */

    PFMUL      ( MM0, MM3 )             /* scale * m1      | scale * m0      */
    PFMUL      ( MM0, MM4 )             /* scale * m5      | scale * m4      */
    PFMUL      ( MM0, MM5 )             /* scale * m6      | scale * m2      */
    PFMUL      ( MM0, MM6 )             /* scale * m9      | scale * m8      */
    PFMUL      ( MM0, MM7 )             /*                 | scale * m10     */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )    /* x1              | x0              */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                 | x2              */

    MOVQ       ( MM0, MM1 )             /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2              */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )             /* r1              | r0              */

    MOVQ       ( REGIND (EDX), MM1 )    /* x1              | x0              */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                     */

    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8           */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                 | x2              */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10          */
    PFACC      ( MM1, MM1 )             /* *not used*      | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )             /* *not used*      | r2              */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                          */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */
    JNZ        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                  /* end of transform ---              */
    POP_L      ( EAX )                  /*    now normalizing ...            */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                    */
    JE         ( LLBL (G3TN_norm ) )    /* calculate lengths                 */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):             /* use precalculated lengths         */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                 | x2              */

    MOVD       ( REGIND (EDI), MM3 )    /*                 | length (x)      */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)      */
    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized) */

    /* This pass works in place on dest; the input cursor is not needed. */
    ADD_L      ( CONST(4), EDI )        /* next length                       */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                  */
    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                      */

    ADD_L      ( CONST(16), EAX )       /* next r                            */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */

    JNZ        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                       /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )    /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                 | x2              */

    MOVQ       ( MM0, MM3 )             /* x1              | x0              */
    MOVQ       ( MM1, MM4 )             /*                 | x2              */

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0           */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM1, MM4 )             /*                 | x2*x2           */
    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2     */

    PFACC      ( MM3, MM3 )             /* len2 = x0*x0+x1*x1+x2*x2 (both)   */
    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt(len2)          */

    MOVQ       ( MM5, MM4 )             /* keep the estimate for PFRCPIT2    */
    PUNPCKLDQ  ( MM3, MM3 )

    /* MMX/3DNow! ops leave EFLAGS alone: the JNZ below tests this SUB. */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */
    PFMUL      ( MM5, MM5 )             /* estimate squared                  */

    PFRSQIT1   ( MM3, MM5 )             /* refinement step 1                 */
    PFRCPIT2   ( MM4, MM5 )             /* refinement step 2                 */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                 */
    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                      */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass: multiplies each input normal component-wise by
 * (m0, m5, m10) of mat->inv and then rescales it, either by the matching
 * entry of the lengths array or, when lengths is NULL, by a refined
 * PFRSQRT estimate of the reciprocal length of the transformed normal.
 * Arguments come from the stack via the ARG_* macros (norm_args.h).
 *
 * Registers: EAX = output cursor (dest->start, 16 bytes per normal),
 *            EDX = input cursor (in->start, advanced by STRIDE),
 *            ECX = mat->inv, ESI = in, EDI = lengths cursor,
 *            EBP = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0              */

    MOVD       ( M(10), MM2 )           /*                 | m10             */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10             | m10             */

    /*
     * NOTE(review): scale is folded into the matrix only when no lengths
     * array is supplied, i.e. on the path that renormalizes each result
     * afterwards; the precomputed-lengths path never reads ARG_SCALE.
     * Confirm this matches the C reference implementation.
     */
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                    */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) )

    MOVD       ( ARG_SCALE, MM7 )       /*                 | scale           */
    PUNPCKLDQ  ( MM7, MM7 )             /* scale           | scale           */

    PFMUL      ( MM7, MM0 )             /* scale * m5      | scale * m0      */
    PFMUL      ( MM7, MM2 )             /* scale * m10     | scale * m10     */

    JMP        ( LLBL (G3TNNR_norm) )   /* need to calculate lengths         */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2              */

    /* Load the current length here rather than preloading the next one at
     * the loop tail, so the final iteration never reads lengths[count]. */
    MOVD       ( REGIND(EDI), MM3 )     /*                 | length (x)      */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM3, MM7 )             /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)      */

    ADD_L      ( CONST(4), EDI )        /* next length                       */
    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized) */

    /* MMX/3DNow! ops leave EFLAGS alone: the JNZ below tests this SUB. */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */
    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                     */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                          */

    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                     /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2              */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0           */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10          */
    MOVQ       ( MM6, MM3 )             /* x1 (transformed)| x0 (transformed)*/

    MOVQ       ( MM7, MM4 )             /*                 | x2 (transformed)*/
    PFMUL      ( MM6, MM3 )             /* x1*x1           | x0*x0           */

    PFMUL      ( MM7, MM4 )             /*                 | x2*x2           */
    PFACC      ( MM3, MM3 )             /* **not used**    | x0*x0+x1*x1     */

    PFADD      ( MM4, MM3 )             /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt(len2)          */
    MOVQ       ( MM5, MM4 )             /* keep the estimate for PFRCPIT2    */

    PUNPCKLDQ  ( MM3, MM3 )             /* len2            | len2            */
    PFMUL      ( MM5, MM5 )             /* estimate squared                  */

    PFRSQIT1   ( MM3, MM5 )             /* refinement step 1                 */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */

    PFRCPIT2   ( MM4, MM5 )             /* refinement step 2                 */
    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                     */
    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                          */
    JNZ        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies each input normal component-wise by scale * (m0, m5, m10)
 * of mat->inv and writes it to dest (16 bytes per output normal).
 *
 * Registers: EAX = output cursor, EDX = input cursor, ECX = mat->inv,
 *            ESI = in, EBP = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )       /*                 | scale           */
    PUNPCKLDQ  ( MM6, MM6 )             /* scale           | scale           */

    MOVD       ( M(0), MM0 )            /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0              */

    PFMUL      ( MM6, MM0 )             /* scale*m5        | scale*m0        */
    MOVD       ( M(10), MM2 )           /*                 | m10             */

    PFMUL      ( MM6, MM2 )             /*                 | scale*m10       */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*                 | x2              */

    PFMUL      ( MM0, MM4 )             /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    /* MMX/3DNow! ops leave EFLAGS alone: the JNZ below tests this SUB. */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */
    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                     */

    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                          */
    JNZ        ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For each input normal (x0,x1,x2) writes to dest (16 bytes apart):
 *      r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *      r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *      r2 = scale * (x0*m8 + x1*m9 + x2*m10)        (m = mat->inv)
 * scale is folded into the matrix registers once, before the loop.
 *
 * Registers: EAX = output cursor, EDX = input cursor, ECX = mat->inv,
 *            ESI = in, EDI = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

    CMP_L      ( CONST(0), EDI )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale           */

    MOVQ       ( M(0), MM3 )            /* m1              | m0              */
    MOVQ       ( M(4), MM4 )            /* m5              | m4              */

    MOVD       ( M(2), MM5 )            /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2              */

    MOVQ       ( M(8), MM6 )            /* m9              | m8              */
    MOVD       ( M(10), MM7 )           /*                 | m10             */

    PFMUL      ( MM0, MM3 )             /* scale*m1        | scale*m0        */
    PFMUL      ( MM0, MM4 )             /* scale*m5        | scale*m4        */
    PFMUL      ( MM0, MM5 )             /* scale*m6        | scale*m2        */
    PFMUL      ( MM0, MM6 )             /* scale*m9        | scale*m8        */
    PFMUL      ( MM0, MM7 )             /*                 | scale*m10       */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2              */

    MOVQ       ( MM0, MM1 )             /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2              */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1              | x0              */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )             /* r1              | r0              */

    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2              */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                     */
    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8           */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10          */
    PFACC      ( MM1, MM1 )             /* *not used*      | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )             /* *not used*      | r2              */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                          */

    SUB_L      ( CONST(1), EDI )        /* decrement normal counter          */
    JNZ        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies each input normal component-wise by (m0, m5, m10) of
 * mat->inv and writes it to dest (16 bytes per output normal).
 *
 * Registers: EAX = output cursor, EDX = input cursor, ECX = mat->inv,
 *            ESI = in, EDI = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

    CMP_L      ( CONST(0), EDI )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0              */

    MOVD       ( M(10), MM2 )           /*                 | m10             */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10             | m10             */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*                 | x2              */

    PFMUL      ( MM0, MM4 )             /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    /* MMX/3DNow! ops leave EFLAGS alone: the JNZ below tests this SUB. */
    SUB_L      ( CONST(1), EDI )        /* decrement normal counter          */
    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                     */

    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                          */
    JNZ        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_transform_normals
 *
 * For each input normal (x0,x1,x2) writes to dest (16 bytes apart):
 *      r0 = x0*m0 + x1*m1 + x2*m2
 *      r1 = x0*m4 + x1*m5 + x2*m6
 *      r2 = x0*m8 + x1*m9 + x2*m10                  (m = mat->inv)
 *
 * Registers: EAX = output cursor, EDX = input cursor, ECX = mat->inv,
 *            ESI = in, EDI = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

    CMP_L      ( CONST(0), EDI )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )            /* m1              | m0              */
    MOVQ       ( M(4), MM4 )            /* m5              | m4              */

    MOVD       ( M(2), MM5 )            /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2              */

    MOVQ       ( M(8), MM6 )            /* m9              | m8              */
    MOVD       ( M(10), MM7 )           /*                 | m10             */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2              */

    MOVQ       ( MM0, MM1 )             /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2              */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )             /* r1              | r0              */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1              | x0              */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                     */

    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8           */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2              */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10          */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )             /* *not used*      | x0*m8+x1*m9     */
    PFADD      ( MM2, MM1 )             /* *not used*      | r2              */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                          */
    SUB_L      ( CONST(1), EDI )        /* decrement normal counter          */

    JNZ        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_normalize_normals
 *
 * Rescales each input normal and writes it to dest (16 bytes per output
 * normal): by the matching entry of the lengths array when one is given
 * (G3N_norm1), otherwise by a refined PFRSQRT estimate of
 * 1/sqrt(x0*x0+x1*x1+x2*x2) (G3N_norm2).  ARG_MAT and ARG_SCALE are not
 * read by this routine.
 *
 * Registers: EAX = output cursor, ECX = input cursor (in->start),
 *            EDX = lengths cursor, ESI = in, EBP = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                    */
    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                 */

ALIGNTEXT32
LLBL (G3N_norm1):                       /* use precalculated lengths         */

    PREFETCHW  ( REGIND(EAX) )          /* destination is written below      */

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2              */

    MOVD       ( REGIND(EDX), MM3 )     /*                 | length (x)      */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)      */
    ADD_L      ( STRIDE, ECX )          /* next normal                       */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                  */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                      */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    ADD_L      ( CONST(4), EDX )        /* next length                       */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */

    JNZ        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                       /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    PREFETCH   ( REGIND(ECX) )

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2              */

    MOVQ       ( MM0, MM3 )             /* x1              | x0              */
    ADD_L      ( STRIDE, ECX )          /* next normal                       */

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0           */
    MOVQ       ( MM1, MM4 )             /*                 | x2              */

    ADD_L      ( CONST(16), EAX )       /* next r                            */
    PFMUL      ( MM1, MM4 )             /*                 | x2*x2           */

    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2     */
    PFACC      ( MM3, MM3 )             /* len2 = x0*x0+x1*x1+x2*x2 (both)   */

    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt(len2)          */
    MOVQ       ( MM5, MM4 )             /* keep the estimate for PFRCPIT2    */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )             /* estimate squared                  */

    PFRSQIT1   ( MM3, MM5 )             /* refinement step 1                 */
    /* MMX/3DNow! ops leave EFLAGS alone: the JNZ below tests this SUB. */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter          */

    PFRCPIT2   ( MM4, MM5 )             /* refinement step 2                 */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                 */

    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                      */

    JNZ        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies every component of each input normal by scale and writes
 * the result to dest (16 bytes per output normal).
 *
 * Registers: EAX = output cursor, ECX = input cursor (in->start),
 *            ESI = in, EDX = remaining normals.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )  /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* in->start */

    CMP_L      ( CONST(0), EDX )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale           */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )     /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM2 )  /*                 | x2              */

    PFMUL      ( MM0, MM1 )             /* x1*scale        | x0*scale        */
    ADD_L      ( STRIDE, ECX )          /* next normal                       */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )             /*                 | x2*scale        */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                     */
    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                          */

    SUB_L      ( CONST(1), EDX )        /* decrement normal counter          */
    JNZ        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET

#endif

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif