/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * 16-bit pattern fill for 32-bit x86 using SSE2 (GAS / AT&T syntax, run
 * through the C preprocessor).
 *
 * Stack arguments (offsets relative to %esp after ENTRANCE):
 *   DEST  destination pointer
 *   CHR   16-bit fill value (absent when USE_AS_BZERO16 is defined;
 *         the fill value is then zero)
 *   LEN   length in bytes; it is halved on entry to get a count of
 *         16-bit words, so an odd trailing byte is not written
 * Return value: unless USE_AS_BZERO16 is defined, %eax = DEST.
 * NOTE(review): the matching C prototype is not visible in this file;
 * confirm argument types against the declaration.
 *
 * Register roles inside MEMSET:
 *   %eax   fill value replicated into both 16-bit halves
 *   %ecx   remaining length (words on the short path, bytes on the
 *          long path; halved again to index the tail jump tables)
 *   %edx   destination cursor; moved to the END of the region before
 *          every jump-table dispatch so tail stores use negative offsets
 *   %xmm0  fill value replicated across all 16 bytes
 *   %ebx   scratch (jump-table base under PIC, shared cache size);
 *          always saved and restored
 */

#include "cache.h"

#ifndef MEMSET
# define MEMSET		android_memset16
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* CFI bookkeeping for a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifdef USE_AS_BZERO16
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, re-assert
   for the code that follows that EBX is still pushed.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the entry index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    /* EBX now holds the absolute target address.  Go.  */	\
    jmp		*%ebx

	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
/* Returns the caller's return address (i.e. its PC) in EBX.  */
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the entry index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    jmp		*TABLE(,%ecx,4)
#endif

/* L(128bytesormore) uses EBX as scratch for the shared cache size.  It
   pushes EBX first unless the size is fetched GOT-relative (PIC build
   without a compile-time SHARED_CACHE_SIZE), where the EBX saved by
   ENTRANCE is sufficient.  The matching pop and the CFI fix-up are keyed
   on exactly the same condition as that push, so the stack stays balanced
   even when only one of SHARED_CACHE_SIZE / DATA_CACHE_SIZE is defined.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define POP_CACHE_EBX		POP (%ebx)
# define RESTORE_EBX_STATE	CFI_PUSH (%ebx)
#else
# define POP_CACHE_EBX
# define RESTORE_EBX_STATE
#endif

	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	/* ECX = number of 16-bit words to write.  */
	movl	LEN(%esp), %ecx
	shr	$1, %ecx
#ifdef USE_AS_BZERO16
	xor	%eax, %eax
#else
	/* EAX = fill value duplicated into both 16-bit halves.  */
	movzwl	CHR(%esp), %eax
	mov	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32wordsormore)

	/* Fewer than 32 words: point EDX at the end of the region and
	   dispatch on the word count.  */
L(write_less32words):
	lea	(%edx, %ecx, 2), %edx
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less32words):
	.int	JMPTBL (L(write_0words), L(table_less32words))
	.int	JMPTBL (L(write_1words), L(table_less32words))
	.int	JMPTBL (L(write_2words), L(table_less32words))
	.int	JMPTBL (L(write_3words), L(table_less32words))
	.int	JMPTBL (L(write_4words), L(table_less32words))
	.int	JMPTBL (L(write_5words), L(table_less32words))
	.int	JMPTBL (L(write_6words), L(table_less32words))
	.int	JMPTBL (L(write_7words), L(table_less32words))
	.int	JMPTBL (L(write_8words), L(table_less32words))
	.int	JMPTBL (L(write_9words), L(table_less32words))
	.int	JMPTBL (L(write_10words), L(table_less32words))
	.int	JMPTBL (L(write_11words), L(table_less32words))
	.int	JMPTBL (L(write_12words), L(table_less32words))
	.int	JMPTBL (L(write_13words), L(table_less32words))
	.int	JMPTBL (L(write_14words), L(table_less32words))
	.int	JMPTBL (L(write_15words), L(table_less32words))
	.int	JMPTBL (L(write_16words), L(table_less32words))
	.int	JMPTBL (L(write_17words), L(table_less32words))
	.int	JMPTBL (L(write_18words), L(table_less32words))
	.int	JMPTBL (L(write_19words), L(table_less32words))
	.int	JMPTBL (L(write_20words), L(table_less32words))
	.int	JMPTBL (L(write_21words), L(table_less32words))
	.int	JMPTBL (L(write_22words), L(table_less32words))
	.int	JMPTBL (L(write_23words), L(table_less32words))
	.int	JMPTBL (L(write_24words), L(table_less32words))
	.int	JMPTBL (L(write_25words), L(table_less32words))
	.int	JMPTBL (L(write_26words), L(table_less32words))
	.int	JMPTBL (L(write_27words), L(table_less32words))
	.int	JMPTBL (L(write_28words), L(table_less32words))
	.int	JMPTBL (L(write_29words), L(table_less32words))
	.int	JMPTBL (L(write_30words), L(table_less32words))
	.int	JMPTBL (L(write_31words), L(table_less32words))
	.popsection

	/* Short-path tails.  EDX points one past the last byte to write;
	   each entry label falls through the stores for all smaller sizes
	   of the same residue (word count mod 4).  */
	ALIGN (4)
L(write_28words):
	movl	%eax, -56(%edx)
	movl	%eax, -52(%edx)
L(write_24words):
	movl	%eax, -48(%edx)
	movl	%eax, -44(%edx)
L(write_20words):
	movl	%eax, -40(%edx)
	movl	%eax, -36(%edx)
L(write_16words):
	movl	%eax, -32(%edx)
	movl	%eax, -28(%edx)
L(write_12words):
	movl	%eax, -24(%edx)
	movl	%eax, -20(%edx)
L(write_8words):
	movl	%eax, -16(%edx)
	movl	%eax, -12(%edx)
L(write_4words):
	movl	%eax, -8(%edx)
	movl	%eax, -4(%edx)
L(write_0words):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29words):
	movl	%eax, -58(%edx)
	movl	%eax, -54(%edx)
L(write_25words):
	movl	%eax, -50(%edx)
	movl	%eax, -46(%edx)
L(write_21words):
	movl	%eax, -42(%edx)
	movl	%eax, -38(%edx)
L(write_17words):
	movl	%eax, -34(%edx)
	movl	%eax, -30(%edx)
L(write_13words):
	movl	%eax, -26(%edx)
	movl	%eax, -22(%edx)
L(write_9words):
	movl	%eax, -18(%edx)
	movl	%eax, -14(%edx)
L(write_5words):
	movl	%eax, -10(%edx)
	movl	%eax, -6(%edx)
L(write_1words):
	mov	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30words):
	movl	%eax, -60(%edx)
	movl	%eax, -56(%edx)
L(write_26words):
	movl	%eax, -52(%edx)
	movl	%eax, -48(%edx)
L(write_22words):
	movl	%eax, -44(%edx)
	movl	%eax, -40(%edx)
L(write_18words):
	movl	%eax, -36(%edx)
	movl	%eax, -32(%edx)
L(write_14words):
	movl	%eax, -28(%edx)
	movl	%eax, -24(%edx)
L(write_10words):
	movl	%eax, -20(%edx)
	movl	%eax, -16(%edx)
L(write_6words):
	movl	%eax, -12(%edx)
	movl	%eax, -8(%edx)
L(write_2words):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31words):
	movl	%eax, -62(%edx)
	movl	%eax, -58(%edx)
L(write_27words):
	movl	%eax, -54(%edx)
	movl	%eax, -50(%edx)
L(write_23words):
	movl	%eax, -46(%edx)
	movl	%eax, -42(%edx)
L(write_19words):
	movl	%eax, -38(%edx)
	movl	%eax, -34(%edx)
L(write_15words):
	movl	%eax, -30(%edx)
	movl	%eax, -26(%edx)
L(write_11words):
	movl	%eax, -22(%edx)
	movl	%eax, -18(%edx)
L(write_7words):
	movl	%eax, -14(%edx)
	movl	%eax, -10(%edx)
L(write_3words):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)

	/* 32 words or more.  From here on ECX is a BYTE count.  */
L(32wordsormore):
	shl	$1, %ecx
	test	$0x01, %edx
	jz	L(aligned2bytes)
	/* Odd destination address: store the first and last four bytes
	   unaligned, then step one byte forward and rotate the pattern
	   by one byte so the bulk stores continue the same byte
	   sequence from an even address.  */
	mov	%eax, (%edx)
	mov	%eax, -4(%edx, %ecx)
	sub	$2, %ecx
	add	$1, %edx
	rol	$8, %eax
L(aligned2bytes):
	/* XMM0 = fill pattern replicated across 16 bytes.  */
#ifdef USE_AS_BZERO16
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX holds the byte count (at least 62 here) and EDX is not 16-byte
   aligned.  Store 16 bytes unaligned, round EDX up to the next 16-byte
   boundary and shrink ECX by the number of bytes skipped.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* EAX = old EDX - new EDX (<= 0).  */
	add	%eax, %ecx
	movd	%xmm0, %eax		/* Reload the pattern EAX held.  */

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

	/* Under 128 bytes left: EDX -> end, ECX -> index (bytes / 2).  */
L(aligned_16_less128bytes):
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
	/* At least 128 bytes, EDX 16-byte aligned.  Pick a store strategy
	   by comparing ECX with the shared and data cache sizes.
	   EBX = shared cache size.  */
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Not taking the non-temporal path: the shared cache size in EBX
	   is no longer needed, so undo the push above (if there was one)
	   before comparing against the data cache size.  popl leaves the
	   flags untouched, and they are reset by the cmp below anyway.  */
	POP_CACHE_EBX
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so the borrow from each "sub $128" in the loop
	   tells whether another full 128-byte chunk remains.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx		/* lea: keep the flags from sub.  */
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx		/* lea: keep the flags from sub.  */
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	lea	128(%ecx), %ecx		/* Remove the -128 bias.  */
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
	/* ECX is at least the data cache size but below the shared cache
	   size: regular aligned stores with prefetch ahead of the cursor.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	/* The code below is reached only via the jae at L(128bytesormore),
	   i.e. with the cache-size push of EBX (if any) still in effect.  */
	RESTORE_EBX_STATE
	/* ECX is at least the shared cache size (EBX).  Write EBX rounded
	   down to a multiple of 128 bytes with regular stores, then the
	   remaining full 128-byte chunks with non-temporal stores.
	   ECX becomes the byte count left after the regular-store loop.
	   NOTE(review): the first loop always runs once and counts EBX
	   down by 128, so it relies on the shared cache size being at
	   least 128 - confirm that is guaranteed by whoever sets it.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	movd	%xmm0, %eax		/* Reload the pattern into EAX.  */
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Undo the cache-size push of EBX from L(128bytesormore), if any.  */
	POP_CACHE_EBX
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.popsection

	/* Long-path tails.  EDX points one past the last byte to write and
	   (EDX - remaining bytes) is 16-byte aligned, so the 16-byte stores
	   at negative offsets are aligned.  Each group handles one residue
	   of the remaining byte count modulo 16, finishing with
	   movq / movl / movw for the final 0-14 bytes.  */

	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

END (MEMSET)