/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   Target: 32-bit x86 using SSE2 stores; all arguments are read from the
   stack relative to ESP.  */

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping that accompanies a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments (relative to ESP after ENTRANCE) and
   the instruction that loads the return value.  With USE_AS_BZERO there
   is no fill-byte argument and no return value is loaded.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, put the
   unwind state back to "EBX pushed" for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index (the number
   of bytes still to store) and is also added to EDX, so the handlers
   address their stores backwards from the end of the buffer.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx

	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
/* Return the caller's return address (i.e. its current PC) in EBX.  */
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index (the number of bytes still
   to store) and is also added to EDX, so the handlers address their
   stores backwards from the end of the buffer.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif

/* At L(128bytesormore) EBX is loaded with the shared cache size.  Unless
   that load goes through the PC thunk (PIC build without a compile-time
   SHARED_CACHE_SIZE), EBX is pushed first and must be popped again on
   every path that leaves the large-size code.  The push and both pops
   are keyed on this one macro so they cannot get out of step when only
   one of SHARED_CACHE_SIZE / DATA_CACHE_SIZE is defined.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define EBX_PUSHED_FOR_CACHE_SIZE 1
#endif

#ifndef MEMSET
# define MEMSET memset
#endif

/* MEMSET (dest, chr, len)	-- default build
   MEMSET (dest, len)		-- USE_AS_BZERO build, fill byte is 0

   In:    DEST(%esp) = destination pointer
	  CHR(%esp)  = fill value, only its low byte is used (not bzero)
	  LEN(%esp)  = number of bytes to store
   Out:   EAX = dest in the default build; nothing is loaded into EAX
	  for the return in the USE_AS_BZERO build.
   Clobb: EAX, ECX, EDX, XMM0, flags.  EBX is saved and restored
	  wherever it is used.

   Register roles inside the function:
	  EAX  = fill byte replicated into all four bytes (briefly reused
		 as scratch, then reloaded from XMM0)
	  ECX  = bytes still to store
	  EDX  = current destination; after BRANCH_TO_JMPTBL_ENTRY it is
		 one past the last byte to store
	  XMM0 = fill byte replicated into all sixteen bytes  */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Handlers for fewer than 32 bytes.  EDX points one past the last byte.
   There is one fall-through chain per residue of the count modulo 4:
   entering at L(write_Nbytes) runs 4-byte stores from -N(%edx) upwards
   and ends with a 0..3 byte tail.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  EDX is the caller's pointer; its alignment is only tested
   below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  Store one unaligned
   16-byte block, round EDX up to the next 16-byte boundary and shrink
   ECX by the number of bytes skipped.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* EAX = old EDX - new EDX (negative).  */
	add	%eax, %ecx
	movd	%xmm0, %eax		/* Reload the 4-byte pattern.  */

	ALIGN (4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
L(128bytesormore):
	/* EBX = shared cache size.  EBX is pushed first unless it is
	   loaded through the PC thunk (see EBX_PUSHED_FOR_CACHE_SIZE).  */
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#elif (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
#else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* ECX is below the shared cache size: undo the push (if any) and
	   compare against the data cache size instead.  popl leaves the
	   flags alone, and the cmp below sets them for the jae.  */
#ifdef EBX_PUSHED_FOR_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#elif (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
#else
	cmp	__x86_data_cache_size, %ecx
#endif

	jae	L(128bytes_L2_normal)
	/* ECX is kept biased by -128 inside this loop so that the borrow
	   from the sub doubles as the exit test; movdqa and lea leave the
	   flags untouched.  The bias is removed at
	   L(128bytesless_normal).  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX is at least the data cache size but below the shared cache size:
   128 bytes per iteration, prefetching ahead of the stores.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	/* The code below is reached only by the jae at L(128bytesormore),
	   i.e. with EBX still pushed (if it was); tell the unwinder.  */
	RESTORE_EBX_STATE
/* ECX is at least the shared cache size (EBX).  Store EBX rounded down
   to a multiple of 128 bytes with ordinary stores, then the rest in
   128-byte blocks with non-temporal stores.
   NOTE(review): the first loop stores before it tests, so this assumes
   the shared cache size is at least 128 -- TODO confirm.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx		/* ECX = bytes left after the first loop.  */
	movd	%xmm0, %eax		/* Reload the 4-byte pattern.  */
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Balance the push done at L(128bytesormore), if there was one.  */
#ifdef EBX_PUSHED_FOR_CACHE_SIZE
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Handlers for fewer than 128 bytes starting at a 16-byte boundary.
   EDX points one past the last byte.  There is one fall-through chain
   per residue of the count modulo 16: entering at
   L(aligned_16_Nbytes) runs 16-byte stores from -N(%edx) upwards and
   ends with a 0..15 byte tail.  movdqa is safe because EDX was 16 byte
   aligned before the count was added, so -N(%edx) and each following
   16-byte step are aligned as well.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no unwind state to re-establish.  */
	RETURN_END

END (MEMSET)