/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

/*
 * Fallback definitions.  Each one is only supplied when the including
 * build has not already provided its own.
 */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* Local (non-exported) label: L(foo) becomes .Lfoo. */
#ifndef L
# define L(label)	.L##label
#endif

/* Align the next location to a 2^n byte boundary. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* Open a global function symbol and its CFI region. */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
name:				\
	cfi_startproc
#endif

/* Close the CFI region and record the symbol size. */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/*
 * MEMSET: fill a byte range with one value (AT&T syntax, x86-64, SSE2).
 *
 * Register usage follows the System V AMD64 argument order:
 *   default build        : %rdi = dst, %rsi = fill value (low 8 bits used),
 *                          %rdx = byte count
 *   USE_AS_BZERO_P build : %rdi = dst, %rsi = byte count, fill value is 0
 *
 * Returns : %rax = dst (the original %rdi).
 * Clobbers: %rcx, %rdx, %rsi, %r8, %xmm0, flags.
 *
 * Strategy: every size class first writes the head and the tail of the
 * range with (possibly overlapping) unaligned stores.  Only ranges longer
 * than 128 bytes enter a loop, which fills the 64-byte-aligned middle.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (MEMSET)
	movq	%rdi, %rax			/* return value = dst */
#ifdef USE_AS_BZERO_P
	movq	%rsi, %rdx			/* count arrives in the 2nd argument */
	xorq	%rcx, %rcx			/* fill pattern = 0 */
#else
	andq	$0xff, %rsi			/* keep only the low byte of the value */
	movabsq	$0x0101010101010101, %rcx
	imulq	%rsi, %rcx			/* replicate the byte into all 8 bytes */
#endif
	cmpq	$16, %rdx
	jae	L(fill_16_or_more)

	/* count < 16: dispatch on the highest set bit of the count. */
	testb	$8, %dl
	jnz	L(fill_8_to_15)
	testb	$4, %dl
	jnz	L(fill_4_to_7)
	testb	$2, %dl
	jnz	L(fill_2_to_3)
	testb	$1, %dl
	jz	L(done)				/* count == 0 */
	movb	%cl, (%rdi)			/* count == 1 */
L(done):
	ret

	/* First 8 and last 8 bytes; the two stores overlap when count < 16. */
L(fill_8_to_15):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

	/* First 4 and last 4 bytes. */
L(fill_4_to_7):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

	/* First 2 and last 2 bytes. */
L(fill_2_to_3):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(fill_16_or_more):
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0		/* broadcast low dword to all 4 lanes */
#endif
	/* 16..32 bytes: first 16 + last 16. */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(head_tail_done)
	/* 33..64 bytes: first 32 + last 32. */
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(head_tail_done)
	/* 65..128 bytes: first 64 + last 64. */
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(fill_over_128)
L(head_tail_done):
	ret

	/*
	 * count > 128.  The first and last 64 bytes are already written, so
	 * only the 64-byte-aligned span in between remains:
	 *   %rcx = (dst + 64)    & -64   start of the aligned span
	 *   %rdx = (dst + count) & -64   end of the aligned span (exclusive)
	 *   %r8  = original count, used to choose the store flavour
	 */
	ALIGN (4)
L(fill_over_128):
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(done)				/* empty aligned span */

	/* Counts above the shared-cache size use non-temporal stores. */
#ifdef SHARED_CACHE_SIZE
	cmpq	$SHARED_CACHE_SIZE, %r8
#else
	cmpq	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(aligned_loop_nt)

	/* 64 bytes per iteration with aligned stores. */
	ALIGN (4)
L(aligned_loop):
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 16(%rcx)
	movaps	%xmm0, 32(%rcx)
	movaps	%xmm0, 48(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(aligned_loop)
	ret

	/* 64 bytes per iteration with non-temporal stores. */
	ALIGN (4)
L(aligned_loop_nt):
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 16(%rcx)
	movntdq	%xmm0, 32(%rcx)
	movntdq	%xmm0, 48(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(aligned_loop_nt)
	sfence					/* order the non-temporal stores before returning */
	ret

END (MEMSET)