#ifndef ETHERBOOT_BITS_STRING_H
#define ETHERBOOT_BITS_STRING_H
/*
 * Taken from Linux /usr/include/asm/string.h
 * All except memcpy, memmove, memset, strncmp and strlen removed.
 *
 * Non-standard memswap() function added because it saves quite a bit
 * of code (mbrown@fensystems.co.uk).
 */

/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strtok,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used through-out, making for "slightly" unclear code :-)
 *
 * NO Copyright (C) 1991, 1992 Linus Torvalds,
 * consider these trivial functions to be PD.
 */

FILE_LICENCE ( PUBLIC_DOMAIN );

#define __HAVE_ARCH_MEMCPY

extern void * __memcpy ( void *dest, const void *src, size_t len );

#if 0
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
        int d0, d1, d2;
        __asm__ __volatile__ ( "rep ; movsb"
                               : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
                               : "0" ( len ), "1" ( src ), "2" ( dest )
                               : "memory" );
        return dest;
}
#endif
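
/*
 * The "#if 0" block above is a disabled inline "rep movsb" variant of
 * __memcpy(), kept for reference.  Given the extern declaration, the
 * live definition is presumably provided out-of-line elsewhere, which
 * avoids duplicating the asm at every call site that passes a
 * non-constant length.
 */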

static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
        union {
                uint32_t u32[2];
                uint16_t u16[4];
                uint8_t u8[8];
        } __attribute__ (( __may_alias__ )) *dest_u = dest;
        const union {
                uint32_t u32[2];
                uint16_t u16[4];
                uint8_t u8[8];
        } __attribute__ (( __may_alias__ )) *src_u = src;
        const void *esi;
        void *edi;

        switch ( len ) {
        case 0 : /* 0 bytes */
                return dest;
        /*
         * Single-register moves; these are always better than a
         * string operation.  We can clobber an arbitrary two
         * registers (data, source, dest can re-use source register)
         * instead of being restricted to esi and edi.  There's also a
         * much greater potential for optimising with nearby code.
         *
         * The comment on each case gives the approximate size of the
         * generated code in bytes, not the number of bytes moved.
         */
        case 1 : /* 4 bytes */
                dest_u->u8[0] = src_u->u8[0];
                return dest;
        case 2 : /* 6 bytes */
                dest_u->u16[0] = src_u->u16[0];
                return dest;
        case 4 : /* 4 bytes */
                dest_u->u32[0] = src_u->u32[0];
                return dest;
        /*
         * Double-register moves; these are probably still a win.
         *
         */
        case 3 : /* 12 bytes */
                dest_u->u16[0] = src_u->u16[0];
                dest_u->u8[2] = src_u->u8[2];
                return dest;
        case 5 : /* 10 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u8[4] = src_u->u8[4];
                return dest;
        case 6 : /* 12 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u16[2] = src_u->u16[2];
                return dest;
        case 8 : /* 10 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u32[1] = src_u->u32[1];
                return dest;
        }

        /* Even if we have to load up esi and edi ready for a string
         * operation, we can sometimes save space by using multiple
         * single-byte "movs" operations instead of loading up ecx and
         * using "rep movsb".
         *
         * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
         * to allow for saving/restoring ecx 50% of the time.
         *
         * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
         * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
         * but "movsl" moves twice as much data, so it balances out).
         *
         * The cutoff point therefore occurs around 26 bytes; the byte
         * requirements for each method are:
         *
         * len              16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
         * #bytes (ecx)      8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
         * #bytes (no ecx)   4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
         */
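
        /* Worked example (illustrative): a constant len of 13 falls
         * through to the chain below and emits three "movsl" (twelve
         * bytes moved) plus one "movsb", i.e. 4 bytes of code, versus
         * around 8 bytes for the "load ecx, rep movsb" approach.
         */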

        esi = src;
        edi = dest;

        if ( len >= 26 )
                return __memcpy ( dest, src, len );

        if ( len >= 6*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 5*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 4*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 3*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 2*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 1*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( ( len % 4 ) >= 2 )
                __asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( ( len % 2 ) >= 1 )
                __asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );

        return dest;
}

#define memcpy( dest, src, len )                                        \
        ( __builtin_constant_p ( (len) ) ?                              \
          __constant_memcpy ( (dest), (src), (len) ) :                  \
          __memcpy ( (dest), (src), (len) ) )
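
/*
 * Usage sketch (illustrative, with hypothetical variables): a length
 * known at compile time resolves to the inlined open-coded moves,
 * while a run-time length resolves to the out-of-line copy:
 *
 *      memcpy ( buf, &hdr, sizeof ( hdr ) );   uses __constant_memcpy()
 *      memcpy ( buf, data, len );              uses __memcpy()
 */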

#define __HAVE_ARCH_MEMMOVE
static inline void * memmove(void * dest, const void * src, size_t n)
{
        int d0, d1, d2;
        if (dest < src)
                /* Destination starts below source: copy forwards */
                __asm__ __volatile__(
                        "cld\n\t"
                        "rep\n\t"
                        "movsb"
                        : "=&c" (d0), "=&S" (d1), "=&D" (d2)
                        : "0" (n), "1" (src), "2" (dest)
                        : "memory");
        else
                /* Destination may overlap the end of the source: copy
                 * backwards from the last byte, then restore the
                 * direction flag.
                 */
                __asm__ __volatile__(
                        "std\n\t"
                        "rep\n\t"
                        "movsb\n\t"
                        "cld"
                        : "=&c" (d0), "=&S" (d1), "=&D" (d2)
                        : "0" (n),
                          "1" (n - 1 + (const char *)src),
                          "2" (n - 1 + (char *)dest)
                        : "memory");
        return dest;
}
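
/*
 * Illustrative overlap cases: memmove(buf, buf + 1, n) has dest < src
 * and takes the forward "rep movsb" branch, while
 * memmove(buf + 1, buf, n) overlaps the other way and must copy
 * backwards, hence the "std ... cld" branch above.
 */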

#define __HAVE_ARCH_MEMSET
static inline void * memset(void *s, int c, size_t count)
{
        int d0, d1;
        __asm__ __volatile__(
                "cld\n\t"
                "rep\n\t"
                "stosb"
                : "=&c" (d0), "=&D" (d1)
                : "a" (c), "1" (s), "0" (count)
                : "memory");
        return s;
}
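
/*
 * Note that "rep stosb" stores the low byte of c (held in %al) count
 * times, matching the standard memset() conversion of c to unsigned
 * char.
 */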

#define __HAVE_ARCH_MEMSWAP
static inline void * memswap(void *dest, void *src, size_t n)
{
        int d0, d1, d2, d3;
        __asm__ __volatile__(
                "\n1:\t"
                "movb (%%edi),%%al\n\t"
                "xchgb (%%esi),%%al\n\t"
                "incl %%esi\n\t"
                "stosb\n\t"
                "loop 1b"
                : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
                : "0" (n), "1" (src), "2" (dest)
                : "memory" );
        return dest;
}
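
/*
 * Each iteration loads a byte of dest into %al, exchanges it with the
 * corresponding byte of src ("xchgb"), and stores the old src byte
 * back into dest via "stosb", swapping both buffers in a single pass.
 * Since "loop" decrements %ecx before testing it, n is assumed to be
 * non-zero.
 */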

#define __HAVE_ARCH_STRNCMP
static inline int strncmp(const char * cs, const char * ct, size_t count)
{
        register int __res;
        int d0, d1, d2;
        __asm__ __volatile__(
                "1:\tdecl %3\n\t"
                "js 2f\n\t"
                "lodsb\n\t"
                "scasb\n\t"
                "jne 3f\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n"
                "2:\txorl %%eax,%%eax\n\t"
                "jmp 4f\n"
                "3:\tsbbl %%eax,%%eax\n\t"
                "orb $1,%%al\n"
                "4:"
                : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
                : "1" (cs), "2" (ct), "3" (count));
        return __res;
}
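
/*
 * On a mismatch, "sbbl %eax,%eax" yields 0 or -1 from the borrow of
 * the failed "scasb" comparison, and "orb $1,%al" turns that into +1
 * or -1, giving the usual strncmp() sign convention without branching
 * on the comparison direction.
 */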

#define __HAVE_ARCH_STRLEN
static inline size_t strlen(const char * s)
{
        int d0;
        register int __res;
        __asm__ __volatile__(
                "repne\n\t"
                "scasb\n\t"
                "notl %0\n\t"
                "decl %0"
                : "=c" (__res), "=&D" (d0)
                : "1" (s), "a" (0), "0" (0xffffffff));
        return __res;
}
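
/*
 * "repne scasb" counts %ecx down from 0xffffffff while scanning for
 * the terminating NUL; "notl" then yields the number of bytes scanned
 * (including the NUL) and "decl" drops the NUL, leaving the string
 * length in %ecx.
 */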

#endif /* ETHERBOOT_BITS_STRING_H */