1 #ifndef ETHERBOOT_BITS_STRING_H
2 #define ETHERBOOT_BITS_STRING_H
3 /*
4  * Taken from Linux /usr/include/asm/string.h
 * All except memcpy, memmove, memset, memswap, strncmp and strlen removed.
6  *
7  * Non-standard memswap() function added because it saves quite a bit
8  * of code (mbrown@fensystems.co.uk).
9  */
10 
11 /*
12  * This string-include defines all string functions as inline
13  * functions. Use gcc. It also assumes ds=es=data space, this should be
14  * normal. Most of the string-functions are rather heavily hand-optimized,
15  * see especially strtok,strstr,str[c]spn. They should work, but are not
16  * very easy to understand. Everything is done entirely within the register
17  * set, making the functions fast and clean. String instructions have been
18  * used through-out, making for "slightly" unclear code :-)
19  *
20  *		NO Copyright (C) 1991, 1992 Linus Torvalds,
21  *		consider these trivial functions to be PD.
22  */
23 
24 FILE_LICENCE ( PUBLIC_DOMAIN );
25 
#define __HAVE_ARCH_MEMCPY

/**
 * Copy memory area (out-of-line version)
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 *
 * Used by the memcpy() macro below for lengths that are not
 * compile-time constants.
 */
extern void * __memcpy ( void *dest, const void *src, size_t len );

/* Disabled reference implementation, kept to document the expected
 * behaviour of the out-of-line __memcpy(): a plain forward
 * "rep movsb" copy.
 */
#if 0
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
	int d0, d1, d2;
	__asm__ __volatile__ ( "rep ; movsb"
			       : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
			       : "0" ( len ), "1" ( src ), "2" ( dest )
			       : "memory" );
	return dest;
}
#endif
41 
/**
 * Copy memory area, optimised for compile-time-constant lengths
 *
 * @v dest	Destination address
 * @v src	Source address
 * @v len	Length (a compile-time constant)
 * @ret dest	Destination address
 *
 * Selected by the memcpy() macro below when the length is a
 * compile-time constant.  Small lengths expand to direct register
 * moves, medium lengths to a short unrolled "movs" sequence, and
 * large lengths fall back to the out-of-line __memcpy().  The byte
 * counts in the case comments estimate the size of the generated
 * code for each strategy (not the amount of data moved), matching
 * the cost analysis in the comment block further down.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	/* Access both areas through unions marked __may_alias__ so
	 * that the mixed 8/16/32-bit accesses below do not violate
	 * the strict-aliasing rules.
	 */
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *src_u = src;
	const void *esi;
	void *edi;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	/*
	 * Single-register moves; these are always better than a
	 * string operation.  We can clobber an arbitrary two
	 * registers (data, source, dest can re-use source register)
	 * instead of being restricted to esi and edi.  There's also a
	 * much greater potential for optimising with nearby code.
	 *
	 */
	case 1 : /* 4 bytes */
		dest_u->u8[0]  = src_u->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		dest_u->u16[0] = src_u->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		dest_u->u32[0] = src_u->u32[0];
		return dest;
	/*
	 * Double-register moves; these are probably still a win.
	 *
	 */
	case 3 : /* 12 bytes */
		dest_u->u16[0] = src_u->u16[0];
		dest_u->u8[2]  = src_u->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u8[4]  = src_u->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u16[2] = src_u->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u32[1] = src_u->u32[1];
		return dest;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */

	esi = src;
	edi = dest;

	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	/* len is below 26 here (and not one of the sizes handled by
	 * the switch above).  Each threshold test that passes copies
	 * four bytes with a single "movsl" (at most six in total, for
	 * len 24-25), and the final "movsw"/"movsb" pick up the two-
	 * and one-byte remainders.
	 */
	if ( len >= 6*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );

	return dest;
}
152 
/**
 * Copy memory area
 *
 * Dispatches at compile time: constant lengths are expanded inline
 * via __constant_memcpy(), variable lengths call the out-of-line
 * __memcpy().
 */
#define memcpy( dest, src, len )			\
	( __builtin_constant_p ( (len) ) ?		\
	  __constant_memcpy ( (dest), (src), (len) ) :	\
	  __memcpy ( (dest), (src), (len) ) )
157 
#define __HAVE_ARCH_MEMMOVE
/**
 * Copy (possibly overlapping) memory area
 *
 * @v dest	Destination address
 * @v src	Source address
 * @v n		Length
 * @ret dest	Destination address
 *
 * Copies forwards when dest lies below src, otherwise copies
 * backwards (direction flag set via "std", restored with "cld") so
 * that overlapping areas are handled correctly.
 *
 * NOTE(review): for n==0 the backward path computes src-1/dest-1;
 * "rep movsb" then moves nothing, but the pointer arithmetic is
 * formally out of bounds -- harmless in practice on this target.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
/* Dummy outputs telling the compiler that ecx/esi/edi are clobbered */
int d0, d1, d2;
if (dest<src)
__asm__ __volatile__(
	"cld\n\t"
	"rep\n\t"
	"movsb"
	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
	:"0" (n),"1" (src),"2" (dest)
	: "memory");
else
/* Backward copy: start from the last byte of each area */
__asm__ __volatile__(
	"std\n\t"
	"rep\n\t"
	"movsb\n\t"
	"cld"
	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
	:"0" (n),
	 "1" (n-1+(const char *)src),
	 "2" (n-1+(char *)dest)
	:"memory");
return dest;
}
183 
#define __HAVE_ARCH_MEMSET
/**
 * Fill memory area
 *
 * @v s		Start address
 * @v c		Fill byte (only the low 8 bits are used, since the
 *		value is passed in eax and written with "stosb")
 * @v count	Length
 * @ret s	Start address
 */
static inline void * memset(void *s, int c,size_t count)
{
/* Dummy outputs telling the compiler that ecx/edi are clobbered */
int d0, d1;
__asm__ __volatile__(
	"cld\n\t"
	"rep\n\t"
	"stosb"
	: "=&c" (d0), "=&D" (d1)
	:"a" (c),"1" (s),"0" (count)
	:"memory");
return s;
}
197 
#define __HAVE_ARCH_MEMSWAP
/**
 * Swap the contents of two (non-overlapping) memory areas
 *
 * @v dest	First memory area
 * @v src	Second memory area
 * @v n		Length
 * @ret dest	First memory area
 *
 * Non-standard function: swapping in place saves the code size of a
 * bounce buffer and three memcpy() calls (see file header comment).
 */
static inline void * memswap(void *dest, void *src, size_t n)
{
/* Dummy outputs telling the compiler that ecx/esi/edi/eax are clobbered */
int d0, d1, d2, d3;
/* "loop" decrements ecx only *after* each pass through the body, so
 * entering the asm with n==0 would execute one iteration, wrap ecx
 * to 0xffffffff, and attempt to swap ~4GB.  Guard against the
 * zero-length case explicitly.
 */
if (!n)
	return dest;
__asm__ __volatile__(
	"\n1:\t"
	"movb (%%edi),%%al\n\t"
	"xchgb (%%esi),%%al\n\t"
	"incl %%esi\n\t"
	"stosb\n\t"
	"loop 1b"
	: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
	: "0" (n), "1" (src), "2" (dest)
	: "memory" );
return dest;
}
214 
#define __HAVE_ARCH_STRNCMP
/**
 * Compare two strings, up to a maximum length
 *
 * @v cs	First string
 * @v ct	Second string
 * @v count	Maximum number of characters to compare
 * @ret		0 if equal within count characters, -1 if cs sorts
 *		before ct, +1 if cs sorts after ct (from the
 *		sbb/or sequence at label 3)
 *
 * NOTE(review): the asm reads *cs and *ct but lists no "memory"
 * input or clobber; strictly, the compiler may keep pending stores
 * to those buffers in registers across the asm.  Appears to work in
 * practice -- confirm before using on freshly-written buffers.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
register int __res;
/* Dummy outputs telling the compiler that esi/edi/ecx are clobbered */
int d0, d1, d2;
__asm__ __volatile__(
	"1:\tdecl %3\n\t"	/* stop once count characters compared */
	"js 2f\n\t"
	"lodsb\n\t"		/* al = *cs++ */
	"scasb\n\t"		/* flags from al - *ct++ */
	"jne 3f\n\t"
	"testb %%al,%%al\n\t"	/* stop at NUL terminator */
	"jne 1b\n"
	"2:\txorl %%eax,%%eax\n\t"	/* equal: result 0 */
	"jmp 4f\n"
	"3:\tsbbl %%eax,%%eax\n\t"	/* differ: -1 if CF (cs<ct), else 0 */
	"orb $1,%%al\n"			/* ...then force to -1 or +1 */
	"4:"
		     :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
		     :"1" (cs),"2" (ct),"3" (count));
return __res;
}
237 
#define __HAVE_ARCH_STRLEN
/**
 * Get string length
 *
 * @v s		String
 * @ret		Length, excluding the NUL terminator
 *
 * Preloads ecx with -1 and al with 0, scans with "repne scasb",
 * then recovers the length as (~ecx - 1).
 *
 * NOTE(review): relies on the direction flag being clear at entry
 * (guaranteed by the ABI), and lists no "memory" input/clobber even
 * though the asm reads *s -- confirm the compiler cannot defer
 * stores into the scanned buffer across this asm.
 */
static inline size_t strlen(const char * s)
{
/* Dummy output telling the compiler that edi is clobbered */
int d0;
register int __res;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"notl %0\n\t"
	"decl %0"
	:"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
return __res;
}
251 
252 #endif /* ETHERBOOT_BITS_STRING_H */
253