/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses?  */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif
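/* Note: MIPS Release 6 removed the lwl/lwr/ldl/ldr family and instead
   requires ordinary loads and stores to accept misaligned addresses, which
   is why the R6 path below simply treats unaligned pointers as aligned.  */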

#define ENABLE_PREFETCH     1

#if ENABLE_PREFETCH
#define PREFETCH(addr)  __builtin_prefetch (addr, 0, 1);
#else
#define PREFETCH(addr)
#endif
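/* Per the GCC documentation for __builtin_prefetch, the second argument (0)
   marks this as a prefetch for reading and the third (1) hints a low degree
   of temporal locality for the fetched line.  */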

#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
typedef unsigned long long reg_t;
typedef struct
{
  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif

typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;

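/* DO_BYTE(a, i) stores byte i of the register-wide value held in 'bw'
   (bw.v is the whole word, bw.b.B0..B7 alias its bytes in memory order)
   to a[i], decrements 'len', and returns once 'len' bytes are written.  */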
#define DO_BYTE(a, i)   \
  a[i] = bw.b.B##i;     \
  len--;                \
  if(!len) return ret;  \

/* This code is used when aligning a pointer, when there are remaining
   bytes after the word copies, or when the architecture has no form
   of unaligned support.  */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *x = (unsigned char *) a;
  unsigned char *y = (unsigned char *) b;
  unsigned long i;

  /* 'len' might be zero here, so preloading the first two values
     before the loop may access unallocated memory.  */
  for (i = 0; i < len; i++)
  {
    *x = *y;
    x++;
    y++;
  }
  return ret;
}

static inline void * __attribute__ ((always_inline))
do_bytes_backward (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *x = (unsigned char *) a;
  unsigned char *y = (unsigned char *) b;
  unsigned long i;

  /* 'len' might be zero here, so preloading the first two values
     before the loop may access unallocated memory.  */
  for (i = 0; i < len; i++) {
    *--x = *--y;
  }
  return ret;
}

static inline void * __attribute__ ((always_inline))
do_bytes_aligned (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *x = (unsigned char *) a;

  if(len > 0) {
    bitfields_t bw;
    bw.v = *((reg_t*) b);

#if __mips64
    DO_BYTE(x, 0);
    DO_BYTE(x, 1);
    DO_BYTE(x, 2);
    DO_BYTE(x, 3);
    DO_BYTE(x, 4);
    DO_BYTE(x, 5);
    DO_BYTE(x, 6);
    DO_BYTE(x, 7);
#else
    DO_BYTE(x, 0);
    DO_BYTE(x, 1);
    DO_BYTE(x, 2);
    DO_BYTE(x, 3);
#endif
  }

  return ret;
}

#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));
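/* On pre-R6 MIPS, GCC typically lowers accesses through this packed struct
   to the lwl/lwr and swl/swr pairs (ldl/ldr and sdl/sdr for 64-bit), or to
   the ulw/usw assembler macros, rather than to individual byte accesses.  */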

#define STORE_UNALIGNED_8(a, b)                      \
{                                                    \
  reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];  \
  reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];  \
  a[0].uli = y0;                                     \
  a[1].uli = y1;                                     \
  a[2].uli = y2;                                     \
  a[3].uli = y3;                                     \
  a[4].uli = y4;                                     \
  a[5].uli = y5;                                     \
  a[6].uli = y6;                                     \
  a[7].uli = y7;                                     \
}

#define STORE_UNALIGNED_4(a, b)                      \
{                                                    \
  reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];  \
  a[0].uli = y0;                                     \
  a[1].uli = y1;                                     \
  a[2].uli = y2;                                     \
  a[3].uli = y3;                                     \
}

/* first pointer is not aligned while second pointer is.  */
static void *
unaligned_words_forward (struct ulw *a, const reg_t * b,
                         unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  unsigned long i, words_by_8, words_by_1;
  words_by_1 = words % 8;
  words_by_8 = words >> 3;
  for (; words_by_8 > 0; words_by_8--) {
    if(words_by_8 != 1)
      PREFETCH (b + 8);
    STORE_UNALIGNED_8(a, b);
    a += 8;
    b += 8;
  }
#else
  unsigned long i, words_by_4, words_by_1;
  words_by_1 = words % 4;
  words_by_4 = words >> 2;
  for (; words_by_4 > 0; words_by_4--) {
    if(words_by_4 != 1)
      PREFETCH (b + 4);
    STORE_UNALIGNED_4(a, b);
    a += 4;
    b += 4;
  }
#endif

  /* do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    a->uli = *b;
    a += 1;
    b += 1;
  }

  /* mop up any remaining bytes.  */
  return do_bytes_aligned (a, b, bytes, ret);
}

static void *
unaligned_words_backward (struct ulw *a, const reg_t * b,
                          unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  unsigned long i, words_by_8, words_by_1;
  words_by_1 = words % 8;
  words_by_8 = words >> 3;
  for (; words_by_8 > 0; words_by_8--) {
    if(words_by_8 != 1)
      PREFETCH (b - 16);
    a -= 8;
    b -= 8;
    STORE_UNALIGNED_8(a, b);
  }
#else
  unsigned long i, words_by_4, words_by_1;
  words_by_1 = words % 4;
  words_by_4 = words >> 2;
  for (; words_by_4 > 0; words_by_4--) {
    if(words_by_4 != 1)
      PREFETCH (b - 8);
    a -= 4;
    b -= 4;
    STORE_UNALIGNED_4(a, b);
  }
#endif

  /* do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    a -= 1;
    b -= 1;
    a->uli = *b;
  }

  /* mop up any remaining bytes.  */
  return do_bytes_backward (a, b, bytes, ret);
}

#else
/* Neither HW support nor unaligned lw/ld/ualw/uald instructions: fall back
   to plain byte copies.  */
static void *
unaligned_words_forward (reg_t * a, const reg_t * b,
                         unsigned long words, unsigned long bytes, void *ret)
{
  /* Copy the whole remaining length bytewise; do_bytes_aligned would copy
     at most a single word, so use the plain byte loop here.  */
  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes, ret);
}

static void *
unaligned_words_backward (reg_t * a, const reg_t * b,
                          unsigned long words, unsigned long bytes, void *ret)
{
  return do_bytes_backward (a, b, (sizeof (reg_t) * words) + bytes, ret);
}

#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */

/* Both pointers are aligned, or the first pointer is unaligned but the
   hardware supports unaligned accesses.  */

#define STORE_ALIGNED_8(a, b)                        \
{                                                    \
  reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];  \
  reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];  \
  a[0] = x0;                                         \
  a[1] = x1;                                         \
  a[2] = x2;                                         \
  a[3] = x3;                                         \
  a[4] = x4;                                         \
  a[5] = x5;                                         \
  a[6] = x6;                                         \
  a[7] = x7;                                         \
}

#define STORE_ALIGNED_4(a, b)                        \
{                                                    \
  reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];  \
  a[0] = x0;                                         \
  a[1] = x1;                                         \
  a[2] = x2;                                         \
  a[3] = x3;                                         \
}
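/* In both the STORE_ALIGNED_* and STORE_UNALIGNED_* macros all loads are
   issued into temporaries before any store; this gives the compiler freedom
   to schedule the loads early and hide their latency, and keeps each
   unrolled iteration's memory traffic in a predictable burst.  */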

static void *
aligned_words_forward (reg_t * a, const reg_t * b,
                       unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  unsigned long i, words_by_8, words_by_1;
  words_by_1 = words % 8;
  words_by_8 = words >> 3;
  for (; words_by_8 > 0; words_by_8--) {
    if(words_by_8 != 1)
      PREFETCH (b + 8);
    STORE_ALIGNED_8(a, b);
    a += 8;
    b += 8;
  }
#else
  unsigned long i, words_by_4, words_by_1;
  words_by_1 = words % 4;
  words_by_4 = words >> 2;
  for (; words_by_4 > 0; words_by_4--) {
    if(words_by_4 != 1)
      PREFETCH (b + 4);
    STORE_ALIGNED_4(a, b);
    a += 4;
    b += 4;
  }
#endif

  /* do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    *a = *b;
    a += 1;
    b += 1;
  }

  /* mop up any remaining bytes.  */
  return do_bytes_aligned (a, b, bytes, ret);
}


static void *
aligned_words_backward (reg_t * a, const reg_t * b,
                        unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  unsigned long i, words_by_8, words_by_1;
  words_by_1 = words % 8;
  words_by_8 = words >> 3;
  for (; words_by_8 > 0; words_by_8--) {
    if(words_by_8 != 1)
      PREFETCH (b - 16);
    a -= 8;
    b -= 8;
    STORE_ALIGNED_8(a, b);
  }
#else
  unsigned long i, words_by_4, words_by_1;
  words_by_1 = words % 4;
  words_by_4 = words >> 2;
  for (; words_by_4 > 0; words_by_4--) {
    if(words_by_4 != 1)
      PREFETCH (b - 8);
    a -= 4;
    b -= 4;
    STORE_ALIGNED_4(a, b);
  }
#endif

  /* do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    a -= 1;
    b -= 1;
    *a = *b;
  }

  /* mop up any remaining bytes.  */
  return do_bytes_backward (a, b, bytes, ret);
}

void *
memmove (void *dst0, const void *src0, size_t length) __overloadable
{
  unsigned long bytes, words;
  void *ret = dst0;

  if (length == 0 || dst0 == src0)      /* nothing to do */
    return dst0;

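  /* If the destination starts below the source, a forward copy can never
     overwrite source bytes that have not yet been read, so it is safe even
     for overlapping regions; otherwise copy backward from the end.  */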
  if ((unsigned long)dst0 < (unsigned long)src0) {
    /* Copy forwards. */
    /* This shouldn't hit that often. */
    if (length < sizeof (reg_t) * 4) {
      return do_bytes (dst0, src0, length, ret);
    }

    /* Align the second pointer to word/dword alignment.
       Note that the pointer is only 32-bits for o32/n32 ABIs. For
       n32, loads are done as 64-bit while address remains 32-bit.   */
    bytes = ((unsigned long) src0) % sizeof (reg_t);
    if (bytes) {
      bytes = sizeof (reg_t) - bytes;
      if (bytes > length)
        bytes = length;
      do_bytes (dst0, src0, bytes, ret);
      if (length == bytes)
        return ret;
      length -= bytes;
      dst0 = (void *) (((unsigned char *) dst0) + bytes);
      src0 = (const void *) (((unsigned char *) src0) + bytes);
    }

    /* Second pointer now aligned.  */
    words = length / sizeof (reg_t);
    bytes = length % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
    /* treat the possibly unaligned first pointer as aligned.  */
    return aligned_words_forward (dst0, src0, words, bytes, ret);
#else
    if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
      return aligned_words_forward (dst0, src0, words, bytes, ret);
    }
    /* need to use unaligned instructions on the first pointer.  */
    return unaligned_words_forward (dst0, src0, words, bytes, ret);
#endif
  } else {
    /* Copy backwards. */
    dst0 = (void *) (((unsigned char *) dst0) + length);
    src0 = (const void *) (((unsigned char *) src0) + length);

    /* This shouldn't hit that often. */
    if (length < sizeof (reg_t) * 4) {
      return do_bytes_backward (dst0, src0, length, ret);
    }

    /* Align the second pointer to word/dword alignment.
       Note that the pointer is only 32-bits for o32/n32 ABIs. For
       n32, loads are done as 64-bit while address remains 32-bit.   */
    bytes = ((unsigned long) src0) % sizeof (reg_t);
    if (bytes) {
      if (bytes > length)
        bytes = length;
      do_bytes_backward (dst0, src0, bytes, ret);
      if (length == bytes)
        return ret;
      length -= bytes;
      dst0 = (void *) (((unsigned char *) dst0) - bytes);
      src0 = (const void *) (((unsigned char *) src0) - bytes);
    }

    words = length / sizeof (reg_t);
    bytes = length % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
    /* treat the possibly unaligned first pointer as aligned.  */
    return aligned_words_backward ((void *)dst0, (void *)src0, words, bytes, ret);
#else
    if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
      return aligned_words_backward (dst0, src0, words, bytes, ret);
    }
    /* need to use unaligned instructions on the first pointer.  */
    return unaligned_words_backward (dst0, src0, words, bytes, ret);
#endif
  }
}
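/* Usage sketch (illustrative only, not part of this file):

     char buf[9] = "abcdefgh";
     memmove (buf + 2, buf, 6);

   Here dst > src, so the copy runs backward from the end and buf ends up
   holding "ababcdef"; the overlapping bytes are read before they are
   overwritten, as required by the ISO C memmove contract.  */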