/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2
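/* Prefetch the cache line containing src into the data cache.
 * "pref 0" is the MIPS load-prefetch hint; it has no architectural
 * side effects and only warms the cache for the loads that follow. */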
inline void prefetch_load_int(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}


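/* Copy a 16x16 block one row at a time: each row is read as four 32-bit
 * words with ulw (unaligned load word), so src may be unaligned, and is
 * written back with sw, which expects a word-aligned dst. */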
__inline void vp8_copy_mem16x16_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1, a2, a3;

    for (r = 16; r--;)
    {
        /* load src data in cache memory */
        prefetch_load_int(src + src_stride);

        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "ulw    %[a2], 8(%[src])            \n\t"
            "ulw    %[a3], 12(%[src])           \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            "sw     %[a2], 8(%[dst])            \n\t"
            "sw     %[a3], 12(%[dst])           \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1),
              [a2] "=&r" (a2), [a3] "=&r" (a3)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


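/* Copy an 8x8 block: each row is read as two 32-bit words with ulw
 * (src may be unaligned) and written with sw (dst is expected to be
 * word aligned). */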
__inline void vp8_copy_mem8x8_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    for (r = 8; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


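/* Copy an 8x4 block: same two-word-per-row scheme as the 8x8 copy,
 * but only four rows. */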
__inline void vp8_copy_mem8x4_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    for (r = 4; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}

#endif