/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2
inline void prefetch_load_int(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}
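/* Note: hint 0 on the MIPS32 PREF instruction is the "load" hint, so the
 * call above only warms the data cache for an upcoming read; it does not
 * modify memory. */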


__inline void vp8_copy_mem16x16_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1, a2, a3;

    for (r = 16; r--;)
    {
        /* prefetch the next source row into the data cache */
        prefetch_load_int(src + src_stride);

        /* unaligned 32-bit loads from src, word stores to the aligned dst */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])    \n\t"
            "ulw    %[a1], 4(%[src])    \n\t"
            "ulw    %[a2], 8(%[src])    \n\t"
            "ulw    %[a3], 12(%[src])   \n\t"
            "sw     %[a0], 0(%[dst])    \n\t"
            "sw     %[a1], 4(%[dst])    \n\t"
            "sw     %[a2], 8(%[dst])    \n\t"
            "sw     %[a3], 12(%[dst])   \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1),
              [a2] "=&r" (a2), [a3] "=&r" (a3)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}
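
/* Illustrative only (not part of the original build; compiled out by the
 * #if 0 guard): a plain C reference of the per-row copy that the DSPR2
 * inline assembly above performs with four ulw loads and four sw stores.
 * The helper name copy_row16_c is hypothetical. */
#if 0
static void copy_row16_c(const unsigned char *src, unsigned char *dst)
{
    int c;

    /* byte-wise copy of one 16-pixel row */
    for (c = 0; c < 16; c++)
        dst[c] = src[c];
}
#endif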


__inline void vp8_copy_mem8x8_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* prefetch the next source row into the data cache */
    prefetch_load_int(src + src_stride);

    for (r = 8; r--;)
    {
        /* unaligned 32-bit loads from src, word stores to the aligned dst */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])    \n\t"
            "ulw    %[a1], 4(%[src])    \n\t"
            "sw     %[a0], 0(%[dst])    \n\t"
            "sw     %[a1], 4(%[dst])    \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


__inline void vp8_copy_mem8x4_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* prefetch the next source row into the data cache */
    prefetch_load_int(src + src_stride);

    for (r = 4; r--;)
    {
        /* unaligned 32-bit loads from src, word stores to the aligned dst */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])    \n\t"
            "ulw    %[a1], 4(%[src])    \n\t"
            "sw     %[a0], 0(%[dst])    \n\t"
            "sw     %[a1], 4(%[dst])    \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}
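
/* Usage sketch (illustrative only; compiled out by the #if 0 guard):
 * copying a 16x16 block from a reference buffer into a prediction buffer.
 * The wrapper name and buffer names here are hypothetical; real callers
 * reach these functions through the RTCD dispatch declared in vp8_rtcd.h.
 * Because the stores are plain sw instructions, dst is assumed to be
 * 4-byte aligned, while src may be unaligned (handled by ulw). */
#if 0
static void copy_block_example(unsigned char *ref, int ref_stride,
                               unsigned char *pred, int pred_stride)
{
    vp8_copy_mem16x16_dspr2(ref, ref_stride, pred, pred_stride);
}
#endif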
#endif  /* HAVE_DSPR2 */