1# D30V parallel optimization test
2# assemble with "-O"
3
4	.text
# Basic adjacent pairs. The two instructions in each group name
# disjoint operands (r1,r2 vs r3,r4; f0,f4 vs f1,f2; registers vs flags).
# NOTE(review): presumably these are the simplest candidates for the
# optimizer to pack together -- confirm against the expected dump.
5start:
6	abs	r1,r2
7	abs	r3,r4
8
9	notfg	f0,f4
10	notfg	f1,f2
11
12	abs	r1,r2
13	notfg	f1,f2
14
# Carry-flag interaction between an add and a notfg.
# First pair: notfg names C as its first operand.
# Second pair: notfg names C as its second operand.
15# both change C flag
16	add	r1,r2,r3
17	notfg	C,f0
18
19# one uses and one changes C flag
20	add	r1,r2,r3
21	notfg	f0,C
22
22
# Branch (bra) and branch-to-subroutine (bsr) next to an abs, with the
# control transfer placed first and then last. The target "." is the
# current location. The final group has two abs instructions before the
# bsr, so three instructions are adjacent.
# NOTE(review): expected packing for each ordering is not stated here --
# check the expected dump.
23	bra	.
24	abs	r1,r2
25
26	abs	r1,r2
27	bra	.
28
29	bsr	.
30	abs	r1,r2
31
32	abs	r1,r2
33	abs	r1,r2
34	bsr	.
35
35
# Byte load/store pairs in all four orderings: load-store, store-load,
# load-load and store-store. The two memory operands in each pair use
# unrelated registers.
36	ldb	r1,@(r2,r3)
37	stb	r7,@(r8,r9)
38
39	stb	r7,@(r8,r9)
40	ldb	r1,@(r2,r3)
41
42	ldb	r7,@(r8,r9)
43	ldb	r1,@(r2,r3)
44
45	stb	r7,@(r8,r9)
46	stb	r1,@(r2,r3)
47
# add names r3 as its first operand and the following stw uses r3 in its
# address.
# NOTE(review): presumably a write-then-use dependency on r3 -- confirm.
48	add     r3, r3, r6
49	stw     r2, @(r3, 0)
50
50
# A compare that writes a flag (f0 in the first six pairs, f1 in the
# last two), followed by a jmp carrying a condition suffix.
# NOTE(review): the two-letter suffix (tx, fx, xt, xf, tt, tf) presumably
# selects the required true/false/don't-care state of f0 and f1 --
# confirm in the D30V manual.
51# should be serial because of conditional execution
52        cmple   f0,r4,r5
53        jmp/tx  0x0
54
55        cmple   f0,r4,r5
56        jmp/fx  0x0
57
58        cmple   f0,r4,r5
59        jmp/xt  0x0
60
61        cmple   f0,r4,r5
62        jmp/xf  0x0
63
64        cmple   f0,r4,r5
65        jmp/tt  0x0
66
67        cmple   f0,r4,r5
68        jmp/tf  0x0
69
70        cmple   f1,r4,r5
71        jmp/tx  0x0
72
73        cmple   f1,r4,r5
74        jmp/xt  0x0
75
75
# Register dependency checks. Each pair is preceded by the expected
# outcome (serial or parallel). The ld2w/st2w cases name r4 but, per the
# comments below, also involve r5.
76	# serial because of the r4 dependency
77	add	r4, r0, 1
78	cmple	f0, r4, r5
79
80	# parallel
81	add	r4, r0, 1
82	cmple	f0, r3, r5
83
84	# serial because ld2w loads r5
85	ld2w	r4,@(r0,r6)
86	adds	r5,r19,r20
87
88	# serial because ld2w loads r5
89	ld2w	r4,@(r0,r6)
90	adds	r3,r5,r20
91
92	# parallel even though ld2w uses r6 and adds changes it
93	ld2w	r4,@(r0,r6)
94	adds	r6,r19,r20
95
96	# parallel
97	ld2w	r4,@(r0,r6)
98	adds	r7,r19,r20
99
100	# parallel
101	ld2w	r4,@(r0,r6)
102	adds	r7,r0,r20
103
# Same five adds variants as above, now paired with st2w instead of ld2w.
104	# parallel even though st2w uses r5 and adds modifies it
105	st2w	r4,@(r0,r6)
106	adds	r5,r19,r20
107
108	# parallel, both use but don't modify r5
109	st2w	r4,@(r0,r6)
110	adds	r3,r5,r20
111
112	# parallel even though st2w uses r6 and adds changes it
113	st2w	r4,@(r0,r6)
114	adds	r6,r19,r20
115
116	# parallel
117	st2w	r4,@(r0,r6)
118	adds	r7,r19,r20
119
120	# parallel
121	st2w	r4,@(r0,r6)
122	adds	r7,r0,r20
123
123
# Pairs of memory accesses whose register operands do not overlap
# (r1-r10 range vs r40-r44 range), so any serialization here comes from
# the memory access itself rather than from a register conflict.
124# test memory dependencies
125
126	# always serial because one could overwrite the other
127	st2w	r10,@(r3,r4)
128	st2w	r40,@(r43,r44)
129
130	# always serial
131	stw	r1,@(r2,r3)
132	ldw	r41,@(r42,r43)
133
134	# reads can happen in parallel but the current architecture
135	# doesn't support it
136	ldw	r1,@(r2,r3)
137	ldb	r41,@(r42,r43)
138
138
# r6 is the base register of the memory access and a source operand of
# adds in every pair. The cases differ in whether the base carries a
# post-modify marker (r6+ / r6-) and in which instruction comes first.
139# test post increment and decrement dependencies
140
141	# serial
142	ldw	r4,@(r6+,r11)
143	adds	r9,r6,2
144
145	# parallel, modification to r6 happens last
146	adds	r9,r6,2
147	ldw	r4,@(r6-,r11)
148
149	# serial
150	stw	r4,@(r6-,r11)
151	adds	r9,r6,2
152
# The last two pairs use plain r6 (no post-modify) in both orderings.
153	# parallel
154	ldw	r4,@(r6,r11)
155	adds	r9,r6,2
156
157	# parallel
158	adds	r9,r6,2
159	ldw	r4,@(r6,r11)
160
160
# Four control-transfer mnemonics (jmp, jsr, bra, bsr), each placed
# first and followed by an abs.
161# if the first instruction is a jmp, don't parallelize
162	jmp	0
163	abs	r1,r2
164
165	jsr	0
166	abs	r1,r2
167
# NOTE(review): the reason for this alignment directive between the
# jmp/jsr cases and the bra/bsr cases is not stated -- confirm before
# moving or removing it.
168	.align	3
169
170	bra	0
171	abs	r1,r2
172
173	bsr	0
174	abs	r1,r2
175
175
176# Explicitly prohibited from parallel execution.
177# The labels are here to prevent instruction pairs
178#  from being merged with following pairs.
179
# Each labelled pair is a memory access (st2w, st4hb, ld2w, ldw, ldh)
# followed by an arithmetic instruction on unrelated registers.
# NOTE(review): the second mnemonics look like half-word add/subtract/
# multiply variants -- confirm in the D30V manual.
180label1:
181	st2w     r2, @(r2, r3)
182	addhlll  r4, r5, r6
183label2:
184	st4hb    r8, @(r8, r9)
185	subhllh  r10, r11, r12
186label3:
187	ld2w     r14, @(r14, r15)
188	mulhxhl  r16, r17, r18
189label4:
190	ldw      r19, @(r20, r21)
191	mulx2h   r22, r23, r24
192label5:
193	ldh      r25, @(r26, r27)
194	mul2h    r28, r29, r30
195
195
196# Insertion of NOPs required to prevent pipeline clashes.
197
# Three consecutive instructions on disjoint registers: two multiplies
# (mul, mulhxll) followed by an add.
198label6:
199	mul r1,r2,r3
200	mulhxll r4,r5,r6
201        add r7, r8, r9
# Multiply or multiply-accumulate (mul, mac0, mac1) next to a word load.
# Some cases are written as two separate lines, others as one line
# joined by an explicit sequencing operator.
# NOTE(review): per the GAS D30V syntax, "->" runs the left instruction
# first and "<-" runs the right instruction first -- confirm against the
# GAS manual. The expected NOP placement is not stated here.
202label7:
203
204        mul  r2,r3,r4
205        ldw  r5, @(r6,r0)
206
207        ldw  r10, @(r11, r0) <- mul r7,r8,r9
208
209        mul  r12,r13,r14 -> ldw r15, @(r16, r0)
210
211        mac1 r2,r3,r4
212        ldw  r5, @(r6,r0)
213
# The explicitly sequenced ldw/mac0 pair is followed directly by a second
# ldw with the same operands.
214        ldw  r10, @(r11, r0) <- mac0 r7,r8,r9
215        ldw  r10, @(r11, r0)
216
217