1#!/bin/bash
2#
3# Test various instructions to check whether half<->full widening/narrowing
4# works.  The basic premise is to perform the same instruction with and
5# without the widening/narrowing folded in and check if the results match.
6#
# Note this doesn't currently differentiate between signed/unsigned/bool,
# and just assumes int is signed (since unsigned is basically(ish) like
# signed but without sign extension)
10#
# TODO probably good to pick numeric src values that are better at triggering
# edge cases, while still not losing precision in a full->half->full
# sequence.. but some instructions like absneg don't even appear to be
# subtly wrong when you try to fold in a precision conversion.
15#
16# add '-v' arg to see the result values
17
18set -e
19
20#
21# Templates for float->float instructions:
22#
# Each entry is a template using the placeholders $dst/$src1/$src2/$src3.
# Entries are single-quoted so the placeholders stay literal until the
# template is expanded; the parentheses of source modifiers are
# backslash-escaped because the template is later re-parsed by the shell.

# two-operand arithmetic (min/max listed with both operand orders):
f2f_instrs=(
	'add.f $dst, $src1, $src2'
	'min.f $dst, $src1, $src2'
	'min.f $dst, $src2, $src1'
	'max.f $dst, $src1, $src2'
	'max.f $dst, $src2, $src1'
	'mul.f $dst, $src1, $src2'
)

# sign / source-modifier handling:
f2f_instrs+=(
	'sign.f $dst, $src1'
	'absneg.f $dst, \(neg\)$src1'
	'absneg.f $dst, \(abs\)$src1'
)

# rounding family:
f2f_instrs+=(
	'floor.f $dst, $src1'
	'ceil.f $dst, $src1'
	'rndne.f $dst, $src1'
	'rndaz.f $dst, $src1'
	'trunc.f $dst, $src1'
)
39
40#
41# Templates for float->int instructions:
42#
# Float compares, in the same template form as the other instruction
# lists ($dst/$src1/$src2 placeholders, kept literal by single quotes).

# cmps.* variants:
f2i_instrs=(
	'cmps.f.gt $dst, $src1, $src2'
	'cmps.f.lt $dst, $src1, $src2'
)

# cmpv.* variants:
f2i_instrs+=(
	'cmpv.f.gt $dst, $src1, $src2'
	'cmpv.f.lt $dst, $src1, $src2'
)
49
50#
51# Templates for int->int instructions:
52#
# Templates use the placeholders $dst/$src1/$src2/$src3 and are
# single-quoted so the placeholders stay literal until expansion.
#
# NOTE(review): which of $src1/$src2/$src3 a template uses differs from
# entry to entry (e.g. shr.b takes $src3, $src1); this looks deliberate,
# presumably to get usable operand values -- confirm before "fixing".

# add/sub, .u and .s variants:
i2i_instrs=(
	'add.u $dst, $src1, $src2'
	'add.s $dst, $src1, $src2'
	'sub.u $dst, $src1, $src2'
	'sub.s $dst, $src1, $src2'
)

# NOTE(review): these are the float (.f) compares, listed here among the
# int->int templates -- confirm this is intentional.
i2i_instrs+=(
	'cmps.f.gt $dst, $src1, $src2'
	'cmps.f.lt $dst, $src1, $src2'
)

# min/max, each with both operand orders:
i2i_instrs+=(
	'min.u $dst, $src1, $src2'
	'min.u $dst, $src2, $src1'
	'min.s $dst, $src1, $src2'
	'min.s $dst, $src2, $src1'
	'max.u $dst, $src1, $src2'
	'max.u $dst, $src2, $src1'
	'max.s $dst, $src1, $src2'
	'max.s $dst, $src2, $src1'
)

# source modifiers (parentheses escaped for the later shell re-parse):
i2i_instrs+=(
	'absneg.s $dst, \(neg\)$src1'
	'absneg.s $dst, \(abs\)$src1'
)

# bitwise logic:
i2i_instrs+=(
	'and.b $dst, $src2, $src3'
	'or.b $dst, $src1, $src2'
	'not.b $dst, $src1'
	'xor.b $dst, $src1, $src2'
)

# integer cmpv.* compares:
i2i_instrs+=(
	'cmpv.u.gt $dst, $src1, $src2'
	'cmpv.u.lt $dst, $src1, $src2'
	'cmpv.s.gt $dst, $src1, $src2'
	'cmpv.s.lt $dst, $src1, $src2'
)

# multiplies:
i2i_instrs+=(
	'mul.u24 $dst, $src1, $src2'
	'mul.s24 $dst, $src1, $src2'
	'mull.u $dst, $src1, $src2'
)

# bit manipulation, shifts and the remaining opcodes:
i2i_instrs+=(
	'bfrev.b $dst, $src1'
	'clz.s $dst, $src2'
	'clz.b $dst, $src2'
	'shl.b $dst, $src1, $src2'
	'shr.b $dst, $src3, $src1'
	'ashr.b $dst, $src3, $src1'
	'mgen.b $dst, $src1, $src2'
	'getbit.b $dst, $src3, $src2'
	'setrm $dst, $src1'
	'cbits.b $dst, $src3'
	'shb $dst, $src1, $src2'
	'msad $dst, $src1, $src2'
)
94
95#
96# Helper to expand instruction template:
97#
# Usage: expand TEMPLATE DST SRC1 SRC2 SRC3
#
# Prints TEMPLATE on stdout with the placeholders $dst, $src1, $src2 and
# $src3 replaced by the given register names.  The template is re-parsed
# by the shell, so backslash-escaped characters such as \( and \) come
# out unescaped.
expand() {
	# Locals, so that the caller's variables of the same names are not
	# overwritten.  eval runs in this function's scope and therefore
	# still sees them when it expands the placeholders.
	local instr=$1
	local dst=$2 src1=$3 src2=$4 src3=$5

	# NOTE: eval is only acceptable here because the templates are
	# constants defined in this script; never feed it external input.
	# Passing one quoted string keeps the template from being glob- or
	# word-expanded before eval gets to parse it.
	eval "echo $instr"
}
106
# Usage: expand_test TEMPLATE
#
# Emits four expansions of TEMPLATE (each preceded by an assembly
# comment) followed by a "(rpt5)nop":
#   hr1.x <- hr0.*   control, half->half
#   hr1.y <- r1.*    test,    full->half
#   r2.x  <- r1.*    control, full->full
#   r2.y  <- hr0.*   test,    half->full
expand_test() {
	local instr=$1

	# The template is quoted on every call so it reaches expand as a
	# single argument regardless of the current IFS setting.
	echo '; control, half->half:'
	expand "$instr" "hr1.x" "hr0.x" "hr0.y" "hr0.z"
	echo '; test, full->half:'
	expand "$instr" "hr1.y" "r1.x" "r1.y" "r1.z"

	echo '; control, full->full:'
	expand "$instr" "r2.x" "r1.x" "r1.y" "r1.z"
	echo '; test, half->full:'
	expand "$instr" "r2.y" "hr0.x" "hr0.y" "hr0.z"

	echo "(rpt5)nop"
}
122
123#
124# Helpers to construct test program assembly:
125#
# Print the two directive lines that open every generated program
# (takes no arguments).  The buffer declared here is the one the
# generated code annotates as g[0].
header_asm() {
	printf '%s\n' \
		'@localsize 1, 1, 1' \
		'@buf 4  ; g[0]'
}
132
# Print the common program tail (takes no arguments): load the offsets
# 0..3 into r3.x..r3.w, then store r2.x..r2.w to g[0] at those offsets
# and end the program.  The trailing comment on each store records which
# result lives in that slot.
footer_asm() {
	local -a comps=(x y z w)
	local -a notes=(
		'control: full->full'
		'test:    half->full'
		'control: half->half'
		'test:    full->half'
	)
	local i

	printf '%s\n' '; dest offsets:'
	for i in 0 1 2 3; do
		printf 'mov.u32u32 r3.%s, %d\n' "${comps[i]}" "$i"
	done
	printf '%s\n' '(rpt5)nop' '' '; and store results:'

	for i in 0 1 2 3; do
		printf 'stib.untyped.1d.u32.1 g[0] + r3.%s, r2.%s   ; %s\n' \
			"${comps[i]}" "${comps[i]}" "${notes[i]}"
	done
	printf '%s\n' '(sy)nop' 'end'
}
151
# Print the source-register setup for the float tests (takes no
# arguments): the values 1.0, 2.0 and 3.0 go into hr0.x/y/z via
# cov.f32f16 and into r1.x/y/z via mov.f32f32, followed by "(rpt5)nop".
setup_asm_float() {
	local -a comps=(x y z)
	local -a vals=(1.0 2.0 3.0)
	local i

	printf '%s\n' \
		'; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst' \
		'; r1->r2 avail for full, r1 for src, r2 for dst'

	# half sources first ...
	for i in 0 1 2; do
		printf 'cov.f32f16 hr0.%s, (%s)\n' "${comps[i]}" "${vals[i]}"
	done
	# ... then the full sources, holding the same values.
	for i in 0 1 2; do
		printf 'mov.f32f32 r1.%s,  (%s)\n' "${comps[i]}" "${vals[i]}"
	done

	printf '%s\n' '(rpt5)nop'
}
165
# Print the source-register setup for the integer tests (takes no
# arguments): the values 1, -2 and 3 go into hr0.x/y/z via cov.s32s16
# and into r1.x/y/z via mov.s32s32, followed by "(rpt5)nop".
setup_asm_int() {
	local -a comps=(x y z)
	local -a vals=(1 -2 3)
	local i

	printf '%s\n' \
		'; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst' \
		'; r1->r2 avail for full, r1 for src, r2 for dst'

	# %2d right-aligns the immediates so the sign column lines up.
	for i in 0 1 2; do
		printf 'cov.s32s16 hr0.%s, %2d\n' "${comps[i]}" "${vals[i]}"
	done
	for i in 0 1 2; do
		printf 'mov.s32s32 r1.%s,  %2d\n' "${comps[i]}" "${vals[i]}"
	done

	printf '%s\n' '(rpt5)nop'
}
179
180#
181# Generate assembly code to test float->float opcode
182#
# Usage: f2f_asm TEMPLATE
#
# Prints a complete test program for a float->float instruction template:
# header, float source setup, the expanded control/test instructions,
# conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w with
# cov.f16f32, and the common footer.
f2f_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	# Quoted so the template is passed on as one argument whatever the
	# current IFS is.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.f16f32 r2.z, hr1.x
cov.f16f32 r2.w, hr1.y
EOF

	footer_asm
}
198
199#
200# Generate assembly code to test float->int opcode
201#
# Usage: f2i_asm TEMPLATE
#
# Prints a complete test program for a float->int instruction template:
# header, float source setup, the expanded control/test instructions,
# conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w with
# cov.s16s32, and the common footer.
f2i_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	# Quoted so the template is passed on as one argument whatever the
	# current IFS is.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}
217
218#
219# Generate assembly code to test int->int opcode
220#
# Usage: i2i_asm TEMPLATE
#
# Prints a complete test program for an int->int instruction template:
# header, integer source setup, the expanded control/test instructions,
# conversion of the two half results (hr1.x, hr1.y) into r2.z/r2.w with
# cov.s16s32, and the common footer.
i2i_asm() {
	local instr=$1

	header_asm
	setup_asm_int
	# Quoted so the template is passed on as one argument whatever the
	# current IFS is.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}
236
237
238#
239# Helper to parse computerator output and print results:
240#
# Usage: <computerator output> | check_results
#
# Reads the tool output on stdin, takes the first line that contains a
# tab and treats its first four whitespace-separated words as
#   1: control full->full   2: test half->full
#   3: control half->half   4: test full->half
# Prints a FAIL message for every mismatch, and also when fewer than four
# values could be parsed.  When $verbose is "true" the parsed line is
# echoed first.  Failures are only reported, not returned, so the caller
# keeps running the remaining tests.
check_results() {
	# The script runs with a global IFS="", so restore normal word
	# splitting for the parsing below (scoped to this function).
	local IFS=$' \t\n'
	local line cf tf ch th
	local -a vals=()

	# "|| true": a missing result line is reported below rather than
	# aborting through set -e.
	line=$(grep $'\t' | head -n 1) || true
	read -r -a vals <<<"$line" || true

	if [ "${verbose:-}" = "true" ]; then
		printf '%s\n' "${vals[*]}"
	fi

	if [ "${#vals[@]}" -lt 4 ]; then
		# Without this check an empty line turns the comparisons into
		# shell syntax errors and a single-word line compares equal to
		# itself, i.e. a silent pass.
		echo "    FAIL: could not parse 4 result values from computerator output!"
	else
		# Split components of result buffer:
		cf=${vals[0]}
		tf=${vals[1]}
		ch=${vals[2]}
		th=${vals[3]}

		# Sanity test, make sure the control results match:
		if [ "$cf" != "$ch" ]; then
			echo "    FAIL: control results do not match!  Half vs full op is not equivalent!"
			echo "    full=$cf half=$ch"
		fi

		# Compare test (with conversion folded) to control:
		if [ "$cf" != "$tf" ]; then
			echo "    FAIL: half -> full widening result does not match control!"
			echo "    control=$cf result=$tf"
		fi
		if [ "$ch" != "$th" ]; then
			echo "    FAIL: full -> half narrowing result does not match control!"
			echo "    control=$ch result=$th"
		fi
	fi

	# HACK without a delay different invocations
	# of computerator seem to somehow clobber each
	# other.. which isn't great..
	sleep 0.1
}
275
276#
277# Run the tests!
278#
279
# "-v" as the first argument turns on echoing of the raw result values.
if [ "${1:-}" = "-v" ]; then
	verbose="true"
fi

# Disable word splitting globally: the helper functions in this script
# have historically passed the (space-containing) instruction templates
# around unquoted.  Kept for compatibility even though the expansions
# below are quoted.
IFS=""

# Usage: run_tests GENERATOR TEMPLATE...
#
# For each TEMPLATE, prints a "TEST:" banner, feeds the program printed
# by GENERATOR through ./computerator and checks the output.
run_tests() {
	local generator=$1
	local instr
	shift

	for instr in "$@"; do
		echo "TEST: $instr"
		"$generator" "$instr" | ./computerator -g 1,1,1 | check_results
	done
}

run_tests f2f_asm "${f2f_instrs[@]}"
run_tests f2i_asm "${f2i_instrs[@]}"
run_tests i2i_asm "${i2i_instrs[@]}"
297
298