• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Intel SIMD MMX implementation of Viterbi ACS butterflies
2    for 64-state (k=7) convolutional code
3    Copyright 2004 Phil Karn, KA9Q
4    This code may be used under the terms of the GNU Lesser General Public License (LGPL)
5 
6    int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;
7 */
8 	# MMX (64-bit SIMD) version
9 	# requires Pentium-MMX, Pentium-II or better
10 
11 	# These are offsets into struct v27, defined in viterbi27_mmx.c
12 	.set DP,128
13 	.set OLDMETRICS,132
14 	.set NEWMETRICS,136
15 	.text
16 	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
17 	.type update_viterbi27_blk_mmx,@function
18 	.align 16
19 
/*
 * int update_viterbi27_blk_mmx(struct v27 *vp, unsigned char *syms, int nbits)
 *
 * Runs the add-compare-select butterflies for nbits data bits, consuming
 * two bytes from syms per bit.
 *
 * ABI:  i386, arguments on the stack
 *         8(%ebp)  = vp
 *         12(%ebp) = syms   (advanced in place, 2 bytes per bit)
 *         16(%ebp) = nbits  (counted down in place)
 * Out:  %eax = 0 on success, -1 if vp is NULL.
 *       On success the old/new metric pointers and the decision pointer
 *       held in *vp are written back (metric pointers swapped once per
 *       bit, decision pointer advanced by 64 bytes per bit).
 * Regs: %esi, %edi, %edx, %ebx are saved and restored.
 *       %eax and %mm0-%mm7 are clobbered; emms is executed before the
 *       successful return.
 *
 * Register roles inside the per-bit loop:
 *       %esi -> old path metrics      %edi -> new path metrics
 *       %edx -> decision output       %eax =  first symbol  * 32
 *                                     %ebx =  second symbol * 32
 */

	# butterfly GROUP
	# One expansion handles 8 byte lanes of old metrics at offset 8*GROUP
	# and the 8 lanes at 8*GROUP+32, and writes 16 decision bytes and
	# 16 new metric bytes at offset 16*GROUP.
	# Uses %eax/%ebx as row offsets into Mettab27_1/Mettab27_2 and
	# clobbers %mm0-%mm7.
	.macro butterfly GROUP
	# Compute branch metrics
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3  # emulate pavgb - this may not be necessary
	psrlq $1,%mm3	 # 64-bit shift: bits leak between byte lanes ...
	pand %mm0,%mm3	 # ... so keep only the low 4 bits of every lane

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor  %mm0,%mm3		 # invert branch metric (15 - metric per lane)
	paddb %mm3,%mm7		 # path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7;  mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0 for the signed compares below
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: where the decision byte is ff take mm7 (mm2),
	# otherwise take mm6 (mm1)
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high 4 lanes of each decision set
	punpcklbw %mm5,%mm3	# low 4 lanes of each decision set
	punpcklbw %mm2,%mm7	# low 4 lanes of each survivor set
	punpckhbw %mm2,%mm0	# high 4 lanes of each survivor set
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# NULL vp: return -1.  The "$" is required; without it the operand
	# is a memory reference to absolute address -1 and the load faults.
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# ---- per-bit loop -------------------------------------------------
1:	movl 16(%ebp),%eax	# eax = nbits still to do
	decl %eax
	jl   2f			# passed zero, all bits processed
	movl %eax,16(%ebp)	# counter lives in the argument slot

	movl 12(%ebp),%ebx	# ebx = syms
	movzbl (%ebx),%eax	# eax = first symbol of this bit
	addl $2,%ebx
	movl %ebx,12(%ebp)	# syms += 2 for the next bit
	movzbl -1(%ebx),%ebx	# ebx = second symbol of this bit

	# shift into first array index dimension slot
	# (each symbol value selects a 32-byte row: 4 groups of 8 bytes)
	shll $5,%eax
	shll $5,%ebx

	# invoke macro 4 times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer (4 groups * 16 bytes)

	# swap metrics: this bit's new metrics are the next bit's old ones
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

	# ---- normal exit --------------------------------------------------
2:	emms			# leave MMX state before returning to the caller
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
	# common epilogue; the NULL-vp path arrives here with eax = -1
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret
141 
142 	.data
143 	.align 8
144 fifteens:
145 	.byte 15,15,15,15,15,15,15,15
146 
147 	.align 8
148 ones:	.byte 1,1,1,1,1,1,1,1
149