neon.ll - OpenGrok cross reference for /frameworks/rs/driver/runtime/arch/neon.ll

Lines Matching refs:float
8 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
9 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
17 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
18 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
38 declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
39 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
41 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
42 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
44 declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
45 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
47 declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
48 declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
54 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
55   %1 = insertelement <4 x float> undef, float %in, i32 0
56   %2 = insertelement <4 x float> %1, float %in, i32 1
57   %3 = insertelement <4 x float> %2, float %in, i32 2
58   %4 = insertelement <4 x float> %3, float %in, i32 3
59   ret <4 x float> %4
80 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
81   %1 = insertelement <2 x float> undef, float %in, i32 0
82   %2 = insertelement <2 x float> %1, float %in, i32 1
83   ret <2 x float> %2
112 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) noun…
113 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounw…
114 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind r…
115   ret <4 x float> %2
118 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
119   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
120   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
121 …%out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float>…
122   ret <4 x float> %out
125 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) noun…
126 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
127   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
128 …%_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
129 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
130 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
131   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
132   ret <3 x float> %c
135 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
136 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
137   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
138   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
139 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
140 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
141   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
142   ret <3 x float> %c
145 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) noun…
146 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounw…
147 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind r…
148   ret <2 x float> %2
151 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
152   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
153   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
154 …%a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) noun…
155 …%b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind …
156   ret <2 x float> %b
159 define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
160   %1 = fcmp olt float %value, %high
161   %2 = select i1 %1, float %value, float %high
162   %3 = fcmp ogt float %2, %low
163   %4 = select i1 %3, float %2, float %low
164   ret float %4
272 define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
273 …%1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
274   ret <4 x float> %1
277 define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
278   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
279 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
280   ret <4 x float> %2
283 define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
284   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
285   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
286 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
287   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
288   ret <3 x float> %4
291 define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
292   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
294 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
295   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
296   ret <3 x float> %c
299 define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
300 …%1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
301   ret <2 x float> %1
304 define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
305   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
306 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
307   ret <2 x float> %2
310 define float @_Z4fmaxff(float %v1, float %v2) nounwind readonly {
311   %1 = fcmp ogt float %v1, %v2
312   %2 = select i1 %1, float %v1, float %v2
313   ret float %2
321 define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
322 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
323   ret <4 x float> %1
326 define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
327   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
328 …%2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
329   ret <4 x float> %2
332 define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
333   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
334   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
335 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
336   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
337   ret <3 x float> %4
340 define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
341   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
342   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
343 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
344   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
345   ret <3 x float> %c
348 define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
349 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
350   ret <2 x float> %1
353 define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
354   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
355 …%2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
356   ret <2 x float> %2
359 define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
360   %1 = fcmp olt float %v1, %v2
361   %2 = select i1 %1, float %v1, float %v2
362   ret float %2
561 define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
562   %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
563   ret float %1
566 define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
567   %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
568   ret <2 x float> %1
571 define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
572   %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
573   ret <2 x float> %1
576 define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
577   %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
578   ret <3 x float> %1
581 define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
582   %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
583   ret <3 x float> %1
586 define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
587   %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
588   ret <4 x float> %1
591 define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
592   %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
593   ret <4 x float> %1
792 define float @_Z3minff(float %v1, float %v2) nounwind readnone {
793   %1 = tail call float @_Z4fminff(float %v1, float %v2)
794   ret float %1
797 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
798   %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
799   ret <2 x float> %1
802 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
803   %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
804   ret <2 x float> %1
807 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
808   %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
809   ret <3 x float> %1
812 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
813   %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
814   ret <3 x float> %1
817 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
818   %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
819   ret <4 x float> %1
822 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
823   %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
824   ret <4 x float> %1
875 define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone {
876   %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
877 …%2 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %1, <2 x float> %v) nounwind re…
878   %3 = fmul <2 x float> %1, %2
879 …%4 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %3, <2 x float> %v) nounwind re…
880   %5 = fmul <2 x float> %4, %3
881   ret <2 x float> %5
884 define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone {
885   %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
886 …%2 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %1, <4 x float> %v) nounwind re…
887   %3 = fmul <4 x float> %1, %2
888 …%4 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %3, <4 x float> %v) nounwind re…
889   %5 = fmul <4 x float> %4, %3
890   ret <4 x float> %5
893 define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone {
894   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
895   %2 = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %1) nounwind readnone
896   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
897   ret <3 x float> %3
905 define float @_Z10half_rsqrtf(float %v) {
906   %1 = insertelement <2 x float> undef, float %v, i32 0
907   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
908   %3 = extractelement <2 x float> %2, i32 0
909   ret float %3
912 define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone {
913   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
914   ret <2 x float> %1
917 define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone {
918   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
919   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
920   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
921   ret <3 x float> %3
924 define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone {
925   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
926   ret <4 x float> %1
933 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
935 %struct.rs_matrix4x4 = type { [16 x float] }
936 %struct.rs_matrix3x3 = type { [9 x float] }
937 %struct.rs_matrix2x2 = type { [4 x float] }
939 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
940   %1 = insertelement <4 x float> undef, float %in, i32 0
941   %2 = insertelement <4 x float> %1, float %in, i32 1
942   %3 = insertelement <4 x float> %2, float %in, i32 2
943   %4 = insertelement <4 x float> %3, float %in, i32 3
944   ret <4 x float> %4
948 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <…
949   %x0 = extractelement <3 x float> %in, i32 0
950   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
951   %y0 = extractelement <3 x float> %in, i32 1
952   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
953   %z0 = extractelement <3 x float> %in, i32 2
954   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
957   %px2 = bitcast float* %px to i8*
958   %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
961   %py2 = bitcast float* %py to i8*
962   %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
965   %pz2 = bitcast float* %pz to i8*
966   %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
967   %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
969   %a1 = fmul <4 x float> %x, %xm
970   %a2 = fmul <4 x float> %y, %ym
971   %a3 = fadd <4 x float> %a1, %a2
972   %a4 = fmul <4 x float> %z, %zm
973   %a5 = fadd <4 x float> %a4, %a3
974   %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
975   ret <3 x float> %a6
978 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <…
979   %x0 = extractelement <2 x float> %in, i32 0
980   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
981   %y0 = extractelement <2 x float> %in, i32 1
982   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
985   %px2 = bitcast float* %px to <4 x float>*
986   %xm = load <4 x float>, <4 x float>* %px2, align 4
988   %py2 = bitcast float* %py to <4 x float>*
989   %ym = load <4 x float>, <4 x float>* %py2, align 4
991   %a1 = fmul <4 x float> %x, %xm
992   %a2 = fmul <4 x float> %y, %ym
993   %a3 = fadd <4 x float> %a1, %a2
994   %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
995   ret <3 x float> %a4
998 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <…
999   %x0 = extractelement <4 x float> %in, i32 0
1000   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1001   %y0 = extractelement <4 x float> %in, i32 1
1002   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1003   %z0 = extractelement <4 x float> %in, i32 2
1004   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1005   %w0 = extractelement <4 x float> %in, i32 3
1006   %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
1009   %px2 = bitcast float* %px to <4 x float>*
1010   %xm = load <4 x float>, <4 x float>* %px2, align 4
1012   %py2 = bitcast float* %py to <4 x float>*
1013   %ym = load <4 x float>, <4 x float>* %py2, align 4
1015   %pz2 = bitcast float* %pz to <4 x float>*
1016   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1018   %pw2 = bitcast float* %pw to <4 x float>*
1019   %wm = load <4 x float>, <4 x float>* %pw2, align 4
1021   %a1 = fmul <4 x float> %x, %xm
1022   %a2 = fmul <4 x float> %y, %ym
1023   %a3 = fadd <4 x float> %a1, %a2
1024   %a4 = fmul <4 x float> %z, %zm
1025   %a5 = fadd <4 x float> %a3, %a4
1026   %a6 = fmul <4 x float> %w, %wm
1027   %a7 = fadd <4 x float> %a5, %a6
1028   ret <4 x float> %a7
1031 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <…
1032   %x0 = extractelement <3 x float> %in, i32 0
1033   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1034   %y0 = extractelement <3 x float> %in, i32 1
1035   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1036   %z0 = extractelement <3 x float> %in, i32 2
1037   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1040   %px2 = bitcast float* %px to <4 x float>*
1041   %xm = load <4 x float>, <4 x float>* %px2, align 4
1043   %py2 = bitcast float* %py to <4 x float>*
1044   %ym = load <4 x float>, <4 x float>* %py2, align 4
1046   %pz2 = bitcast float* %pz to <4 x float>*
1047   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1049   %pw2 = bitcast float* %pw to <4 x float>*
1050   %wm = load <4 x float>, <4 x float>* %pw2, align 4
1052   %a1 = fmul <4 x float> %x, %xm
1053   %a2 = fadd <4 x float> %wm, %a1
1054   %a3 = fmul <4 x float> %y, %ym
1055   %a4 = fadd <4 x float> %a2, %a3
1056   %a5 = fmul <4 x float> %z, %zm
1057   %a6 = fadd <4 x float> %a4, %a5
1058   ret <4 x float> %a6
1061 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <…
1062   %x0 = extractelement <2 x float> %in, i32 0
1063   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1064   %y0 = extractelement <2 x float> %in, i32 1
1065   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1068   %px2 = bitcast float* %px to <4 x float>*
1069   %xm = load <4 x float>, <4 x float>* %px2, align 4
1071   %py2 = bitcast float* %py to <4 x float>*
1072   %ym = load <4 x float>, <4 x float>* %py2, align 4
1074   %pw2 = bitcast float* %pw to <4 x float>*
1075   %wm = load <4 x float>, <4 x float>* %pw2, align 4
1077   %a1 = fmul <4 x float> %x, %xm
1078   %a2 = fadd <4 x float> %wm, %a1
1079   %a3 = fmul <4 x float> %y, %ym
1080   %a4 = fadd <4 x float> %a2, %a3
1081   ret <4 x float> %a4
1091 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, ali…
1092 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
1093 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
1095 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
1096 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
1099 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
1100     %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16
1101     %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16
1102     %f0 = load <4 x float>, <4 x float>* @fc_0, align 16
1103     %v1 = fmul <4 x float> %f255, %color
1104     %v2 = fadd <4 x float> %f05, %v1
1105 …%v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255…
1106     %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
1111 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
1112     %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1113     %2 = insertelement <4 x float> %1, float 1.0, i32 3
1114     %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
1118 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
1119 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
1120     %1 = insertelement <4 x float> undef, float %r, i32 0
1121     %2 = insertelement <4 x float> %1, float %g, i32 1
1122     %3 = insertelement <4 x float> %2, float %b, i32 2
1123     %4 = insertelement <4 x float> %3, float 1.0, i32 3
1124     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1128 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
1129 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnon…
1130     %1 = insertelement <4 x float> undef, float %r, i32 0
1131     %2 = insertelement <4 x float> %1, float %g, i32 1
1132     %3 = insertelement <4 x float> %2, float %b, i32 2
1133     %4 = insertelement <4 x float> %3, float %a, i32 3
1134     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone