Lines Matching full:val
426 const uint8x8_t s##k = vld2_u8(pu1_src).val[e_chroma_plane]; \ in ihevc_resi_trans_8x8_neon()
427 const uint8x8_t p##k = vld2_u8(pu1_pred).val[e_chroma_plane]; \ in ihevc_resi_trans_8x8_neon()
481 a2.val[0] = vmull_n_s16(vget_low_s16(eo0), 83); in ihevc_resi_trans_8x8_neon()
483 a6.val[0] = vmull_n_s16(vget_low_s16(eo0), 36); in ihevc_resi_trans_8x8_neon()
485 a2.val[1] = vmull_n_s16(vget_high_s16(eo0), 83); in ihevc_resi_trans_8x8_neon()
487 a6.val[1] = vmull_n_s16(vget_high_s16(eo0), 36); in ihevc_resi_trans_8x8_neon()
490 a6.val[1] = vmlsl_n_s16(a6.val[1], vget_high_s16(eo1), 83); in ihevc_resi_trans_8x8_neon()
492 a2.val[1] = vmlal_n_s16(a2.val[1], vget_high_s16(eo1), 36); in ihevc_resi_trans_8x8_neon()
494 a6.val[0] = vmlsl_n_s16(a6.val[0], vget_low_s16(eo1), 83); in ihevc_resi_trans_8x8_neon()
496 a2.val[0] = vmlal_n_s16(a2.val[0], vget_low_s16(eo1), 36); in ihevc_resi_trans_8x8_neon()
499 a0.val[0] = vshll_n_s16(vget_low_s16(eee), 6); in ihevc_resi_trans_8x8_neon()
501 a0.val[1] = vshll_n_s16(vget_high_s16(eee), 6); in ihevc_resi_trans_8x8_neon()
503 a4.val[0] = vshll_n_s16(vget_low_s16(eeo), 6); in ihevc_resi_trans_8x8_neon()
505 a4.val[1] = vshll_n_s16(vget_high_s16(eeo), 6); in ihevc_resi_trans_8x8_neon()
507 a7.val[0] = vmull_n_s16(vget_low_s16(o0), 18); /*F7[0] = 18*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
508 a5.val[0] = vmull_n_s16(vget_low_s16(o0), 50); /*F5[0] = 50*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
509 a3.val[0] = vmull_n_s16(vget_low_s16(o0), 75); /*F3[0] = 75*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
510 a1.val[0] = vmull_n_s16(vget_low_s16(o0), 89); /*F1[0] = 89*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
511 a1.val[1] = vmull_n_s16(vget_high_s16(o0), 89); /*F1[1] = 89*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
512 a3.val[1] = vmull_n_s16(vget_high_s16(o0), 75); /*F3[1] = 75*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
513 a5.val[1] = vmull_n_s16(vget_high_s16(o0), 50); /*F5[1] = 50*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
514 a7.val[1] = vmull_n_s16(vget_high_s16(o0), 18); /*F7[1] = 18*(C0 - C7)*/ in ihevc_resi_trans_8x8_neon()
517 a7.val[0] = vmlsl_n_s16(a7.val[0], vget_low_s16(o1), 50); in ihevc_resi_trans_8x8_neon()
519 a5.val[0] = vmlsl_n_s16(a5.val[0], vget_low_s16(o1), 89); in ihevc_resi_trans_8x8_neon()
521 a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o1), 18); in ihevc_resi_trans_8x8_neon()
523 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o1), 75); in ihevc_resi_trans_8x8_neon()
525 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o1), 75); in ihevc_resi_trans_8x8_neon()
527 a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o1), 18); in ihevc_resi_trans_8x8_neon()
529 a5.val[1] = vmlsl_n_s16(a5.val[1], vget_high_s16(o1), 89); in ihevc_resi_trans_8x8_neon()
531 a7.val[1] = vmlsl_n_s16(a7.val[1], vget_high_s16(o1), 50); in ihevc_resi_trans_8x8_neon()
534 a7.val[0] = vmlal_n_s16(a7.val[0], vget_low_s16(o2), 75); in ihevc_resi_trans_8x8_neon()
536 a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o2), 18); in ihevc_resi_trans_8x8_neon()
538 a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o2), 89); in ihevc_resi_trans_8x8_neon()
540 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o2), 50); in ihevc_resi_trans_8x8_neon()
542 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o2), 50); in ihevc_resi_trans_8x8_neon()
544 a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o2), 89); in ihevc_resi_trans_8x8_neon()
546 a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o2), 18); in ihevc_resi_trans_8x8_neon()
548 a7.val[1] = vmlal_n_s16(a7.val[1], vget_high_s16(o2), 75); in ihevc_resi_trans_8x8_neon()
551 a7.val[0] = vmlsl_n_s16(a7.val[0], vget_low_s16(o3), 89); in ihevc_resi_trans_8x8_neon()
553 a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o3), 75); in ihevc_resi_trans_8x8_neon()
555 a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o3), 50); in ihevc_resi_trans_8x8_neon()
557 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o3), 18); in ihevc_resi_trans_8x8_neon()
559 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o3), 18); in ihevc_resi_trans_8x8_neon()
561 a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o3), 50); in ihevc_resi_trans_8x8_neon()
563 a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o3), 75); in ihevc_resi_trans_8x8_neon()
565 a7.val[1] = vmlsl_n_s16(a7.val[1], vget_high_s16(o3), 89); in ihevc_resi_trans_8x8_neon()
577 int32x4x2_t b1 = vtrnq_s32(a0.val[1], a1.val[1]); in ihevc_resi_trans_8x8_neon()
578 int32x4x2_t b3 = vtrnq_s32(a2.val[1], a3.val[1]); in ihevc_resi_trans_8x8_neon()
579 int32x4x2_t b0 = vtrnq_s32(a0.val[0], a1.val[0]); in ihevc_resi_trans_8x8_neon()
580 int32x4x2_t b2 = vtrnq_s32(a2.val[0], a3.val[0]); in ihevc_resi_trans_8x8_neon()
583 a0.val[0] = vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b2.val[0])); in ihevc_resi_trans_8x8_neon()
584 a2.val[0] = vcombine_s32(vget_high_s32(b0.val[0]), vget_high_s32(b2.val[0])); in ihevc_resi_trans_8x8_neon()
585 a1.val[0] = vcombine_s32(vget_low_s32(b0.val[1]), vget_low_s32(b2.val[1])); in ihevc_resi_trans_8x8_neon()
586 a3.val[0] = vcombine_s32(vget_high_s32(b0.val[1]), vget_high_s32(b2.val[1])); in ihevc_resi_trans_8x8_neon()
587 a0.val[1] = vcombine_s32(vget_low_s32(b1.val[0]), vget_low_s32(b3.val[0])); in ihevc_resi_trans_8x8_neon()
588 a2.val[1] = vcombine_s32(vget_high_s32(b1.val[0]), vget_high_s32(b3.val[0])); in ihevc_resi_trans_8x8_neon()
589 a1.val[1] = vcombine_s32(vget_low_s32(b1.val[1]), vget_low_s32(b3.val[1])); in ihevc_resi_trans_8x8_neon()
590 a3.val[1] = vcombine_s32(vget_high_s32(b1.val[1]), vget_high_s32(b3.val[1])); in ihevc_resi_trans_8x8_neon()
592 o0_2 = vsubq_s32(a0.val[0], a3.val[1]); /*B0 - B7*/ in ihevc_resi_trans_8x8_neon()
593 o1_2 = vsubq_s32(a1.val[0], a2.val[1]); /*B1 - B6*/ in ihevc_resi_trans_8x8_neon()
594 o2_2 = vsubq_s32(a2.val[0], a1.val[1]); /*B2 - B5*/ in ihevc_resi_trans_8x8_neon()
595 o3_2 = vsubq_s32(a3.val[0], a0.val[1]); /*B3 - B4*/ in ihevc_resi_trans_8x8_neon()
596 e3_2 = vaddq_s32(a3.val[0], a0.val[1]); /*B3 + B4*/ in ihevc_resi_trans_8x8_neon()
597 e2_2 = vaddq_s32(a2.val[0], a1.val[1]); /*B2 + B5*/ in ihevc_resi_trans_8x8_neon()
598 e1_2 = vaddq_s32(a1.val[0], a2.val[1]); /*B1 + B6*/ in ihevc_resi_trans_8x8_neon()
599 e0_2 = vaddq_s32(a0.val[0], a3.val[1]); /*B0 + B7*/ in ihevc_resi_trans_8x8_neon()
691 int32x4x2_t b1 = vtrnq_s32(a4.val[1], a5.val[1]); in ihevc_resi_trans_8x8_neon()
692 int32x4x2_t b3 = vtrnq_s32(a6.val[1], a7.val[1]); in ihevc_resi_trans_8x8_neon()
693 int32x4x2_t b0 = vtrnq_s32(a4.val[0], a5.val[0]); in ihevc_resi_trans_8x8_neon()
694 int32x4x2_t b2 = vtrnq_s32(a6.val[0], a7.val[0]); in ihevc_resi_trans_8x8_neon()
697 a0.val[0] = vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b2.val[0])); in ihevc_resi_trans_8x8_neon()
698 a2.val[0] = vcombine_s32(vget_high_s32(b0.val[0]), vget_high_s32(b2.val[0])); in ihevc_resi_trans_8x8_neon()
699 a1.val[0] = vcombine_s32(vget_low_s32(b0.val[1]), vget_low_s32(b2.val[1])); in ihevc_resi_trans_8x8_neon()
700 a3.val[0] = vcombine_s32(vget_high_s32(b0.val[1]), vget_high_s32(b2.val[1])); in ihevc_resi_trans_8x8_neon()
701 a0.val[1] = vcombine_s32(vget_low_s32(b1.val[0]), vget_low_s32(b3.val[0])); in ihevc_resi_trans_8x8_neon()
702 a2.val[1] = vcombine_s32(vget_high_s32(b1.val[0]), vget_high_s32(b3.val[0])); in ihevc_resi_trans_8x8_neon()
703 a1.val[1] = vcombine_s32(vget_low_s32(b1.val[1]), vget_low_s32(b3.val[1])); in ihevc_resi_trans_8x8_neon()
704 a3.val[1] = vcombine_s32(vget_high_s32(b1.val[1]), vget_high_s32(b3.val[1])); in ihevc_resi_trans_8x8_neon()
706 o0_2 = vsubq_s32(a0.val[0], a3.val[1]); /*B0 - B7*/ in ihevc_resi_trans_8x8_neon()
707 o1_2 = vsubq_s32(a1.val[0], a2.val[1]); /*B1 - B6*/ in ihevc_resi_trans_8x8_neon()
708 o2_2 = vsubq_s32(a2.val[0], a1.val[1]); /*B2 - B5*/ in ihevc_resi_trans_8x8_neon()
709 o3_2 = vsubq_s32(a3.val[0], a0.val[1]); /*B3 - B4*/ in ihevc_resi_trans_8x8_neon()
710 e3_2 = vaddq_s32(a3.val[0], a0.val[1]); /*B3 + B4*/ in ihevc_resi_trans_8x8_neon()
711 e2_2 = vaddq_s32(a2.val[0], a1.val[1]); /*B2 + B5*/ in ihevc_resi_trans_8x8_neon()
712 e1_2 = vaddq_s32(a1.val[0], a2.val[1]); /*B1 + B6*/ in ihevc_resi_trans_8x8_neon()
713 e0_2 = vaddq_s32(a0.val[0], a3.val[1]); /*B0 + B7*/ in ihevc_resi_trans_8x8_neon()
814 b[i] = vld2_u8(a).val[e_chroma_plane]; in load()
858 b[0].val[i] = vaddq_s32(a[0].val[i], a[15].val[i]); in cross_input_32()
859 b[1].val[i] = vaddq_s32(a[1].val[i], a[14].val[i]); in cross_input_32()
860 b[2].val[i] = vaddq_s32(a[2].val[i], a[13].val[i]); in cross_input_32()
861 b[3].val[i] = vaddq_s32(a[3].val[i], a[12].val[i]); in cross_input_32()
862 b[4].val[i] = vaddq_s32(a[4].val[i], a[11].val[i]); in cross_input_32()
863 b[5].val[i] = vaddq_s32(a[5].val[i], a[10].val[i]); in cross_input_32()
864 b[6].val[i] = vaddq_s32(a[6].val[i], a[9].val[i]); in cross_input_32()
865 b[7].val[i] = vaddq_s32(a[7].val[i], a[8].val[i]); in cross_input_32()
867 b[8].val[i] = vsubq_s32(a[7].val[i], a[8].val[i]); in cross_input_32()
868 b[9].val[i] = vsubq_s32(a[6].val[i], a[9].val[i]); in cross_input_32()
869 b[10].val[i] = vsubq_s32(a[5].val[i], a[10].val[i]); in cross_input_32()
870 b[11].val[i] = vsubq_s32(a[4].val[i], a[11].val[i]); in cross_input_32()
871 b[12].val[i] = vsubq_s32(a[3].val[i], a[12].val[i]); in cross_input_32()
872 b[13].val[i] = vsubq_s32(a[2].val[i], a[13].val[i]); in cross_input_32()
873 b[14].val[i] = vsubq_s32(a[1].val[i], a[14].val[i]); in cross_input_32()
874 b[15].val[i] = vsubq_s32(a[0].val[i], a[15].val[i]); in cross_input_32()
898 vshrn_n_s32(vaddq_s32(a[0].val[0], vecadd), 13), in partial_round_shift()
899 vshrn_n_s32(vaddq_s32(a[0].val[1], vecadd), 13)); in partial_round_shift()
901 vshrn_n_s32(vaddq_s32(a[1].val[0], vecadd), 13), in partial_round_shift()
902 vshrn_n_s32(vaddq_s32(a[1].val[1], vecadd), 13)); in partial_round_shift()
904 vshrn_n_s32(vaddq_s32(a[2].val[0], vecadd), 13), in partial_round_shift()
905 vshrn_n_s32(vaddq_s32(a[2].val[1], vecadd), 13)); in partial_round_shift()
907 vshrn_n_s32(vaddq_s32(a[3].val[0], vecadd), 13), in partial_round_shift()
908 vshrn_n_s32(vaddq_s32(a[3].val[1], vecadd), 13)); in partial_round_shift()
910 vshrn_n_s32(vaddq_s32(a[4].val[0], vecadd), 13), in partial_round_shift()
911 vshrn_n_s32(vaddq_s32(a[4].val[1], vecadd), 13)); in partial_round_shift()
913 vshrn_n_s32(vaddq_s32(a[5].val[0], vecadd), 13), in partial_round_shift()
914 vshrn_n_s32(vaddq_s32(a[5].val[1], vecadd), 13)); in partial_round_shift()
916 vshrn_n_s32(vaddq_s32(a[6].val[0], vecadd), 13), in partial_round_shift()
917 vshrn_n_s32(vaddq_s32(a[6].val[1], vecadd), 13)); in partial_round_shift()
919 vshrn_n_s32(vaddq_s32(a[7].val[0], vecadd), 13), in partial_round_shift()
920 vshrn_n_s32(vaddq_s32(a[7].val[1], vecadd), 13)); in partial_round_shift()
922 vshrn_n_s32(vaddq_s32(a[8].val[0], vecadd), 13), in partial_round_shift()
923 vshrn_n_s32(vaddq_s32(a[8].val[1], vecadd), 13)); in partial_round_shift()
925 vshrn_n_s32(vaddq_s32(a[9].val[0], vecadd), 13), in partial_round_shift()
926 vshrn_n_s32(vaddq_s32(a[9].val[1], vecadd), 13)); in partial_round_shift()
928 vshrn_n_s32(vaddq_s32(a[10].val[0], vecadd), 13), in partial_round_shift()
929 vshrn_n_s32(vaddq_s32(a[10].val[1], vecadd), 13)); in partial_round_shift()
931 vshrn_n_s32(vaddq_s32(a[11].val[0], vecadd), 13), in partial_round_shift()
932 vshrn_n_s32(vaddq_s32(a[11].val[1], vecadd), 13)); in partial_round_shift()
934 vshrn_n_s32(vaddq_s32(a[12].val[0], vecadd), 13), in partial_round_shift()
935 vshrn_n_s32(vaddq_s32(a[12].val[1], vecadd), 13)); in partial_round_shift()
937 vshrn_n_s32(vaddq_s32(a[13].val[0], vecadd), 13), in partial_round_shift()
938 vshrn_n_s32(vaddq_s32(a[13].val[1], vecadd), 13)); in partial_round_shift()
940 vshrn_n_s32(vaddq_s32(a[14].val[0], vecadd), 13), in partial_round_shift()
941 vshrn_n_s32(vaddq_s32(a[14].val[1], vecadd), 13)); in partial_round_shift()
943 vshrn_n_s32(vaddq_s32(a[15].val[0], vecadd), 13), in partial_round_shift()
944 vshrn_n_s32(vaddq_s32(a[15].val[1], vecadd), 13)); in partial_round_shift()
962 row1->val[0] = vmlal_n_s16(a0, vget_low_s16(b), c); in butterfly_one_coeff_16_32()
963 row1->val[1] = vmlal_n_s16(a1, vget_high_s16(b), c); in butterfly_one_coeff_16_32()
964 row2->val[0] = vmlsl_n_s16(a0, vget_low_s16(b), c); in butterfly_one_coeff_16_32()
965 row2->val[1] = vmlsl_n_s16(a1, vget_high_s16(b), c); in butterfly_one_coeff_16_32()
975 row1->val[0] = vmlal_n_s16(a2, vget_low_s16(b), c0); in butterfly_two_coeff_16_32()
976 row1->val[1] = vmlal_n_s16(a3, vget_high_s16(b), c0); in butterfly_two_coeff_16_32()
977 row2->val[0] = vmlsl_n_s16(a0, vget_low_s16(b), c1); in butterfly_two_coeff_16_32()
978 row2->val[1] = vmlsl_n_s16(a1, vget_high_s16(b), c1); in butterfly_two_coeff_16_32()
984 const int32x4_t a0 = vmulq_n_s32(a.val[0], c); in butterfly_one_coeff_32_32()
985 const int32x4_t a1 = vmulq_n_s32(a.val[1], c); in butterfly_one_coeff_32_32()
986 row1->val[0] = vmlaq_n_s32(a0, b.val[0], c); in butterfly_one_coeff_32_32()
987 row1->val[1] = vmlaq_n_s32(a1, b.val[1], c); in butterfly_one_coeff_32_32()
988 row2->val[0] = vmlsq_n_s32(a0, b.val[0], c); in butterfly_one_coeff_32_32()
989 row2->val[1] = vmlsq_n_s32(a1, b.val[1], c); in butterfly_one_coeff_32_32()
995 const int32x4_t a0 = vmulq_n_s32(a.val[0], c0); in butterfly_two_coeff_32_32()
996 const int32x4_t a1 = vmulq_n_s32(a.val[1], c0); in butterfly_two_coeff_32_32()
997 const int32x4_t a2 = vmulq_n_s32(a.val[0], c1); in butterfly_two_coeff_32_32()
998 const int32x4_t a3 = vmulq_n_s32(a.val[1], c1); in butterfly_two_coeff_32_32()
999 row1->val[0] = vmlaq_n_s32(a2, b.val[0], c0); in butterfly_two_coeff_32_32()
1000 row1->val[1] = vmlaq_n_s32(a3, b.val[1], c0); in butterfly_two_coeff_32_32()
1001 row2->val[0] = vmlsq_n_s32(a0, b.val[0], c1); in butterfly_two_coeff_32_32()
1002 row2->val[1] = vmlsq_n_s32(a1, b.val[1], c1); in butterfly_two_coeff_32_32()
1009 const int32x4x2_t c0 = vtrnq_s32(a[0].val[0], a[1].val[0]); in transpose_8x8()
1010 const int32x4x2_t c1 = vtrnq_s32(a[2].val[0], a[3].val[0]); in transpose_8x8()
1011 const int32x4x2_t c2 = vtrnq_s32(a[4].val[0], a[5].val[0]); in transpose_8x8()
1012 const int32x4x2_t c3 = vtrnq_s32(a[6].val[0], a[7].val[0]); in transpose_8x8()
1013 const int32x4x2_t c4 = vtrnq_s32(a[0].val[1], a[1].val[1]); in transpose_8x8()
1014 const int32x4x2_t c5 = vtrnq_s32(a[2].val[1], a[3].val[1]); in transpose_8x8()
1015 const int32x4x2_t c6 = vtrnq_s32(a[4].val[1], a[5].val[1]); in transpose_8x8()
1016 const int32x4x2_t c7 = vtrnq_s32(a[6].val[1], a[7].val[1]); in transpose_8x8()
1018 const int32x4x2_t d0 = vtrnq_s64_to_s32(c0.val[0], c1.val[0]); in transpose_8x8()
1019 const int32x4x2_t d1 = vtrnq_s64_to_s32(c0.val[1], c1.val[1]); in transpose_8x8()
1020 const int32x4x2_t d2 = vtrnq_s64_to_s32(c2.val[0], c3.val[0]); in transpose_8x8()
1021 const int32x4x2_t d3 = vtrnq_s64_to_s32(c2.val[1], c3.val[1]); in transpose_8x8()
1022 const int32x4x2_t d4 = vtrnq_s64_to_s32(c4.val[0], c5.val[0]); in transpose_8x8()
1023 const int32x4x2_t d5 = vtrnq_s64_to_s32(c4.val[1], c5.val[1]); in transpose_8x8()
1024 const int32x4x2_t d6 = vtrnq_s64_to_s32(c6.val[0], c7.val[0]); in transpose_8x8()
1025 const int32x4x2_t d7 = vtrnq_s64_to_s32(c6.val[1], c7.val[1]); in transpose_8x8()
1027 b[0].val[0] = d0.val[0]; in transpose_8x8()
1028 b[0].val[1] = d2.val[0]; in transpose_8x8()
1029 b[1].val[0] = d1.val[0]; in transpose_8x8()
1030 b[1].val[1] = d3.val[0]; in transpose_8x8()
1031 b[2].val[0] = d0.val[1]; in transpose_8x8()
1032 b[2].val[1] = d2.val[1]; in transpose_8x8()
1033 b[3].val[0] = d1.val[1]; in transpose_8x8()
1034 b[3].val[1] = d3.val[1]; in transpose_8x8()
1035 b[4].val[0] = d4.val[0]; in transpose_8x8()
1036 b[4].val[1] = d6.val[0]; in transpose_8x8()
1037 b[5].val[0] = d5.val[0]; in transpose_8x8()
1038 b[5].val[1] = d7.val[0]; in transpose_8x8()
1039 b[6].val[0] = d4.val[1]; in transpose_8x8()
1040 b[6].val[1] = d6.val[1]; in transpose_8x8()
1041 b[7].val[0] = d5.val[1]; in transpose_8x8()
1042 b[7].val[1] = d7.val[1]; in transpose_8x8()
1079 out[2].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); in dct_body_16_32()
1080 out[2].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]); in dct_body_16_32()
1082 out[14].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); in dct_body_16_32()
1083 out[14].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]); in dct_body_16_32()
1088 out[10].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); in dct_body_16_32()
1089 out[10].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]); in dct_body_16_32()
1091 out[6].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); in dct_body_16_32()
1092 out[6].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]); in dct_body_16_32()
1100 out[1].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_16_32()
1101 out[1].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_16_32()
1103 out[15].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_16_32()
1104 out[15].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_16_32()
1111 out[3].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_16_32()
1112 out[3].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_16_32()
1114 out[13].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_16_32()
1115 out[13].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_16_32()
1122 out[5].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_16_32()
1123 out[5].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_16_32()
1125 out[11].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_16_32()
1126 out[11].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_16_32()
1133 out[7].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_16_32()
1134 out[7].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_16_32()
1136 out[9].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_16_32()
1137 out[9].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_16_32()
1150 s[0].val[i] = vaddq_s32(in[0].val[i], in[7].val[i]); in dct_body_32_32()
1151 s[1].val[i] = vaddq_s32(in[1].val[i], in[6].val[i]); in dct_body_32_32()
1152 s[2].val[i] = vaddq_s32(in[2].val[i], in[5].val[i]); in dct_body_32_32()
1153 s[3].val[i] = vaddq_s32(in[3].val[i], in[4].val[i]); in dct_body_32_32()
1154 s[4].val[i] = vsubq_s32(in[3].val[i], in[4].val[i]); in dct_body_32_32()
1155 s[5].val[i] = vsubq_s32(in[2].val[i], in[5].val[i]); in dct_body_32_32()
1156 s[6].val[i] = vsubq_s32(in[1].val[i], in[6].val[i]); in dct_body_32_32()
1157 s[7].val[i] = vsubq_s32(in[0].val[i], in[7].val[i]); in dct_body_32_32()
1159 x[0].val[i] = vaddq_s32(s[0].val[i], s[3].val[i]); in dct_body_32_32()
1160 x[1].val[i] = vaddq_s32(s[1].val[i], s[2].val[i]); in dct_body_32_32()
1161 x[2].val[i] = vsubq_s32(s[1].val[i], s[2].val[i]); in dct_body_32_32()
1162 x[3].val[i] = vsubq_s32(s[0].val[i], s[3].val[i]); in dct_body_32_32()
1177 out[2].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); in dct_body_32_32()
1178 out[2].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]); in dct_body_32_32()
1180 out[14].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); in dct_body_32_32()
1181 out[14].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]); in dct_body_32_32()
1186 out[10].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); in dct_body_32_32()
1187 out[10].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]); in dct_body_32_32()
1189 out[6].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); in dct_body_32_32()
1190 out[6].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]); in dct_body_32_32()
1198 out[1].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_32_32()
1199 out[1].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_32_32()
1201 out[15].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_32_32()
1202 out[15].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_32_32()
1209 out[3].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_32_32()
1210 out[3].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_32_32()
1212 out[13].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_32_32()
1213 out[13].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_32_32()
1220 out[5].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_32_32()
1221 out[5].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_32_32()
1223 out[11].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_32_32()
1224 out[11].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_32_32()
1231 out[7].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]); in dct_body_32_32()
1232 out[7].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]); in dct_body_32_32()
1234 out[9].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]); in dct_body_32_32()
1235 out[9].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]); in dct_body_32_32()