Lines Matching refs:s
232 dup v0.4s, w4
240 sxtl v8.4s, v2.4h
241 sxtl v10.4s, v3.4h
242 raddhn v12.4h, v0.4s, v8.4s
243 raddhn2 v12.8h, v0.4s, v10.4s
250 sxtl v8.4s, v2.4h
251 sxtl v10.4s, v3.4h
252 raddhn v12.4h, v0.4s, v8.4s
253 raddhn2 v12.8h, v0.4s, v10.4s
260 sxtl v8.4s, v2.4h
261 sxtl v10.4s, v3.4h
262 raddhn v12.4h, v0.4s, v8.4s
263 raddhn2 v12.8h, v0.4s, v10.4s
270 sxtl v8.4s, v2.4h
271 sxtl v10.4s, v3.4h
272 raddhn v12.4h, v0.4s, v8.4s
273 raddhn2 v12.8h, v0.4s, v10.4s
280 sxtl v8.4s, v2.4h
281 sxtl v10.4s, v3.4h
282 raddhn v12.4h, v0.4s, v8.4s
283 raddhn2 v12.8h, v0.4s, v10.4s
290 sxtl v8.4s, v2.4h
291 sxtl v10.4s, v3.4h
292 raddhn v12.4h, v0.4s, v8.4s
293 raddhn2 v12.8h, v0.4s, v10.4s
300 sxtl v8.4s, v2.4h
301 sxtl v10.4s, v3.4h
302 raddhn v12.4h, v0.4s, v8.4s
303 raddhn2 v12.8h, v0.4s, v10.4s
310 sxtl v8.4s, v2.4h
311 sxtl v10.4s, v3.4h
312 raddhn v12.4h, v0.4s, v8.4s
313 raddhn2 v12.8h, v0.4s, v10.4s
387 smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
389 smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
392 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
394 smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
396 smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
398 smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
400 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
402 smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
404 smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
406 smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
408 smull v22.4s, v10.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
410 smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
434 smlal v24.4s, v14.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
435 smlsl v26.4s, v14.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
436 smlal v28.4s, v14.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
437 smlal v30.4s, v14.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
439 smlsl v18.4s, v11.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
440 smlal v6.4s, v11.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
442 add v10.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
443 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
445 …smlal v24.4s, v15.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(p…
446 …smlsl v26.4s, v15.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(p…
447 …smlal v28.4s, v15.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(p…
448 …smlsl v30.4s, v15.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(p…
450 add v14.4s, v10.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
451 sub v10.4s, v10.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
452 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
453 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
455 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
456 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
458 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
459 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
461 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
462 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
464 add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
465 sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
467 sqrshrn v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
468 sqrshrn v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
469 sqrshrn v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
470 sqrshrn v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
471 sqrshrn v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
472 sqrshrn v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
473 sqrshrn v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
474 sqrshrn v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
505 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
506 smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
507 smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
508 smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
510 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
511 smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
512 smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
513 smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
515 smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
516 smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
518 smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
521 add v14.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
522 sub v10.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
523 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
524 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
526 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
527 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
529 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
530 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
532 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
533 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
535 add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
536 sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
538 sqrshrn v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
539 sqrshrn v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
540 sqrshrn v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
541 sqrshrn v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
542 sqrshrn v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
543 sqrshrn v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
544 sqrshrn v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
545 sqrshrn v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
557 smull v24.4s, v8.4h, v0.4h[1] //// y1 * cos1(part of b0)
558 smull v26.4s, v8.4h, v0.4h[3] //// y1 * cos3(part of b1)
559 smull v28.4s, v8.4h, v1.4h[1] //// y1 * sin3(part of b2)
560 smull v30.4s, v8.4h, v1.4h[3] //// y1 * sin1(part of b3)
562 smlal v24.4s, v9.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
563 smlsl v26.4s, v9.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
564 smlsl v28.4s, v9.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
565 smlsl v30.4s, v9.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
567 smull v18.4s, v5.4h, v1.4h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1)
568 smull v8.4s, v5.4h, v0.4h[2] //// y2 * cos2(part of d0)
570 smull v20.4s, v4.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
571 smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
573 smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
574 smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
575 smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
576 smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
578 smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
579 smlal v8.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
581 add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
582 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
584 …smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(p…
585 …smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(p…
586 …smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(p…
587 …smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(p…
589 add v16.4s, v12.4s , v8.4s //// a0 = c0 + d0(part of e0,e7)
590 sub v12.4s, v12.4s , v8.4s //// a3 = c0 - d0(part of e3,e4)
591 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of e2,e5)
592 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of e1,e6)
594 add v20.4s, v16.4s , v24.4s //// a0 + b0(part of e0)
595 sub v8.4s, v16.4s , v24.4s //// a0 - b0(part of e7)
597 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of e2)
598 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of e5)
600 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of e1)
601 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of e6)
603 add v26.4s, v12.4s , v30.4s //// a3 + b3(part of e3)
604 sub v30.4s, v12.4s , v30.4s //// a3 - b3(part of e4)
606 sqrshrn v4.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
607 sqrshrn v17.4h, v8.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
608 sqrshrn v5.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
609 sqrshrn v16.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
610 sqrshrn v8.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
611 sqrshrn v13.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
612 sqrshrn v9.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
613 sqrshrn v12.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
631 trn1 v6.2s, v29.2s, v31.2s
632 trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
633 trn1 v2.2s, v25.2s, v27.2s
634 trn2 v3.2s, v25.2s, v27.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
643 trn1 v10.2s, v25.2s, v27.2s
644 trn2 v11.2s, v25.2s, v27.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
645 trn1 v14.2s, v29.2s, v31.2s
646 trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
650 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
651 smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
652 smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
653 smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
655 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
656 smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
657 smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
658 smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
660 smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
663 smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
664 smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
669 sub v22.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
670 add v4.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
673 add v2.4s, v4.4s , v24.4s
675 sub v6.4s, v4.4s , v24.4s
677 add v8.4s, v22.4s , v30.4s
679 sub v24.4s, v22.4s , v30.4s
681 sqrshrn v5.4h, v8.4s, #idct_stg2_shift
682 sqrshrn v2.4h, v2.4s, #idct_stg2_shift
683 sqrshrn v9.4h, v6.4s, #idct_stg2_shift
684 sqrshrn v6.4h, v24.4s, #idct_stg2_shift
686 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
687 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
690 add v30.4s, v22.4s , v28.4s
692 sub v24.4s, v22.4s , v28.4s
694 add v28.4s, v18.4s , v26.4s
696 sub v22.4s, v18.4s , v26.4s
697 sqrshrn v4.4h, v30.4s, #idct_stg2_shift
698 sqrshrn v7.4h, v24.4s, #idct_stg2_shift
699 sqrshrn v3.4h, v28.4s, #idct_stg2_shift
700 sqrshrn v8.4h, v22.4s, #idct_stg2_shift
712 trn1 v2.2s, v27.2s, v25.2s
713 trn2 v4.2s, v27.2s, v25.2s
714 trn1 v3.2s, v29.2s, v31.2s
715 trn2 v5.2s, v29.2s, v31.2s
722 trn1 v6.2s, v27.2s, v25.2s
723 trn2 v8.2s, v27.2s, v25.2s
724 trn1 v7.2s, v29.2s, v31.2s
725 trn2 v9.2s, v29.2s, v31.2s
730 smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
732 smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
733 smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
734 smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
736 smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
737 smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
738 smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
739 smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
740 smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
741 smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
742 smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
767 sub v22.4s, v20.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
768 add v12.4s, v20.4s , v14.4s //// a0 = c0 + d0(part of x0,x7)
771 add v0.4s, v12.4s , v24.4s
774 sub v24.4s, v12.4s , v24.4s
777 add v12.4s, v22.4s , v30.4s
780 sub v14.4s, v22.4s , v30.4s
782 sqrshrn v10.4h, v0.4s, #idct_stg2_shift
783 sqrshrn v17.4h, v24.4s, #idct_stg2_shift
784 sqrshrn v13.4h, v12.4s, #idct_stg2_shift
785 sqrshrn v14.4h, v14.4s, #idct_stg2_shift
787 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
788 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
791 add v0.4s, v22.4s , v28.4s
794 sub v24.4s, v22.4s , v28.4s
797 add v28.4s, v18.4s , v26.4s
800 sub v26.4s, v18.4s , v26.4s
803 sqrshrn v12.4h, v0.4s, #idct_stg2_shift
807 sqrshrn v15.4h, v24.4s, #idct_stg2_shift
813 sqrshrn v11.4h, v28.4s, #idct_stg2_shift
819 sqrshrn v16.4h, v26.4s, #idct_stg2_shift
839 trn1 v2.2s, v27.2s, v25.2s
840 trn2 v3.2s, v27.2s, v25.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
841 trn1 v6.2s, v29.2s, v31.2s
842 trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
849 trn1 v4.2s, v27.2s, v25.2s
850 trn2 v5.2s, v27.2s, v25.2s ////x0,x1,x2,x3 second quadrant transposing continued.....
851 trn1 v8.2s, v29.2s, v31.2s
852 trn2 v9.2s, v29.2s, v31.2s ////x0,x1,x2,x3 second quadrant transposing continued.....
859 trn1 v10.2s, v27.2s, v25.2s
860 trn2 v11.2s, v27.2s, v25.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
861 trn1 v14.2s, v29.2s, v31.2s
862 trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
869 trn1 v12.2s, v27.2s, v25.2s
870 trn2 v13.2s, v27.2s, v25.2s ////x4,x5,x6,x7 fourth quadrant transposing continued.....
871 trn1 v16.2s, v29.2s, v31.2s
872 trn2 v17.2s, v29.2s, v31.2s ////x4,x5,x6,x7 fourth quadrant transposing continued.....
911 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
912 smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
913 smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
914 smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
916 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
917 smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
918 smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
919 smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
921 smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
922 smull v22.4s, v4.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
924 smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
925 smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
928 smlal v24.4s, v8.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
929 smlsl v26.4s, v8.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
930 smlal v28.4s, v8.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
931 smlal v30.4s, v8.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
933 smlsl v18.4s, v5.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
934 smlal v6.4s, v5.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
936 add v2.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
937 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
939 …smlal v24.4s, v9.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa…
940 …smlsl v26.4s, v9.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(pa…
941 …smlal v28.4s, v9.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(pa…
942 …smlsl v30.4s, v9.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(pa…
944 sub v22.4s, v2.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
945 add v4.4s, v2.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
948 add v2.4s, v4.4s , v24.4s
950 sub v6.4s, v4.4s , v24.4s
952 add v8.4s, v22.4s , v30.4s
954 sub v24.4s, v22.4s , v30.4s
956 sqrshrn v5.4h, v8.4s, #idct_stg2_shift
957 sqrshrn v2.4h, v2.4s, #idct_stg2_shift
958 sqrshrn v9.4h, v6.4s, #idct_stg2_shift
959 sqrshrn v6.4h, v24.4s, #idct_stg2_shift
961 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
962 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
965 add v30.4s, v22.4s , v28.4s
967 sub v24.4s, v22.4s , v28.4s
969 add v28.4s, v18.4s , v26.4s
971 sub v22.4s, v18.4s , v26.4s
972 sqrshrn v4.4h, v30.4s, #idct_stg2_shift
973 sqrshrn v7.4h, v24.4s, #idct_stg2_shift
974 sqrshrn v3.4h, v28.4s, #idct_stg2_shift
975 sqrshrn v8.4h, v22.4s, #idct_stg2_shift
987 trn1 v2.2s, v27.2s, v25.2s
988 trn2 v4.2s, v27.2s, v25.2s
989 trn1 v3.2s, v29.2s, v31.2s
990 trn2 v5.2s, v29.2s, v31.2s
997 trn1 v6.2s, v27.2s, v25.2s
998 trn2 v8.2s, v27.2s, v25.2s
999 trn1 v7.2s, v29.2s, v31.2s
1000 trn2 v9.2s, v29.2s, v31.2s
1007 smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
1008 smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
1009 smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
1010 smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
1011 smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
1012 smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
1013 smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
1014 smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
1015 smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
1016 smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
1017 smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
1018 smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
1019 smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
1022 smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
1025 smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
1028 smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
1031 smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
1034 smlal v14.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
1036 add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
1037 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
1039 …smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(p…
1046 …smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(p…
1052 …smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(p…
1053 …smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(p…
1055 sub v22.4s, v12.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
1056 add v12.4s, v12.4s , v14.4s //// a0 = c0 + d0(part of x0,x7)
1059 add v0.4s, v12.4s , v24.4s
1062 sub v24.4s, v12.4s , v24.4s
1065 add v12.4s, v22.4s , v30.4s
1068 sub v14.4s, v22.4s , v30.4s
1070 sqrshrn v10.4h, v0.4s, #idct_stg2_shift
1071 sqrshrn v17.4h, v24.4s, #idct_stg2_shift
1072 sqrshrn v13.4h, v12.4s, #idct_stg2_shift
1073 sqrshrn v14.4h, v14.4s, #idct_stg2_shift
1075 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
1076 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
1079 add v0.4s, v22.4s , v28.4s
1082 sub v24.4s, v22.4s , v28.4s
1085 add v28.4s, v18.4s , v26.4s
1088 sub v26.4s, v18.4s , v26.4s
1091 sqrshrn v12.4h, v0.4s, #idct_stg2_shift
1095 sqrshrn v15.4h, v24.4s, #idct_stg2_shift
1101 sqrshrn v11.4h, v28.4s, #idct_stg2_shift
1107 sqrshrn v16.4h, v26.4s, #idct_stg2_shift
1123 trn1 v10.2s, v27.2s, v25.2s
1124 trn2 v12.2s, v27.2s, v25.2s
1125 trn1 v11.2s, v29.2s, v31.2s
1126 trn2 v13.2s, v29.2s, v31.2s
1133 trn1 v14.2s, v27.2s, v25.2s
1134 trn2 v16.2s, v27.2s, v25.2s
1135 trn1 v15.2s, v29.2s, v31.2s
1136 trn2 v17.2s, v29.2s, v31.2s