Lines Matching full:h
146 ld1 {v0.4h, v1.4h, v2.4h, v3.4h},[x14] ////v0-v3 (4 halfword lanes each) are used for storing the constant data loaded from [x14]
235 ld1 {v10.4h},[x0],x6
236 ld1 {v11.4h},[x9],x6
237 ld1 {v6.4h},[x0],x10
238 ld1 {v7.4h},[x9],x10
242 ld1 {v4.4h},[x0],x6
243 ld1 {v5.4h},[x9],x6
244 ld1 {v8.4h},[x0],x8
245 ld1 {v9.4h},[x9],x8
255 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
256 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
257 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
258 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
260 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
261 smlal v26.4s, v7.4h, v2.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
262 smlal v28.4s, v7.4h, v3.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
263 smlsl v30.4s, v7.4h, v2.h[3] //// y1 * sin1 - y3 * sin3(part of b3)
270 smull v12.4s, v10.4h, v0.h[0]
271 smlal v12.4s, v11.4h, v0.h[2]
272 smull v14.4s, v10.4h, v0.h[0]
273 smlal v14.4s, v11.4h, v1.h[2]
274 smull v16.4s, v10.4h, v0.h[0]
275 smlal v16.4s, v11.4h, v2.h[2]
276 smull v18.4s, v10.4h, v0.h[0]
277 smlal v18.4s, v11.4h, v3.h[2]
282 smlal v24.4s, v8.4h, v1.h[1]
283 smlal v26.4s, v8.4h, v3.h[3]
284 smlsl v28.4s, v8.4h, v1.h[3]
285 smlsl v30.4s, v8.4h, v0.h[3]
288 smlal v24.4s, v9.4h, v1.h[3]
289 smlsl v26.4s, v9.4h, v2.h[3]
290 smlsl v28.4s, v9.4h, v0.h[3]
291 smlal v30.4s, v9.4h, v3.h[3]
297 smlal v12.4s, v4.4h, v1.h[0]
298 smlal v12.4s, v5.4h, v1.h[2]
299 smlal v14.4s, v4.4h, v3.h[0]
300 smlsl v14.4s, v5.4h, v3.h[2]
301 smlsl v16.4s, v4.4h, v3.h[0]
302 smlsl v16.4s, v5.4h, v0.h[2]
303 smlsl v18.4s, v4.4h, v1.h[0]
304 smlsl v18.4s, v5.4h, v2.h[2]
319 ld1 {v10.4h},[x0],x6
320 ld1 {v11.4h},[x9],x6
321 ld1 {v6.4h},[x0],x10
322 ld1 {v7.4h},[x9],x10
323 ld1 {v4.4h},[x0],x6
324 ld1 {v5.4h},[x9],x6
325 ld1 {v8.4h},[x0],x5
326 ld1 {v9.4h},[x9],x5
331 smlal v24.4s, v6.4h, v2.h[1] //// y1 * cos1(part of b0)
332 smlsl v26.4s, v6.4h, v1.h[1] //// y1 * cos3(part of b1)
333 smlsl v28.4s, v6.4h, v3.h[1] //// y1 * sin3(part of b2)
334 smlal v30.4s, v6.4h, v0.h[1] //// y1 * sin1(part of b3)
336 smlal v24.4s, v7.4h, v2.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
337 smlsl v26.4s, v7.4h, v0.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
338 smlal v28.4s, v7.4h, v2.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
339 smlal v30.4s, v7.4h, v3.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
343 smlal v24.4s, v8.4h, v3.h[1]
344 smlsl v26.4s, v8.4h, v1.h[3]
345 smlal v28.4s, v8.4h, v0.h[1]
346 smlsl v30.4s, v8.4h, v1.h[1]
349 smlal v24.4s, v9.4h, v3.h[3]
350 smlsl v26.4s, v9.4h, v3.h[1]
351 smlal v28.4s, v9.4h, v2.h[3]
352 smlsl v30.4s, v9.4h, v2.h[1]
358 smlal v12.4s, v10.4h, v0.h[0]
359 smlal v12.4s, v11.4h, v2.h[2]
360 smlal v12.4s, v4.4h, v3.h[0]
361 smlal v12.4s, v5.4h, v3.h[2]
366 smlsl v14.4s, v10.4h, v0.h[0]
367 smlsl v14.4s, v11.4h, v0.h[2]
368 smlsl v14.4s, v4.4h, v1.h[0]
369 smlsl v14.4s, v5.4h, v2.h[2]
372 smlsl v16.4s, v10.4h, v0.h[0]
373 smlal v16.4s, v11.4h, v3.h[2]
374 smlal v16.4s, v4.4h, v1.h[0]
375 smlal v16.4s, v5.4h, v1.h[2]
378 smlal v18.4s, v10.4h, v0.h[0]
379 smlal v18.4s, v11.4h, v1.h[2]
380 smlsl v18.4s, v4.4h, v3.h[0]
381 smlsl v18.4s, v5.4h, v0.h[2]
403 sqrshrn v30.4h, v20.4s,#shift_stage1_idct //// x0 = (a0 + b0 + rnd) >> 7(shift_stage1_idct)
404 sqrshrn v19.4h, v22.4s,#shift_stage1_idct //// x7 = (a0 - b0 + rnd) >> 7(shift_stage1_idct)
405 sqrshrn v31.4h, v14.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct)
406 sqrshrn v18.4h, v26.4s,#shift_stage1_idct //// x5 = (a2 - b2 + rnd) >> 7(shift_stage1_idct)
407 sqrshrn v12.4h, v12.4s,#shift_stage1_idct //// x1 = (a1 + b1 + rnd) >> 7(shift_stage1_idct)
408 sqrshrn v15.4h, v24.4s,#shift_stage1_idct //// x6 = (a1 - b1 + rnd) >> 7(shift_stage1_idct)
409 sqrshrn v13.4h, v16.4s,#shift_stage1_idct //// x3 = (a3 + b3 + rnd) >> 7(shift_stage1_idct)
410 sqrshrn v14.4h, v28.4s,#shift_stage1_idct //// x4 = (a3 - b3 + rnd) >> 7(shift_stage1_idct)
412 st1 {v30.4h, v31.4h},[x1],#16
413 st1 {v18.4h, v19.4h},[x1],#16
422 ld1 {v10.4h},[x0],x6
423 ld1 {v11.4h},[x9],x6
424 ld1 {v6.4h},[x0],x10
425 ld1 {v7.4h},[x9],x10
426 ld1 {v4.4h},[x0],x6
427 ld1 {v5.4h},[x9],x6
428 ld1 {v8.4h},[x0],x8
429 ld1 {v9.4h},[x9],x8
433 smull v24.4s, v6.4h, v2.h[1] //// y1 * cos1(part of b0)
434 smull v26.4s, v6.4h, v2.h[3] //// y1 * cos3(part of b1)
435 smull v28.4s, v6.4h, v3.h[1] //// y1 * sin3(part of b2)
436 smull v30.4s, v6.4h, v3.h[3] //// y1 * sin1(part of b3)
438 smlsl v24.4s, v7.4h, v1.h[1] //// y1 * cos1 + y3 * cos3(part of b0)
439 smlsl v26.4s, v7.4h, v0.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
440 smlsl v28.4s, v7.4h, v1.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
441 smlsl v30.4s, v7.4h, v3.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
448 smull v22.4s, v10.4h, v0.h[0]
449 smlsl v22.4s, v11.4h, v3.h[2]
450 smull v20.4s, v10.4h, v0.h[0]
451 smlsl v20.4s, v11.4h, v2.h[2]
452 smull v16.4s, v10.4h, v0.h[0]
453 smlsl v16.4s, v11.4h, v1.h[2]
454 smull v18.4s, v10.4h, v0.h[0]
455 smlsl v18.4s, v11.4h, v0.h[2]
461 smlsl v24.4s, v8.4h, v3.h[1]
462 smlal v26.4s, v8.4h, v2.h[1]
463 smlal v28.4s, v8.4h, v0.h[1]
464 smlal v30.4s, v8.4h, v2.h[3]
467 smlal v24.4s, v9.4h, v0.h[1]
468 smlal v26.4s, v9.4h, v3.h[1]
469 smlsl v28.4s, v9.4h, v1.h[1]
470 smlsl v30.4s, v9.4h, v2.h[1]
474 smlsl v22.4s, v4.4h, v1.h[0]
475 smlal v22.4s, v5.4h, v2.h[2]
476 smlsl v20.4s, v4.4h, v3.h[0]
477 smlal v20.4s, v5.4h, v0.h[2]
478 smlal v16.4s, v4.4h, v3.h[0]
479 smlal v16.4s, v5.4h, v3.h[2]
480 smlal v18.4s, v4.4h, v1.h[0]
481 smlsl v18.4s, v5.4h, v1.h[2]
495 ld1 {v10.4h},[x0],x6
496 ld1 {v11.4h},[x9],x6
497 ld1 {v6.4h},[x0],x10
498 ld1 {v7.4h},[x9],x10
499 ld1 {v4.4h},[x0],x6
500 ld1 {v5.4h},[x9],x6
501 ld1 {v8.4h},[x0],x5
502 ld1 {v9.4h},[x9],x5
505 smlsl v24.4s, v6.4h, v3.h[3] //// y1 * cos1(part of b0)
506 smlsl v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
507 smlal v28.4s, v6.4h, v2.h[3] //// y1 * sin3(part of b2)
508 smlal v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
510 smlsl v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
511 smlal v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
512 smlal v28.4s, v7.4h, v3.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
513 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
516 smlal v24.4s, v8.4h, v2.h[3]
517 smlal v26.4s, v8.4h, v3.h[3]
518 smlsl v28.4s, v8.4h, v2.h[1]
519 smlal v30.4s, v8.4h, v0.h[3]
522 smlal v24.4s, v9.4h, v1.h[3]
523 smlsl v26.4s, v9.4h, v1.h[1]
524 smlal v28.4s, v9.4h, v0.h[3]
525 smlsl v30.4s, v9.4h, v0.h[1]
530 smlal v22.4s, v10.4h, v0.h[0]
531 smlsl v22.4s, v11.4h, v1.h[2]
532 smlsl v22.4s, v4.4h, v3.h[0]
533 smlal v22.4s, v5.4h, v0.h[2]
537 smlsl v20.4s, v10.4h, v0.h[0]
538 smlsl v20.4s, v11.4h, v3.h[2]
539 smlal v20.4s, v4.4h, v1.h[0]
540 smlsl v20.4s, v5.4h, v1.h[2]
543 smlsl v16.4s, v10.4h, v0.h[0]
544 smlal v16.4s, v11.4h, v0.h[2]
545 smlsl v16.4s, v4.4h, v1.h[0]
546 smlal v16.4s, v5.4h, v2.h[2]
550 smlal v18.4s, v10.4h, v0.h[0]
551 smlsl v18.4s, v11.4h, v2.h[2]
552 smlal v18.4s, v4.4h, v3.h[0]
553 smlsl v18.4s, v5.4h, v3.h[2]
571 sqrshrn v18.4h, v4.4s,#shift_stage1_idct //// x0 = (a0 + b0 + rnd) >> 7(shift_stage1_idct)
572 sqrshrn v31.4h, v22.4s,#shift_stage1_idct //// x7 = (a0 - b0 + rnd) >> 7(shift_stage1_idct)
573 sqrshrn v19.4h, v10.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct)
574 sqrshrn v30.4h, v26.4s,#shift_stage1_idct //// x5 = (a2 - b2 + rnd) >> 7(shift_stage1_idct)
575 sqrshrn v20.4h, v6.4s,#shift_stage1_idct //// x1 = (a1 + b1 + rnd) >> 7(shift_stage1_idct)
576 sqrshrn v23.4h, v24.4s,#shift_stage1_idct //// x6 = (a1 - b1 + rnd) >> 7(shift_stage1_idct)
577 sqrshrn v21.4h, v16.4s,#shift_stage1_idct //// x3 = (a3 + b3 + rnd) >> 7(shift_stage1_idct)
578 sqrshrn v22.4h, v28.4s,#shift_stage1_idct //// x4 = (a3 - b3 + rnd) >> 7(shift_stage1_idct)
588 ld1 {v4.4h, v5.4h},[x1],#16
589 ld1 {v8.4h, v9.4h},[x1],#16
617 trn1 v26.4h, v4.4h, v12.4h
618 trn2 v27.4h, v4.4h, v12.4h
619 trn1 v28.4h, v5.4h, v13.4h
620 trn2 v29.4h, v5.4h, v13.4h
627 trn1 v26.4h, v18.4h, v20.4h
628 trn2 v27.4h, v18.4h, v20.4h
629 trn1 v28.4h, v19.4h, v21.4h
630 trn2 v29.4h, v19.4h, v21.4h
637 trn1 v26.4h, v22.4h, v30.4h
638 trn2 v27.4h, v22.4h, v30.4h
639 trn1 v28.4h, v23.4h, v31.4h
640 trn2 v29.4h, v23.4h, v31.4h
647 trn1 v26.4h, v14.4h, v8.4h
648 trn2 v27.4h, v14.4h, v8.4h
649 trn1 v28.4h, v15.4h, v9.4h
650 trn2 v29.4h, v15.4h, v9.4h
683 st1 { v4.4h, v5.4h},[x1],#16
684 st1 { v12.4h, v13.4h},[x1],#16
686 st1 { v18.4h, v19.4h},[x1],#16
687 st1 { v20.4h, v21.4h},[x1],#16
688 st1 { v22.4h, v23.4h},[x1],#16
689 st1 { v30.4h, v31.4h},[x1],#16
690 st1 { v14.4h, v15.4h},[x1],#16
691 st1 { v8.4h, v9.4h},[x1],#16
748 ld1 {v10.4h, v11.4h},[x1],#16
749 ld1 {v6.4h, v7.4h},[x1],x10
752 ld1 {v4.4h, v5.4h},[x9],#16
753 ld1 {v8.4h, v9.4h},[x9],x10
758 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
759 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
760 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
761 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
763 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
764 smlal v26.4s, v7.4h, v2.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
765 smlal v28.4s, v7.4h, v3.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
766 smlsl v30.4s, v7.4h, v2.h[3] //// y1 * sin1 - y3 * sin3(part of b3)
769 smull v12.4s, v10.4h, v0.h[0]
770 smlal v12.4s, v11.4h, v0.h[2]
771 smull v14.4s, v10.4h, v0.h[0]
772 smlal v14.4s, v11.4h, v1.h[2]
773 smull v16.4s, v10.4h, v0.h[0]
774 smlal v16.4s, v11.4h, v2.h[2]
775 smull v18.4s, v10.4h, v0.h[0]
776 smlal v18.4s, v11.4h, v3.h[2]
780 smlal v24.4s, v8.4h, v1.h[1]
781 smlal v26.4s, v8.4h, v3.h[3]
782 smlsl v28.4s, v8.4h, v1.h[3]
783 smlsl v30.4s, v8.4h, v0.h[3]
786 smlal v24.4s, v9.4h, v1.h[3]
787 smlsl v26.4s, v9.4h, v2.h[3]
788 smlsl v28.4s, v9.4h, v0.h[3]
789 smlal v30.4s, v9.4h, v3.h[3]
792 smlal v12.4s, v4.4h, v1.h[0]
793 smlal v12.4s, v5.4h, v1.h[2]
794 smlal v14.4s, v4.4h, v3.h[0]
795 smlsl v14.4s, v5.4h, v3.h[2]
796 smlsl v16.4s, v4.4h, v3.h[0]
797 smlsl v16.4s, v5.4h, v0.h[2]
798 smlsl v18.4s, v4.4h, v1.h[0]
799 smlsl v18.4s, v5.4h, v2.h[2]
806 ld1 {v10.4h, v11.4h},[x11],#16
807 ld1 {v6.4h, v7.4h},[x11],x10
808 ld1 {v4.4h, v5.4h},[x0],#16
809 ld1 {v8.4h, v9.4h},[x0],x10
815 smlal v24.4s, v6.4h, v2.h[1] //// y1 * cos1(part of b0)
816 smlsl v26.4s, v6.4h, v1.h[1] //// y1 * cos3(part of b1)
817 smlsl v28.4s, v6.4h, v3.h[1] //// y1 * sin3(part of b2)
818 smlal v30.4s, v6.4h, v0.h[1] //// y1 * sin1(part of b3)
820 smlal v24.4s, v7.4h, v2.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
821 smlsl v26.4s, v7.4h, v0.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
822 smlal v28.4s, v7.4h, v2.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
823 smlal v30.4s, v7.4h, v3.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
827 smlal v24.4s, v8.4h, v3.h[1]
828 smlsl v26.4s, v8.4h, v1.h[3]
829 smlal v28.4s, v8.4h, v0.h[1]
830 smlsl v30.4s, v8.4h, v1.h[1]
833 smlal v24.4s, v9.4h, v3.h[3]
834 smlsl v26.4s, v9.4h, v3.h[1]
835 smlal v28.4s, v9.4h, v2.h[3]
836 smlsl v30.4s, v9.4h, v2.h[1]
842 smlal v12.4s, v10.4h, v0.h[0]
843 smlal v12.4s, v11.4h, v2.h[2]
844 smlal v12.4s, v4.4h, v3.h[0]
845 smlal v12.4s, v5.4h, v3.h[2]
850 smlsl v14.4s, v10.4h, v0.h[0]
851 smlsl v14.4s, v11.4h, v0.h[2]
852 smlsl v14.4s, v4.4h, v1.h[0]
853 smlsl v14.4s, v5.4h, v2.h[2]
856 smlsl v16.4s, v10.4h, v0.h[0]
857 smlal v16.4s, v11.4h, v3.h[2]
858 smlal v16.4s, v4.4h, v1.h[0]
859 smlal v16.4s, v5.4h, v1.h[2]
862 smlal v18.4s, v10.4h, v0.h[0]
863 smlal v18.4s, v11.4h, v1.h[2]
864 smlsl v18.4s, v4.4h, v3.h[0]
865 smlsl v18.4s, v5.4h, v0.h[2]
895 sqrshrn v30.4h, v20.4s,#shift_stage2_idct //// x0 = (a0 + b0 + rnd) >> shift_stage2_idct (saturating, rounding narrow to 16 bit)
896 sqrshrn v19.4h, v22.4s,#shift_stage2_idct //// x7 = (a0 - b0 + rnd) >> shift_stage2_idct
897 sqrshrn v31.4h, v14.4s,#shift_stage2_idct //// x2 = (a2 + b2 + rnd) >> shift_stage2_idct
898 sqrshrn v18.4h, v26.4s,#shift_stage2_idct //// x5 = (a2 - b2 + rnd) >> shift_stage2_idct
899 sqrshrn v12.4h, v12.4s,#shift_stage2_idct //// x1 = (a1 + b1 + rnd) >> shift_stage2_idct
900 sqrshrn v15.4h, v24.4s,#shift_stage2_idct //// x6 = (a1 - b1 + rnd) >> shift_stage2_idct
901 sqrshrn v13.4h, v16.4s,#shift_stage2_idct //// x3 = (a3 + b3 + rnd) >> shift_stage2_idct
902 sqrshrn v14.4h, v28.4s,#shift_stage2_idct //// x4 = (a3 - b3 + rnd) >> shift_stage2_idct
907 ld1 {v10.4h, v11.4h},[x1],#16
908 ld1 {v6.4h, v7.4h},[x1],#16
909 ld1 {v4.4h, v5.4h},[x9],#16
910 ld1 {v8.4h, v9.4h},[x9],#16
913 st1 {v30.4h, v31.4h},[x1],#16
914 st1 {v18.4h, v19.4h},[x1],#16
917 smull v24.4s, v6.4h, v2.h[1] //// y1 * cos1(part of b0)
918 smull v26.4s, v6.4h, v2.h[3] //// y1 * cos3(part of b1)
919 smull v28.4s, v6.4h, v3.h[1] //// y1 * sin3(part of b2)
920 smull v30.4s, v6.4h, v3.h[3] //// y1 * sin1(part of b3)
922 smlsl v24.4s, v7.4h, v1.h[1] //// y1 * cos1 + y3 * cos3(part of b0)
923 smlsl v26.4s, v7.4h, v0.h[1] //// y1 * cos3 - y3 * sin1(part of b1)
924 smlsl v28.4s, v7.4h, v1.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
925 smlsl v30.4s, v7.4h, v3.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
928 smull v22.4s, v10.4h, v0.h[0]
929 smlsl v22.4s, v11.4h, v3.h[2]
930 smull v20.4s, v10.4h, v0.h[0]
931 smlsl v20.4s, v11.4h, v2.h[2]
932 smull v16.4s, v10.4h, v0.h[0]
933 smlsl v16.4s, v11.4h, v1.h[2]
934 smull v18.4s, v10.4h, v0.h[0]
935 smlsl v18.4s, v11.4h, v0.h[2]
943 smlsl v24.4s, v8.4h, v3.h[1]
944 smlal v26.4s, v8.4h, v2.h[1]
945 smlal v28.4s, v8.4h, v0.h[1]
946 smlal v30.4s, v8.4h, v2.h[3]
949 smlal v24.4s, v9.4h, v0.h[1]
950 smlal v26.4s, v9.4h, v3.h[1]
951 smlsl v28.4s, v9.4h, v1.h[1]
952 smlsl v30.4s, v9.4h, v2.h[1]
956 smlsl v22.4s, v4.4h, v1.h[0]
957 smlal v22.4s, v5.4h, v2.h[2]
958 smlsl v20.4s, v4.4h, v3.h[0]
959 smlal v20.4s, v5.4h, v0.h[2]
960 smlal v16.4s, v4.4h, v3.h[0]
961 smlal v16.4s, v5.4h, v3.h[2]
962 smlal v18.4s, v4.4h, v1.h[0]
963 smlsl v18.4s, v5.4h, v1.h[2]
968 ld1 {v10.4h, v11.4h},[x11],#16
969 ld1 {v6.4h, v7.4h},[x11],#16
970 ld1 {v4.4h, v5.4h},[x0],#16
971 ld1 {v8.4h, v9.4h},[x0],#16
973 smlsl v24.4s, v6.4h, v3.h[3] //// y1 * cos1(part of b0)
974 smlsl v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
975 smlal v28.4s, v6.4h, v2.h[3] //// y1 * sin3(part of b2)
976 smlal v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
978 smlsl v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
979 smlal v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
980 smlal v28.4s, v7.4h, v3.h[3] //// y1 * sin3 - y3 * cos1(part of b2)
981 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
984 smlal v24.4s, v8.4h, v2.h[3]
985 smlal v26.4s, v8.4h, v3.h[3]
986 smlsl v28.4s, v8.4h, v2.h[1]
987 smlal v30.4s, v8.4h, v0.h[3]
990 smlal v24.4s, v9.4h, v1.h[3]
991 smlsl v26.4s, v9.4h, v1.h[1]
992 smlal v28.4s, v9.4h, v0.h[3]
993 smlsl v30.4s, v9.4h, v0.h[1]
998 smlal v22.4s, v10.4h, v0.h[0]
999 smlsl v22.4s, v11.4h, v1.h[2]
1000 smlsl v22.4s, v4.4h, v3.h[0]
1001 smlal v22.4s, v5.4h, v0.h[2]
1005 smlsl v20.4s, v10.4h, v0.h[0]
1006 smlsl v20.4s, v11.4h, v3.h[2]
1007 smlal v20.4s, v4.4h, v1.h[0]
1008 smlsl v20.4s, v5.4h, v1.h[2]
1011 smlsl v16.4s, v10.4h, v0.h[0]
1012 smlal v16.4s, v11.4h, v0.h[2]
1013 smlsl v16.4s, v4.4h, v1.h[0]
1014 smlal v16.4s, v5.4h, v2.h[2]
1018 smlal v18.4s, v10.4h, v0.h[0]
1019 smlsl v18.4s, v11.4h, v2.h[2]
1020 smlal v18.4s, v4.4h, v3.h[0]
1021 smlsl v18.4s, v5.4h, v3.h[2]
1042 sqrshrn v18.4h, v4.4s,#shift_stage2_idct //// x0 = (a0 + b0 + rnd) >> shift_stage2_idct (saturating, rounding narrow to 16 bit)
1043 sqrshrn v31.4h, v22.4s,#shift_stage2_idct //// x7 = (a0 - b0 + rnd) >> shift_stage2_idct
1044 sqrshrn v19.4h, v10.4s,#shift_stage2_idct //// x2 = (a2 + b2 + rnd) >> shift_stage2_idct
1045 sqrshrn v30.4h, v26.4s,#shift_stage2_idct //// x5 = (a2 - b2 + rnd) >> shift_stage2_idct
1046 sqrshrn v20.4h, v6.4s,#shift_stage2_idct //// x1 = (a1 + b1 + rnd) >> shift_stage2_idct
1047 sqrshrn v23.4h, v24.4s,#shift_stage2_idct //// x6 = (a1 - b1 + rnd) >> shift_stage2_idct
1048 sqrshrn v21.4h, v16.4s,#shift_stage2_idct //// x3 = (a3 + b3 + rnd) >> shift_stage2_idct
1049 sqrshrn v22.4h, v28.4s,#shift_stage2_idct //// x4 = (a3 - b3 + rnd) >> shift_stage2_idct
1051 ld1 {v4.4h, v5.4h},[x1],#16
1052 ld1 {v8.4h, v9.4h},[x1],#16
1083 trn1 v26.4h, v4.4h, v12.4h
1084 trn2 v27.4h, v4.4h, v12.4h
1085 trn1 v28.4h, v5.4h, v13.4h
1086 trn2 v29.4h, v5.4h, v13.4h
1093 trn1 v26.4h, v18.4h, v20.4h
1094 trn2 v27.4h, v18.4h, v20.4h
1095 trn1 v28.4h, v19.4h, v21.4h
1096 trn2 v29.4h, v19.4h, v21.4h
1103 trn1 v26.4h, v22.4h, v30.4h
1104 trn2 v27.4h, v22.4h, v30.4h
1105 trn1 v28.4h, v23.4h, v31.4h
1106 trn2 v29.4h, v23.4h, v31.4h
1113 trn1 v26.4h, v14.4h, v8.4h
1114 trn2 v27.4h, v14.4h, v8.4h
1115 trn1 v28.4h, v15.4h, v9.4h
1116 trn2 v29.4h, v15.4h, v9.4h
1192 uaddw v4.8h, v4.8h , v16.8b
1193 uaddw v22.8h, v22.8h , v17.8b
1194 uaddw v12.8h, v12.8h , v28.8b
1195 uaddw v30.8h, v30.8h , v29.8b
1196 uaddw v18.8h, v18.8h , v24.8b
1197 uaddw v14.8h, v14.8h , v25.8b
1198 uaddw v20.8h, v20.8h , v26.8b
1199 uaddw v8.8h, v8.8h , v27.8b
1202 sqxtun v16.8b, v4.8h
1203 sqxtun v17.8b, v22.8h
1204 sqxtun v28.8b, v12.8h
1205 sqxtun v29.8b, v30.8h
1206 sqxtun v24.8b, v18.8h
1207 sqxtun v25.8b, v14.8h
1208 sqxtun v26.8b, v20.8h
1209 sqxtun v27.8b, v8.8h