/external/libavc/common/armv8/ |
D | ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s | 79 movi v24.8h, #0x5 // Filter coeff 5 into Q12 114 uaddl v24.8h, v5.8b, v7.8b 117 mla v20.8h, v24.8h , v28.8h 118 uaddl v24.8h, v14.8b, v15.8b 123 mla v22.8h, v24.8h , v28.8h 126 ext v24.16b, v18.16b , v20.16b , #4 130 add v0.8h, v24.8h , v26.8h 131 ext v24.16b, v18.16b , v20.16b , #2 133 add v24.8h, v24.8h , v26.8h 137 smlsl v26.4s, v24.4h, v30.4h [all …]
|
D | ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s | 140 movi v24.8h, #0x5 // Filter coeff 5 into Q12 176 uaddl v24.8h, v5.8b, v7.8b 179 mla v20.8h, v24.8h , v28.8h 180 uaddl v24.8h, v14.8b, v15.8b 185 mla v22.8h, v24.8h , v28.8h 189 ext v24.16b, v18.16b , v20.16b , #4 193 add v0.8h, v24.8h , v26.8h 194 ext v24.16b, v18.16b , v20.16b , #2 196 add v24.8h, v24.8h , v26.8h 200 smlsl v26.4s, v24.4h, v30.4h [all …]
|
D | ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s | 153 uaddl v24.8h, v0.8b, v10.8b 154 umlal v24.8h, v4.8b, v30.8b 155 umlal v24.8h, v6.8b, v30.8b 156 umlsl v24.8h, v2.8b, v31.8b 157 umlsl v24.8h, v8.8b, v31.8b 163 sqrshrun v26.8b, v24.8h, #5 170 uaddl v24.8h, v1.8b, v11.8b 171 umlal v24.8h, v5.8b, v30.8b 172 umlal v24.8h, v7.8b, v30.8b 173 umlsl v24.8h, v3.8b, v31.8b [all …]
|
D | ih264_inter_pred_chroma_av8.s | 169 umull v24.8h, v5.8b, v28.8b 171 umlal v24.8h, v8.8b, v29.8b 173 umlal v24.8h, v10.8b, v30.8b 175 umlal v24.8h, v13.8b, v31.8b 178 sqrshrun v18.8b, v24.8h, #6 189 umull v24.8h, v11.8b, v28.8b 191 umlal v24.8h, v14.8b, v29.8b 193 umlal v24.8h, v1.8b, v30.8b 195 umlal v24.8h, v4.8b, v31.8b 197 sqrshrun v27.8b, v24.8h, #6 [all …]
|
D | ih264_inter_pred_filters_luma_vert_av8.s | 118 movi v24.8h, #5 // Filter coeff 0x4 into Q12 145 mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 149 mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 156 mls v16.8h, v18.8h , v24.8h 164 mls v14.8h, v26.8h , v24.8h 170 mls v18.8h, v20.8h , v24.8h 178 mls v16.8h, v26.8h , v24.8h 185 mls v14.8h, v20.8h , v24.8h 188 mls v18.8h, v26.8h , v24.8h 205 mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 [all …]
|
D | ih264_resi_trans_quant_av8.s | 118 ld1 {v24.8b}, [x0] //load first 8 pix src row 4 124 usubl v6.8h, v24.8b, v25.8b //find residue row 4 172 add v24.4h, v18.4h , v19.4h //x5 = x0 + x1; 179 st1 {v24.h}[0], [x10] //store the dc value to alternate dc sddress 183 abs v0.4h, v24.4h //abs val of row 1 188 cmgt v4.4h, v24.4h, #0 203 dup v24.4s, w7 205 sshl v20.4s, v20.4s, v24.4s //shift row 1 206 sshl v21.4s, v21.4s, v24.4s //shift row 2 207 sshl v22.4s, v22.4s, v24.4s //shift row 3 [all …]
|
D | ih264_inter_pred_luma_vert_qpel_av8.s | 125 movi v24.8h, #5 // Filter coeff 0x4 into Q12 152 mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 156 mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 163 mls v16.8h, v18.8h , v24.8h 173 mls v14.8h, v26.8h , v24.8h 179 mls v18.8h, v20.8h , v24.8h 189 mls v16.8h, v26.8h , v24.8h 196 mls v14.8h, v20.8h , v24.8h 200 mls v18.8h, v26.8h , v24.8h 222 mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 [all …]
|
/external/libhevc/common/arm64/ |
D | ihevc_intra_pred_luma_vert.s | 188 dup v24.16b,w12 //src[2nt+1] 213 sqxtun v24.8b, v28.8h 214 sqxtun2 v24.16b, v0.8h 218 rev64 v24.16b, v24.16b 219 mov v25.d[0], v24.d[1] 223 bsl v18.8b, v24.8b , v16.8b //only select row values from q12(predpixel) 239 bsl v1.8b, v24.8b , v16.8b 264 bsl v18.8b, v24.8b , v16.8b //only select row values from q12(predpixel) 277 bsl v1.8b, v24.8b , v16.8b 294 bsl v18.8b, v24.8b , v16.8b //only select row values from q12(predpixel) [all …]
|
D | ihevc_inter_pred_chroma_vert_w16inp.s | 224 smull v24.4s, v3.4h, v16.4h //vmull_s16(src_tmp2, coeff_0) 226 smlal v24.4s, v4.4h, v17.4h 228 smlal v24.4s, v5.4h, v18.4h 230 smlal v24.4s, v6.4h, v19.4h 248 sqshrn v24.4h, v24.4s,#6 //right shift 263 sqrshrun v24.8b, v24.8h,#6 //rounding shift 269 st1 {v24.s}[0],[x9] //stores the loaded value 279 smull v24.4s, v3.4h, v16.4h //vmull_s16(src_tmp2, coeff_0) 282 smlal v24.4s, v4.4h, v17.4h 284 smlal v24.4s, v5.4h, v18.4h [all …]
|
D | ihevc_itrans_recon_32x32.s | 216 smull v24.4s, v8.4h, v0.h[1] //// y1 * cos1(part of b0) 221 smlal v24.4s, v9.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0) 256 smlal v24.4s, v14.4h, v1.h[1] 262 smlal v24.4s, v15.4h, v1.h[3] 286 smlal v24.4s, v8.4h, v2.h[1] //// y1 * cos1(part of b0) 291 smlal v24.4s, v9.4h, v2.h[3] //// y1 * cos1 + y3 * cos3(part of b0) 330 smlal v24.4s, v14.4h, v3.h[1] 336 smlal v24.4s, v15.4h, v3.h[3] 362 smlal v24.4s, v8.4h, v4.h[1] //// y1 * cos1(part of b0) 367 smlal v24.4s, v9.4h, v4.h[3] //// y1 * cos1 + y3 * cos3(part of b0) [all …]
|
D | ihevc_inter_pred_chroma_vert_w16inp_w16out.s | 222 smull v24.4s, v3.4h, v16.4h //vmull_s16(src_tmp2, coeff_0) 224 smlal v24.4s, v4.4h, v17.4h 226 smlal v24.4s, v5.4h, v18.4h 228 smlal v24.4s, v6.4h, v19.4h 245 sqshrn v24.4h, v24.4s,#6 //right shift 264 st1 {v24.2s},[x9] //stores the loaded value 273 smull v24.4s, v3.4h, v16.4h //vmull_s16(src_tmp2, coeff_0) 275 smlal v24.4s, v4.4h, v17.4h 278 smlal v24.4s, v5.4h, v18.4h 281 smlal v24.4s, v6.4h, v19.4h [all …]
|
D | ihevc_sao_edge_offset_class0_chroma.s | 198 cmhi v24.16b, v28.16b , v30.16b //II vcltq_u8(pu1_cur_row, pu1_cur_row_tmp) 223 …SUB v20.16b, v24.16b , v26.16b //II sign_left = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp… 227 cmhi v24.16b, v28.16b , v30.16b //II vcltq_u8(pu1_cur_row, pu1_cur_row_tmp) 237 …SUB v22.16b, v24.16b , v26.16b //II sign_right = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cm… 239 ADD v24.16b, v2.16b , v20.16b //II edge_idx = vaddq_s8(const_2, sign_left) 243 ADD v24.16b, v24.16b , v22.16b //II edge_idx = vaddq_s8(edge_idx, sign_right) 249 TBL v24.16b, {v5.16b},v24.16b //II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 257 AND v24.16b, v24.16b , v3.16b //II edge_idx = vandq_s8(edge_idx, au1_mask) 258 mov v25.d[0],v24.d[1] 260 UZP1 v1.8b, v24.8b, v25.8b [all …]
|
D | ihevc_itrans_recon_8x8.s | 192 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0) 200 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0) 234 smlal v24.4s, v14.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) 245 …smlal v24.4s, v15.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa… 255 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0) 256 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7) 258 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2) 269 sqrshrn v3.4h, v24.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct) 304 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0) 309 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0) [all …]
|
D | ihevc_inter_pred_chroma_vert.s | 268 umull v24.8h, v16.8b, v1.8b 270 umlsl v24.8h, v7.8b, v0.8b 273 umlal v24.8h, v17.8b, v2.8b 275 umlsl v24.8h, v18.8b, v3.8b 300 sqrshrun v24.8b, v24.8h,#6 308 st1 {v24.8b},[x7],x3 //stores the loaded value 337 umull v24.8h, v16.8b, v1.8b 348 umlsl v24.8h, v7.8b, v0.8b 351 umlal v24.8h, v17.8b, v2.8b 354 umlsl v24.8h, v18.8b, v3.8b [all …]
|
D | ihevc_weighted_pred_bi_default.s | 210 ld1 {v24.4h},[x11],x3 //load and increment the pi2_src1 iv iteration 212 sqadd v18.4h,v24.4h,v25.4h //vaddq_s32(i4_tmp2_t1, i4_tmp2_t2) iv iteration 292 ld1 { v24.8h},[x0],#16 //load and increment the pi2_src1 295 sqadd v24.8h,v24.8h,v26.8h 297 sqadd v24.8h,v24.8h,v0.8h //vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t) 303 sqshrun v20.8b, v24.8h,#7 352 ld1 { v24.8h},[x0],#16 //load and increment the pi2_src1 355 sqadd v24.8h,v24.8h,v26.8h 357 sqadd v24.8h,v24.8h,v0.8h //vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t) 362 sqshrun v20.8b, v24.8h,#7 [all …]
|
D | ihevc_sao_edge_offset_class1_chroma.s | 195 cmhi v24.16b, v30.16b , v18.16b //II vcltq_u8(pu1_cur_row, pu1_top_row) 197 …SUB v28.16b, v24.16b , v22.16b //II sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp… 232 TBL v24.8b, {v7.16b},v22.8b //offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 236 ZIP1 v27.8b, v24.8b, v25.8b 237 ZIP2 v25.8b, v24.8b, v25.8b 238 mov v24.8b,v27.8b 244 …SADDW v26.8h, v26.8h , v24.8b //II pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val… 287 TBL v24.8b, {v7.16b},v22.8b 289 ZIP1 v27.8b, v24.8b, v25.8b 290 ZIP2 v25.8b, v24.8b, v25.8b [all …]
|
D | ihevc_intra_pred_luma_horz.s | 209 usubl v24.8h, v30.8b, v28.8b 212 sshr v24.8h, v24.8h,#1 215 sqadd v22.8h, v26.8h , v24.8h 223 usubl v24.8h, v31.8b, v28.8b 226 sshr v24.8h, v24.8h,#1 229 sqadd v22.8h, v26.8h , v24.8h 291 usubl v24.8h, v30.8b, v28.8b 294 sshr v24.8h, v24.8h,#1 297 sqadd v22.8h, v26.8h , v24.8h 335 usubl v24.8h, v30.8b, v28.8b [all …]
|
D | ihevc_intra_pred_luma_mode_3_to_9.s | 190 umull v24.8h, v12.8b, v7.8b //mul (row 0) 191 umlal v24.8h, v13.8b, v6.8b //mul (row 0) 197 rshrn v24.8b, v24.8h,#5 //round shft (row 0) 207 st1 {v24.8b},[x2], x3 //st (row 0) 233 umull v24.8h, v12.8b, v7.8b //mul (row 4) 234 umlal v24.8h, v13.8b, v6.8b //mul (row 4) 241 rshrn v24.8b, v24.8h,#5 //round shft (row 4) 251 st1 {v24.8b},[x2], x3 //st (row 4) 328 st1 {v24.8b},[x5], x3 //st (row 4) 363 umull v24.8h, v12.8b, v7.8b //mul (row 0) [all …]
|
D | ihevc_intra_pred_filters_luma_mode_11_to_17.s | 310 umull v24.8h, v12.8b, v7.8b //mul (row 0) 311 umlal v24.8h, v13.8b, v6.8b //mul (row 0) 317 rshrn v24.8b, v24.8h,#5 //round shft (row 0) 327 st1 {v24.8b},[x2], x3 //st (row 0) 353 umull v24.8h, v12.8b, v7.8b //mul (row 4) 354 umlal v24.8h, v13.8b, v6.8b //mul (row 4) 361 rshrn v24.8b, v24.8h,#5 //round shft (row 4) 371 st1 {v24.8b},[x2], x3 //st (row 4) 448 st1 {v24.8b},[x5], x3 //st (row 4) 449 rshrn v24.8b, v22.8h,#5 //round shft (row 5) [all …]
|
D | ihevc_itrans_recon_16x16.s | 255 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0) 260 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0) 282 smlal v24.4s, v8.4h, v1.h[1] 288 smlal v24.4s, v9.4h, v1.h[3] 331 smlal v24.4s, v6.4h, v2.h[1] //// y1 * cos1(part of b0) 336 smlal v24.4s, v7.4h, v2.h[3] //// y1 * cos1 + y3 * cos3(part of b0) 343 smlal v24.4s, v8.4h, v3.h[1] 349 smlal v24.4s, v9.4h, v3.h[3] 384 add v20.4s, v12.4s , v24.4s 385 sub v22.4s, v12.4s , v24.4s [all …]
|
D | ihevc_intra_pred_chroma_mode_3_to_9.s | 189 umull v24.8h, v25.8b, v7.8b //mul (row 0) 190 umlal v24.8h, v13.8b, v6.8b //mul (row 0) 196 rshrn v24.8b, v24.8h,#5 //round shft (row 0) 206 st1 {v24.8b},[x2], x3 //st (row 0) 232 umull v24.8h, v25.8b, v7.8b //mul (row 4) 233 umlal v24.8h, v13.8b, v6.8b //mul (row 4) 242 rshrn v24.8b, v24.8h,#5 //round shft (row 4) 252 st1 {v24.8b},[x2], x3 //st (row 4) 340 st1 {v24.8b},[x5], x3 //st (row 4) 374 umull v24.8h, v25.8b, v7.8b //mul (row 0) [all …]
|
/external/libavc/encoder/armv8/ |
D | ih264e_half_pel_av8.s | 176 …sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2… 181 st1 {v23.8b, v24.8b}, [x1], #16 ////Store dest row1 320 uaddl v24.8h, v4.8b, v19.8b //// a0 + a5 (column3,row0) 322 umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) 323 umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) 324 umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) 325 umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) 326 mov v25.d[0], v24.d[1] 341 …sqrshrun v4.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3… 350 ext v31.8b, v23.8b , v24.8b , #2 [all …]
|
D | ih264e_evaluate_intra16x16_modes_av8.s | 181 uabdl v24.8h, v1.8b, v31.8b 202 uabal v24.8h, v3.8b, v31.8b 218 uabal v24.8h, v5.8b, v31.8b 234 uabal v24.8h, v7.8b, v31.8b 251 uabal v24.8h, v1.8b, v31.8b 267 uabal v24.8h, v3.8b, v31.8b 284 uabal v24.8h, v5.8b, v31.8b 301 uabal v24.8h, v7.8b, v31.8b 321 uabal v24.8h, v1.8b, v31.8b 338 uabal v24.8h, v3.8b, v31.8b [all …]
|
D | ih264e_evaluate_intra_chroma_modes_av8.s | 213 uabdl v24.8h, v1.8b, v29.8b 231 uabal v24.8h, v3.8b, v29.8b 247 uabal v24.8h, v5.8b, v29.8b 262 uabal v24.8h, v7.8b, v29.8b 283 uabal v24.8h, v1.8b, v31.8b 300 uabal v24.8h, v3.8b, v31.8b 317 uabal v24.8h, v5.8b, v31.8b 332 uabal v24.8h, v7.8b, v31.8b 359 add v24.8h, v22.8h , v24.8h ///DC 360 mov v25.d[0], v24.d[1] [all …]
|
/external/libmpeg2/common/armv8/ |
D | impeg2_idct.s | 392 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) 400 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) 434 smlal v24.4s, v14.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) 445 …smlal v24.4s, v15.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(p… 455 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0) 456 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7) 458 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2) 469 sqrshrn v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) 505 smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) 510 smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) [all …]
|