;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp9_post_proc_down_and_across_xmm
;(
;    unsigned char *src_ptr,
;    unsigned char *dst_ptr,
;    int src_pixels_per_line,
;    int dst_pixels_per_line,
;    int rows,
;    int cols,
;    int flimit
;)
;
; For every row, 8 pixels at a time:
;   1. "down" pass:   filter src vertically (rows -2..+2) and write to dst.
;   2. "across" pass: filter the dst row horizontally (pixels -2..+2) in place.
; Both passes compute (4*p0 + p-2 + p-1 + p+1 + p+2 + 4) >> 3 and keep the
; filtered value only where every |p0 - neighbour| is <= flimit; otherwise the
; unfiltered p0 is kept.
;
; NOTE(review): the loads reach two rows above/below and a few bytes left/right
; of the processed area, so the caller presumably supplies a border - confirm.
; NOTE(review): mm0 is used and no emms is executed here; presumably the caller
; clears the MMX state afterwards - confirm.
global sym(vp9_post_proc_down_and_across_xmm) PRIVATE
sym(vp9_post_proc_down_and_across_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    ALIGN_STACK 16, rax
    ; move the global rd onto the stack, since we don't have enough registers
    ; to do PIC addressing
    movdqa      xmm0,       [GLOBAL(rd42)]
    sub         rsp,        16
    movdqa      [rsp],      xmm0
%define RD42 [rsp]
%else
%define RD42 [GLOBAL(rd42)]
%endif


    ; broadcast the low word of flimit to all 8 word lanes of xmm2
    movd        xmm2,       dword ptr arg(6) ;flimit
    punpcklwd   xmm2,       xmm2
    punpckldq   xmm2,       xmm2
    punpcklqdq  xmm2,       xmm2

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(1) ;dst_ptr

    movsxd      rcx,        DWORD PTR arg(4) ;rows
    movsxd      rax,        DWORD PTR arg(2) ;src_pixels_per_line
    pxor        xmm0,       xmm0              ; xmm0 = 0 (used for unpacking)

.nextrow:

    xor         rdx,        rdx       ; clear out rdx for use as loop counter
.nextcol:
    movq        xmm3,       QWORD PTR [rsi]         ; r0 p0..p7 (bytes)
    punpcklbw   xmm3,       xmm0                    ; xmm3 = r0 p0..p7 (words)
    movdqa      xmm1,       xmm3                    ; xmm1 = r0 p0..p7
    psllw       xmm3,       2                       ; xmm3 = 4 * r0

    movq        xmm5,       QWORD PTR [rsi + rax]   ; r1 p0..p7 (bytes)
    punpcklbw   xmm5,       xmm0                    ; xmm5 = r1 p0..p7 (words)
    paddusw     xmm3,       xmm5                    ; xmm3 += r1

    ; thresholding
    movdqa      xmm7,       xmm1                    ; xmm7 = r0
    psubusw     xmm7,       xmm5                    ; xmm7 = sat(r0 - r1)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(r1 - r0)
    paddusw     xmm7,       xmm5                    ; xmm7 = abs(r0 - r1)
    pcmpgtw     xmm7,       xmm2                    ; mask: abs diff > flimit

    movq        xmm5,       QWORD PTR [rsi + 2*rax] ; r2 p0..p7 (bytes)
    punpcklbw   xmm5,       xmm0                    ; xmm5 = r2 p0..p7 (words)
    paddusw     xmm3,       xmm5                    ; xmm3 += r2

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = r0
    psubusw     xmm6,       xmm5                    ; xmm6 = sat(r0 - r2)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(r2 - r0)
    paddusw     xmm6,       xmm5                    ; xmm6 = abs(r0 - r2)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds


    neg         rax                                 ; rax = -pitch
    movq        xmm5,       QWORD PTR [rsi+2*rax]   ; r-2 p0..p7 (bytes)
    punpcklbw   xmm5,       xmm0                    ; xmm5 = r-2 p0..p7 (words)
    paddusw     xmm3,       xmm5                    ; xmm3 += r-2

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = r0
    psubusw     xmm6,       xmm5                    ; xmm6 = sat(r0 - r-2)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(r-2 - r0)
    paddusw     xmm6,       xmm5                    ; xmm6 = abs(r0 - r-2)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds

    movq        xmm4,       QWORD PTR [rsi+rax]     ; r-1 p0..p7 (bytes)
    punpcklbw   xmm4,       xmm0                    ; xmm4 = r-1 p0..p7 (words)
    paddusw     xmm3,       xmm4                    ; xmm3 += r-1

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = r0
    psubusw     xmm6,       xmm4                    ; xmm6 = sat(r0 - r-1)
    psubusw     xmm4,       xmm1                    ; xmm4 = sat(r-1 - r0)
    paddusw     xmm6,       xmm4                    ; xmm6 = abs(r0 - r-1)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds


    paddusw     xmm3,       RD42                    ; xmm3 += round value (4)
    psraw       xmm3,       3                       ; xmm3 /= 8

    pand        xmm1,       xmm7                    ; source where any diff > flimit
    pandn       xmm7,       xmm3                    ; blurred result elsewhere
    paddusw     xmm1,       xmm7                    ; combination

    packuswb    xmm1,       xmm0                    ; pack to bytes
    movq        QWORD PTR [rdi], xmm1

    neg         rax                                 ; pitch is positive again
    add         rsi,        8
    add         rdi,        8

    add         rdx,        8
    cmp         edx,        dword arg(5) ;cols

    jl          .nextcol

    ; done with the all cols, start the across filtering in place
    sub         rsi,        rdx
    sub         rdi,        rdx

    xor         rdx,        rdx
    ; mm0 holds the 8 bytes that are written back one iteration late, so the
    ; current group still reads the unfiltered pixels of the previous group.
    movq        mm0,        QWORD PTR [rdi-8]

.acrossnextcol:
    movq        xmm7,       QWORD PTR [rdi +rdx -2] ; p-2..p5
    movd        xmm4,       DWORD PTR [rdi +rdx +6] ; p6..p9

    pslldq      xmm4,       8
    por         xmm4,       xmm7                    ; xmm4 = p-2..p9 (bytes)

    movdqa      xmm3,       xmm4
    psrldq      xmm3,       2
    punpcklbw   xmm3,       xmm0                    ; xmm3 = p0..p7 (words)
    movdqa      xmm1,       xmm3                    ; xmm1 = p0..p7
    psllw       xmm3,       2                       ; xmm3 = 4 * p0..p7


    movdqa      xmm5,       xmm4
    psrldq      xmm5,       3
    punpcklbw   xmm5,       xmm0                    ; xmm5 = p1..p8
    paddusw     xmm3,       xmm5                    ; xmm3 += p1..p8

    ; thresholding
    movdqa      xmm7,       xmm1                    ; xmm7 = p0..p7
    psubusw     xmm7,       xmm5                    ; xmm7 = sat(p0..p7 - p1..p8)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(p1..p8 - p0..p7)
    paddusw     xmm7,       xmm5                    ; xmm7 = abs(p0..p7 - p1..p8)
    pcmpgtw     xmm7,       xmm2

    movdqa      xmm5,       xmm4
    psrldq      xmm5,       4
    punpcklbw   xmm5,       xmm0                    ; xmm5 = p2..p9
    paddusw     xmm3,       xmm5                    ; xmm3 += p2..p9

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = p0..p7
    psubusw     xmm6,       xmm5                    ; xmm6 = sat(p0..p7 - p2..p9)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(p2..p9 - p0..p7)
    paddusw     xmm6,       xmm5                    ; xmm6 = abs(p0..p7 - p2..p9)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds


    movdqa      xmm5,       xmm4                    ; xmm5 = p-2..p9 (bytes)
    punpcklbw   xmm5,       xmm0                    ; xmm5 = p-2..p5
    paddusw     xmm3,       xmm5                    ; xmm3 += p-2..p5

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = p0..p7
    psubusw     xmm6,       xmm5                    ; xmm6 = sat(p0..p7 - p-2..p5)
    psubusw     xmm5,       xmm1                    ; xmm5 = sat(p-2..p5 - p0..p7)
    paddusw     xmm6,       xmm5                    ; xmm6 = abs(p0..p7 - p-2..p5)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds

    psrldq      xmm4,       1                       ; xmm4 = p-1..p9 (bytes)
    punpcklbw   xmm4,       xmm0                    ; xmm4 = p-1..p6
    paddusw     xmm3,       xmm4                    ; xmm3 += p-1..p6

    ; thresholding
    movdqa      xmm6,       xmm1                    ; xmm6 = p0..p7
    psubusw     xmm6,       xmm4                    ; xmm6 = sat(p0..p7 - p-1..p6)
    psubusw     xmm4,       xmm1                    ; xmm4 = sat(p-1..p6 - p0..p7)
    paddusw     xmm6,       xmm4                    ; xmm6 = abs(p0..p7 - p-1..p6)
    pcmpgtw     xmm6,       xmm2
    por         xmm7,       xmm6                    ; accumulate thresholds

    paddusw     xmm3,       RD42                    ; xmm3 += round value (4)
    psraw       xmm3,       3                       ; xmm3 /= 8

    pand        xmm1,       xmm7                    ; source where any diff > flimit
    pandn       xmm7,       xmm3                    ; blurred result elsewhere
    paddusw     xmm1,       xmm7                    ; combination

    packuswb    xmm1,       xmm0                    ; pack to bytes
    movq        QWORD PTR [rdi+rdx-8],  mm0         ; store previous eight bytes
    movdq2q     mm0,        xmm1                    ; hold this result for the next pass

    add         rdx,        8
    cmp         edx,        dword arg(5) ;cols
    jl          .acrossnextcol

    ; last 8 pixels
    movq        QWORD PTR [rdi+rdx-8],  mm0

    ; done with this row
    add         rsi,        rax                     ; next source line
    ; Reload the pitches sign-extended, exactly like the initial load above.
    ; A plain 32-bit "mov eax, ..." zero-extends into rax on x86-64 and would
    ; corrupt a negative pitch from the second row on.
    movsxd      rax,        DWORD PTR arg(3)        ; dst_pixels_per_line
    add         rdi,        rax                     ; next destination line
    movsxd      rax,        DWORD PTR arg(2)        ; src_pixels_per_line

    dec         rcx                                 ; decrement count
    jnz         .nextrow                            ; next row

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    add         rsp,        16
    pop         rsp
%endif
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef RD42


;void vp9_mbpost_proc_down_xmm(unsigned char *dst,
;                            int pitch, int rows, int cols,int flimit)
;
; Works on 8-pixel-wide column strips. For each strip a running sum (xmm5,
; words) and running sum of squares (xmm6/xmm7, dwords) over a 15-row vertical
; window are maintained. Where 15*sumsq - sum*sum < flimit the pixel is
; replaced by (pixel + sum + vp9_rv[...]) >> 4, otherwise it is left unchanged.
; Results go through the 16-entry ring buffer d[] on the stack and are written
; back 8 rows late, so the window keeps reading unfiltered pixels.
;
; NOTE(review): the window starts at dst - 8*pitch and the loop runs rows+8
; times, so rows above/below the image are read (and the 8 rows above are
; written) - presumably the caller provides that border; confirm.
; NOTE(review): mm0 is used and no emms is executed here - confirm the caller
; clears the MMX state.
extern sym(vp9_rv)
global sym(vp9_mbpost_proc_down_xmm) PRIVATE
sym(vp9_mbpost_proc_down_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 128+16

    ; unsigned char d[16][8] at [rsp]
    ; create flimit4 (flimit in all four dwords) at [rsp+128]
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
    mov         [rsp+128+8], eax
    mov         [rsp+128+12], eax
%define flimit4 [rsp+128]

%if ABI_IS_32BIT=0
    lea         r8,       [GLOBAL(sym(vp9_rv))]
%endif

    ;rows +=8;
    add         dword arg(2), 8

    ;for(c=0; c<cols; c+=8)
.loop_col:
            mov         rsi,        arg(0)              ; s
            pxor        xmm0,       xmm0                ; xmm0 = 0 (used for unpacking)

            movsxd      rax,        dword ptr arg(1)    ; pitch
            neg         rax                             ; rax = -pitch

            lea         rsi,        [rsi + rax*8]       ; rsi = &s[-pitch*8]
            neg         rax                             ; rax = pitch


            pxor        xmm5,       xmm5                ; sum   (8 words)
            pxor        xmm6,       xmm6                ; sumsq (low 4 dwords)

            pxor        xmm7,       xmm7                ; sumsq (high 4 dwords)
            mov         rdi,        rsi

            mov         rcx,        15                  ; 15 rows in the initial window

.loop_initvar:
            movq        xmm1,       QWORD PTR [rdi]
            punpcklbw   xmm1,       xmm0

            paddw       xmm5,       xmm1                ; sum += row
            pmullw      xmm1,       xmm1                ; row * row

            movdqa      xmm2,       xmm1
            punpcklwd   xmm1,       xmm0

            punpckhwd   xmm2,       xmm0
            paddd       xmm6,       xmm1                ; sumsq += row * row

            paddd       xmm7,       xmm2
            lea         rdi,        [rdi+rax]

            dec         rcx
            jne         .loop_initvar
            ;save the var and sum
            xor         rdx,        rdx                 ; rdx = row counter
.loop_row:
            movq        xmm1,       QWORD PTR [rsi]     ; [s-pitch*8]  row leaving the window
            movq        xmm2,       QWORD PTR [rdi]     ; [s+pitch*7]  row entering the window

            punpcklbw   xmm1,       xmm0
            punpcklbw   xmm2,       xmm0

            paddw       xmm5,       xmm2                ; sum += entering
            psubw       xmm5,       xmm1                ; sum -= leaving

            pmullw      xmm2,       xmm2
            movdqa      xmm4,       xmm2

            punpcklwd   xmm2,       xmm0
            punpckhwd   xmm4,       xmm0

            paddd       xmm6,       xmm2                ; sumsq += entering^2
            paddd       xmm7,       xmm4

            pmullw      xmm1,       xmm1
            movdqa      xmm2,       xmm1

            punpcklwd   xmm1,       xmm0
            psubd       xmm6,       xmm1                ; sumsq -= leaving^2

            punpckhwd   xmm2,       xmm0
            psubd       xmm7,       xmm2


            movdqa      xmm3,       xmm6
            pslld       xmm3,       4

            psubd       xmm3,       xmm6                ; xmm3 = 15 * sumsq (low)
            movdqa      xmm1,       xmm5

            movdqa      xmm4,       xmm5
            pmullw      xmm1,       xmm1                ; low  16 bits of sum*sum

            pmulhw      xmm4,       xmm4                ; high 16 bits of sum*sum
            movdqa      xmm2,       xmm1

            punpcklwd   xmm1,       xmm4                ; sum*sum as dwords (low)
            punpckhwd   xmm2,       xmm4                ; sum*sum as dwords (high)

            movdqa      xmm4,       xmm7
            pslld       xmm4,       4

            psubd       xmm4,       xmm7                ; xmm4 = 15 * sumsq (high)

            psubd       xmm3,       xmm1
            psubd       xmm4,       xmm2

            psubd       xmm3,       flimit4
            psubd       xmm4,       flimit4

            psrad       xmm3,       31                  ; all ones where 15*sumsq - sum*sum < flimit
            psrad       xmm4,       31

            packssdw    xmm3,       xmm4
            packsswb    xmm3,       xmm0                ; byte mask for the 8 pixels

            movq        xmm1,       QWORD PTR [rsi+rax*8] ; current row

            movq        xmm2,       xmm1                ; keep the unfiltered pixels
            punpcklbw   xmm1,       xmm0

            paddw       xmm1,       xmm5                ; pixel + sum
            mov         rcx,        rdx

            and         rcx,        127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
            push        rax
            lea         rax,        [GLOBAL(sym(vp9_rv))]
            movdqu      xmm4,       [rax + rcx*2] ;vp9_rv[rcx*2]
            pop         rax
%elif ABI_IS_32BIT=0
            movdqu      xmm4,       [r8 + rcx*2] ;vp9_rv[rcx*2]
%else
            movdqu      xmm4,       [sym(vp9_rv) + rcx*2]
%endif

            paddw       xmm1,       xmm4                ; + rounding value from vp9_rv
            ;paddw     xmm1,       eight8s
            psraw       xmm1,       4

            packuswb    xmm1,       xmm0
            pand        xmm1,       xmm3                ; filtered where the mask is set

            pandn       xmm3,       xmm2                ; original elsewhere
            por         xmm1,       xmm3

            and         rcx,        15
            movq        QWORD PTR [rsp + rcx*8], xmm1   ; d[row & 15] = result

            mov         rcx,        rdx
            sub         rcx,        8

            and         rcx,        15
            movq        mm0,        [rsp + rcx*8]       ; d[(row - 8) & 15]

            movq        [rsi],      mm0                 ; write back 8 rows behind
            lea         rsi,        [rsi+rax]

            lea         rdi,        [rdi+rax]
            add         rdx,        1

            cmp         edx,        dword arg(2) ;rows
            jl          .loop_row

        ; s += 8, done at full pointer width: a dword add on the shadowed
        ; argument would drop the carry into the upper half of a 64-bit
        ; pointer. rsi is reloaded from arg(0) at .loop_col and restored in
        ; the epilog, so it is free to use here.
        mov         rsi,        arg(0)
        add         rsi,        8
        mov         arg(0),     rsi
        sub         dword arg(3), 8 ; cols -= 8
        cmp         dword arg(3), 0
        jg          .loop_col

    add         rsp, 128+16
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit4


;void vp9_mbpost_proc_across_ip_xmm(unsigned char *src,
;                                int pitch, int rows, int cols,int flimit)
;
; In-place horizontal counterpart of vp9_mbpost_proc_down_xmm. For every row
; the sum and sum of squares of s[-8..6] are computed with scalar code, then
; the row is processed 4 pixels at a time with a sliding 15-pixel window.
; Where 15*sumsq - sum*sum < flimit the pixel becomes (pixel + sum + 8) >> 4,
; otherwise it is kept. Output is delayed through mm0/mm1 and stored 8 bytes
; behind the read position so the window keeps seeing unfiltered pixels.
;
; NOTE(review): reads start at s[-8] and the column loop runs to cols+8, so
; pixels left/right of the row are accessed - presumably a border exists;
; confirm.
; NOTE(review): mm0/mm1 are used and no emms is executed here - confirm the
; caller clears the MMX state.
global sym(vp9_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp9_mbpost_proc_across_ip_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16

    ; create flimit4 at [rsp]
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp], eax
    mov         [rsp+4], eax
    mov         [rsp+8], eax
    mov         [rsp+12], eax
%define flimit4 [rsp]


    ;for(r=0;r<rows;r++)
.ip_row_loop:

        xor         rdx,    rdx ;sumsq=0;
        xor         rcx,    rcx ;sum=0;
        mov         rsi,    arg(0) ; s
        mov         rdi,    -8
.ip_var_loop:
        ;for(i=-8;i<=6;i++)
        ;{
        ;   sumsq += s[i]*s[i];
        ;   sum   += s[i];
        ;}
        movzx       eax, byte [rsi+rdi]
        add         ecx, eax
        mul         al                  ; ax = s[i] * s[i]
        add         edx, eax
        add         rdi, 1
        cmp         rdi, 6
        jle         .ip_var_loop


            ;mov         rax,    sumsq
            ;movd        xmm7,   rax
            movd        xmm7,   edx

            ;mov         rax,    sum
            ;movd        xmm6,   rax
            movd        xmm6,   ecx

            mov         rsi,    arg(0) ;s
            xor         rcx,    rcx

            movsxd      rdx,    dword arg(3) ;cols
            add         rdx,    8
            pxor        mm0,    mm0
            pxor        mm1,    mm1

            pxor        xmm0,   xmm0
.nextcol4:

            movd        xmm1,   DWORD PTR [rsi+rcx-8]   ; -8 -7 -6 -5
            movd        xmm2,   DWORD PTR [rsi+rcx+7]   ; +7 +8 +9 +10

            punpcklbw   xmm1,   xmm0                    ; expanding
            punpcklbw   xmm2,   xmm0                    ; expanding

            punpcklwd   xmm1,   xmm0                    ; expanding to dwords
            punpcklwd   xmm2,   xmm0                    ; expanding to dwords

            psubd       xmm2,   xmm1                    ; 7--8   8--7   9--6 10--5
            paddd       xmm1,   xmm1                    ; -8*2   -7*2   -6*2 -5*2

            paddd       xmm1,   xmm2                    ; 7+-8   8+-7   9+-6 10+-5
            pmaddwd     xmm1,   xmm2                    ; (new+old)*(new-old) = new^2 - old^2

            paddd       xmm6,   xmm2
            paddd       xmm7,   xmm1

            pshufd      xmm6,   xmm6,   0               ; duplicate the last ones
            pshufd      xmm7,   xmm7,   0               ; duplicate the last ones

            psrldq      xmm1,       4                   ; 8--7   9--6 10--5  0000
            psrldq      xmm2,       4                   ; 8--7   9--6 10--5  0000

            pshufd      xmm3,   xmm1,   3               ; 0000  8--7   8--7   8--7 squared
            pshufd      xmm4,   xmm2,   3               ; 0000  8--7   8--7   8--7

            paddd       xmm6,   xmm4
            paddd       xmm7,   xmm3

            pshufd      xmm3,   xmm1,   01011111b       ; 0000  0000   9--6   9--6 squared
            pshufd      xmm4,   xmm2,   01011111b       ; 0000  0000   9--6   9--6

            paddd       xmm7,   xmm3
            paddd       xmm6,   xmm4

            pshufd      xmm3,   xmm1,   10111111b       ; 0000  0000   0000  10--5 squared
            pshufd      xmm4,   xmm2,   10111111b       ; 0000  0000   0000  10--5

            paddd       xmm7,   xmm3
            paddd       xmm6,   xmm4

            movdqa      xmm3,   xmm6
            pmaddwd     xmm3,   xmm3                    ; sum * sum

            movdqa      xmm5,   xmm7
            pslld       xmm5,   4

            psubd       xmm5,   xmm7                    ; 15 * sumsq
            psubd       xmm5,   xmm3

            psubd       xmm5,   flimit4
            psrad       xmm5,   31                      ; all ones where below flimit

            packssdw    xmm5,   xmm0
            packsswb    xmm5,   xmm0                    ; byte mask for the 4 pixels

            movd        xmm1,   DWORD PTR [rsi+rcx]
            movq        xmm2,   xmm1                    ; keep the unfiltered pixels

            punpcklbw   xmm1,   xmm0
            punpcklwd   xmm1,   xmm0

            paddd       xmm1,   xmm6                    ; pixel + sum
            paddd       xmm1,   [GLOBAL(four8s)]        ; + 8 (rounding)

            psrad       xmm1,   4
            packssdw    xmm1,   xmm0

            packuswb    xmm1,   xmm0
            pand        xmm1,   xmm5                    ; filtered where the mask is set

            pandn       xmm5,   xmm2                    ; original elsewhere
            por         xmm5,   xmm1

            movd        [rsi+rcx-8],  mm0               ; store the result from two passes ago
            movq        mm0,    mm1

            movdq2q     mm1,    xmm5
            psrldq      xmm7,   12                      ; keep only the last running sumsq

            psrldq      xmm6,   12                      ; keep only the last running sum
            add         rcx,    4

            cmp         rcx,    rdx
            jl          .nextcol4

        ;s+=pitch;
        movsxd      rax,    dword arg(1)
        add         arg(0), rax

        sub         dword arg(2), 1 ;rows-=1
        cmp         dword arg(2), 0
        jg          .ip_row_loop

    add         rsp, 16
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit4


;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
;                            unsigned char blackclamp[16],
;                            unsigned char whiteclamp[16],
;                            unsigned char bothclamp[16],
;                            unsigned int width, unsigned int height, int pitch)
;
; For every row: picks an offset of (rand() & 0xff) into the noise buffer, then
; for each group of 16 pixels clamps the source with the three clamp vectors
; and adds the noise bytes (wrapping byte add), storing the result in place.
;
; NOTE(review): the clamp vectors are used as aligned memory operands and are
; addressed as [blackclamp], [blackclamp+16], [blackclamp+32] - the caller
; presumably guarantees 16-byte alignment and contiguous storage; confirm.
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBVPX_RAND) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff
    add     rcx, rax                ; rcx = noise + (rand() & 0xff)

    ; we rely on the fact that the clamping vectors are stored contiguously
    ; in black/white/both order. Note that we have to reload this here because
    ; rdx could be trashed by rand()
    mov     rdx, arg(2) ; blackclamp


            mov     rdi, rcx
            movsxd  rcx, dword arg(5) ;[Width]
            mov     rsi, arg(0) ;Pos
            xor     rax, rax

.addnoise_nextset:
            movdqu      xmm1, [rsi+rax]     ; get the source

            psubusb     xmm1, [rdx]         ;blackclamp  ; clamp both sides so we don't outrange adding noise
            paddusb     xmm1, [rdx+32]      ;bothclamp
            psubusb     xmm1, [rdx+16]      ;whiteclamp

            movdqu      xmm2, [rdi+rax]     ; get the noise for this line
            paddb       xmm1, xmm2          ; add it in
            movdqu      [rsi+rax], xmm1     ; store the result

            add         rax, 16             ; move to the next 16 pixels

            cmp         rax, rcx
            jl          .addnoise_nextset

    movsxd  rax, dword arg(7) ; Pitch
    add     arg(0), rax ; Start += Pitch
    sub     dword arg(6), 1   ; Height -= 1
    jg      .addnoise_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


SECTION_RODATA
align 16
rd42:
    times 8 dw 0x04
four8s:
    times 4 dd 8