;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
    IMPORT |vp8_validate_buffer_arm|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY


    ; Macro for validating the write buffer position.
    ;
    ; Calls vp8_validate_buffer_arm with
    ;   r0 = $start
    ;   r1 = $pos
    ;   r2 = word loaded from [w + vp8_writer_buffer_end]
    ;   r3 = word loaded from [w + vp8_writer_error]
    ;
    ; Requirements:
    ;   - vp8_writer *w must be in r0 on entry.
    ;   - $start shall not be in r1 (r1 is overwritten with $pos before
    ;     $start is read); neither operand may be r2 or r3, which are
    ;     overwritten before the operands are read.
    ;
    ; r0-r3, r12 and lr are saved and restored around the call.
    MACRO
    VALIDATE_POS $start, $pos
    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
    ldr  r2, [r0, #vp8_writer_buffer_end]
    ldr  r3, [r0, #vp8_writer_error]
    mov  r1, $pos
    mov  r0, $start
    bl   vp8_validate_buffer_arm
    pop  {r0-r3, r12, lr}
    MEND

;-----------------------------------------------------------------------
; vp8cx_pack_mb_row_tokens_armv5
;
; Walks cpi->tplist for mb_rows entries and writes every token between
; each entry's start and stop pointers through the writer's
; lowvalue / range / count state.
;
; Arguments:
;   r0      VP8_COMP *cpi
;   r1      vp8_writer *w
;   r2      vp8_coef_encodings
;   r3      vp8_extra_bits
;   [sp]    vp8_coef_tree (first stack argument)
;
; Stack frame after the prologue (10 saved registers + 24 bytes locals):
;   [sp, #0]    stop pointer of the current token list
;   [sp, #4]    b->tree for the extra-bits loop
;   [sp, #8]    vp8_extra_bits
;   [sp, #12]   remaining mb_rows
;   [sp, #16]   address of the current tokenlist entry
;   [sp, #20]   vp8_coef_encodings
;   [sp, #64]   vp8_coef_tree (caller's first stack argument)
;
; Register roles inside the loops:
;   r0  = w             r1  = p (current token)
;   r2  = lowvalue      r3  = count          r5  = range
;   r8  = n (bits left) r9  = pp (probabilities)
;   r10 = tree          r12 = v, left aligned
;   lr  = i (tree index)
;   r4, r6, r7, r11 are scratch.
;-----------------------------------------------------------------------

|vp8cx_pack_mb_row_tokens_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #24

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r2, [sp, #20]               ; save vp8_coef_encodings
    str     r5, [sp, #12]               ; save mb_rows
    str     r3, [sp, #8]                ; save vp8_extra_bits

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list

    mov     r0, r1                      ; keep same as other loops

    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]

mb_row_loop

    ldr     r1, [r7, #tokenlist_start]
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #20]               ; vp8_coef_encodings
    mov     lr, #0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #64]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                 ; r12 = v << 32 - n

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; bb = v >> n
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry propagation: zero every trailing 0xff byte, then increment
    ; the byte in front of them.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    ; NOTE(review): the ldrb below is unconditional, so with x < 0 it
    ; reads through r7 before r7 has been loaded with w->buffer.
    ; Presumably x < 0 cannot occur here - confirm.
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 holds vp8_coef_tree at the top of the loop, but it is used as
    ; a temporary above.  Reload vp8_coef_tree from the stack argument.
    ldr     r10, [sp, #64]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #8]                ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here vp8_extra_bit_struct == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

;   if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32
    lsl     r12, r7, r4

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; v >> n
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ; Carry propagation, same shape as in token_loop.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    ; NOTE(review): unconditional ldrb with x < 0 reads through r7
    ; before it holds w->buffer - confirm x < 0 cannot occur.
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     extra_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree (r10 was used as a temporary)
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

no_extra_bits
    ; Write the low bit of e with split = (range + 1) >> 1.
    ; Only bit 0 of e is consumed, so the halfword load through the
    ; named offset (the same access used for e above) is sufficient and
    ; does not depend on the field's numeric offset or on the bytes
    ; that follow it.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ; Carry propagation, same shape as above but with r6 as the buffer
    ; base and r7 as the zero / incremented byte.
    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    ; NOTE(review): unconditional ldrb with x < 0 reads through r6
    ; before it holds w->buffer - confirm x < 0 cannot occur.
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff
    beq     end_zero_while_loop

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]
    add     r7, r7, #1
    strb    r7, [r6, r4]
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1
    bne     end_count_zero

    ; count reached zero: emit one byte and reset count to -8
    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, #1
    add     r7, r7, #TOKENLIST_SZ       ; next element in the array
    str     r6, [sp, #12]
    bne     mb_row_loop

    ; Write the cached encoder state back to the writer.
    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    add     sp, sp, #24
    pop     {r4-r12, pc}
    ENDP

; Literal pool: structure offsets loaded PC-relative in the prologue.
_VP8_COMP_common_
    DCD     vp8_comp_common
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist

    END