1 #include "viddec_pm_utils_bstream.h"
2 #include "viddec_fw_debug.h"
3 
/*
 * Internal staging area used while extracting bits from the stream.
 * The same eight bytes of storage can be addressed either byte by byte
 * (as they are copied out of the bitstream buffer) or as two 32-bit words
 * (when the requested bits are shifted into place).
 */
typedef union
{
    uint8_t  byte[8];   /* byte-wise view of the staged data            */
    uint32_t word[2];   /* word-wise view of the very same eight bytes  */
} viddec_pm_utils_getbits_t;
10 
11 void viddec_pm_utils_bstream_reload(viddec_pm_utils_bstream_cxt_t *cxt);
12 uint32_t viddec_pm_utils_bstream_getphys(viddec_pm_utils_bstream_cxt_t *cxt, uint32_t pos, uint32_t lst_index);
13 extern uint32_t cp_using_dma(uint32_t ddr_addr, uint32_t local_addr, uint32_t size, char to_ddr, char swap);
14 
15 /* Bytes left in cubby buffer which were not consumed yet */
viddec_pm_utils_bstream_bytesincubby(viddec_pm_utils_bstream_buf_cxt_t * cxt)16 static inline uint32_t viddec_pm_utils_bstream_bytesincubby(viddec_pm_utils_bstream_buf_cxt_t *cxt)
17 {
18     return (cxt->buf_end - cxt->buf_index);
19 }
20 
21 /*
22   This function checks to see if we are at the last valid byte for current access unit.
23 */
viddec_pm_utils_bstream_nomorerbspdata(viddec_pm_utils_bstream_cxt_t * cxt)24 uint8_t viddec_pm_utils_bstream_nomorerbspdata(viddec_pm_utils_bstream_cxt_t *cxt)
25 {
26     uint32_t data_remaining = 0;
27     uint8_t ret = false;
28 
29     /* How much data is remaining including current byte to be processed.*/
30     data_remaining = cxt->list->total_bytes - (cxt->au_pos + (cxt->bstrm_buf.buf_index - cxt->bstrm_buf.buf_st));
31 
32     /* Start code prefix can be 000001 or 0000001. We always only check for 000001.
33        data_reamining should be 1 for 000001, as we don't count sc prefix and 1 represents current byte.
34        data_reamining should be 2 for 00000001, as we don't count sc prefix its current byte and extra 00 as we check for 000001.
35        NOTE: This is used for H264 only.
36     */
37     switch(data_remaining)
38     {
39         case 2:
40             /* If next byte is 0 and its the last byte in access unit */
41             ret = (cxt->bstrm_buf.buf[cxt->bstrm_buf.buf_index+1] == 0x0);
42             break;
43         case 1:
44             /* if the current byte is last byte */
45             ret = true;
46             break;
47         default:
48             break;
49     }
50     return ret;
51 }
52 
53 /*
54   This function returns true if cubby buffer has the last byte of access unit.
55 */
viddec_pm_utils_bstream_nomoredata(viddec_pm_utils_bstream_cxt_t * cxt)56 uint8_t viddec_pm_utils_bstream_nomoredata(viddec_pm_utils_bstream_cxt_t *cxt)
57 {
58     uint32_t last_byte_offset_plus_one=0;
59     uint8_t ret = false;
60     /* Check to see if the last byte Acces unit offset is the last byte for current access unit.
61      End represents the first invalid byte, so (end - st) will give number of bytes.*/
62     last_byte_offset_plus_one = cxt->au_pos + (cxt->bstrm_buf.buf_end - cxt->bstrm_buf.buf_st);
63     if((int32_t)last_byte_offset_plus_one >= cxt->list->total_bytes)
64     {
65         ret = true;
66     }
67     return ret;
68 }
69 
70 /* This function initializes scratch buffer, which is used for staging already read data, due to DMA limitations */
viddec_pm_utils_bstream_scratch_init(viddec_pm_utils_bstream_scratch_cxt_t * cxt)71 static inline void viddec_pm_utils_bstream_scratch_init(viddec_pm_utils_bstream_scratch_cxt_t *cxt)
72 {
73     cxt->st = cxt->size = cxt->bitoff=0;
74 }
75 
76 /* This function tells us how much more data is in the current es buffer from current position. Its used to figure out if
77    we need to go to next es buffer */
viddec_pm_utils_bstream_datafromindex(viddec_pm_utils_list_t * list,uint32_t index,uint32_t offset)78 static inline uint32_t viddec_pm_utils_bstream_datafromindex(viddec_pm_utils_list_t *list, uint32_t index, uint32_t offset)
79 {
80     uint32_t ret=0;
81     int32_t val=0;
82     val = (list->data[index].edpos <= (uint32_t)list->total_bytes) ? list->data[index].edpos: (uint32_t)list->total_bytes;
83     val = val - (int32_t)offset;
84     if(val > 0) ret = (uint32_t)val;
85     return val;
86 }
87 
88 /* This function seeks to byte offset position starting from lst_index, if more data is present in current ES buffer pointed by
89  lst_index returns the remaining data in current buffer along with physical address of byte offset. The lst_index parameter
90  at returns index of ES buffer in list which has byte_offset */
viddec_pm_utils_bstream_maxbytes_from_index(viddec_pm_utils_bstream_cxt_t * cxt,uint32_t * lst_index,uint32_t byte_offset,uint32_t * physaddr)91 static inline uint32_t viddec_pm_utils_bstream_maxbytes_from_index(viddec_pm_utils_bstream_cxt_t *cxt,
92                                                                    uint32_t *lst_index,
93                                                                    uint32_t byte_offset,
94                                                                    uint32_t *physaddr)
95 {
96     viddec_pm_utils_list_t *list;
97     uint32_t last_byte_offst=0, bytes_left=0;/* default return value is 0 bytes */
98 
99     list = cxt->list;
100     while(*lst_index < list->num_items)
101     {
102         /* Check to see if we reached the buffer with last valid byte of current access unit, List can have data beyond current access unit */
103         last_byte_offst = (list->data[*lst_index].edpos <= (uint32_t)list->total_bytes) ? list->data[*lst_index].edpos: (uint32_t)list->total_bytes;
104         if(byte_offset < last_byte_offst)
105         {/* Found a match so return with data remaining */
106 #if 1
107             int32_t val=0;
108             val = last_byte_offst - (int32_t)byte_offset;
109             if(val > 0) bytes_left = (uint32_t)val;
110 #else
111             bytes_left = viddec_pm_utils_bstream_datafromindex(list, *lst_index, byte_offset);
112 #endif
113             *physaddr = viddec_pm_utils_bstream_getphys(cxt, byte_offset, *lst_index);
114             break;
115         }
116         *lst_index+=1;
117     }
118     return bytes_left;
119 }
120 
121 /* This function is for copying trailing bytes of cubby bitstream buffer to scratch buffer */
viddec_pm_utils_bstream_scratch_copyto(viddec_pm_utils_bstream_scratch_cxt_t * cxt,uint8_t * data,uint32_t num_bytes)122 static inline void viddec_pm_utils_bstream_scratch_copyto(viddec_pm_utils_bstream_scratch_cxt_t *cxt, uint8_t *data, uint32_t num_bytes)
123 {
124     uint32_t i=0;
125     for(i=0; i<num_bytes;i++)
126     {
127         cxt->buf_scratch[i] = *data;
128         data++;cxt->size++;
129     }
130 }
131 
132 /* This function is for copying trailing bytes from scratch buffer to  bitstream buffer*/
viddec_pm_utils_bstream_scratch_copyfrom(viddec_pm_utils_bstream_scratch_cxt_t * cxt,uint8_t * data)133 static inline void viddec_pm_utils_bstream_scratch_copyfrom(viddec_pm_utils_bstream_scratch_cxt_t *cxt, uint8_t *data)
134 {
135     uint32_t i=0;
136     for(i=0; i<cxt->size;i++)
137     {
138         *data = cxt->buf_scratch[i];
139         data++;
140     }
141 }
142 
143 /* This function populates requested number of bytes into data parameter, skips emulation prevention bytes if needed */
viddec_pm_utils_getbytes(viddec_pm_utils_bstream_buf_cxt_t * bstream,viddec_pm_utils_getbits_t * data,uint32_t * act_bytes,uint32_t * phase,uint32_t num_bytes,uint32_t emul_reqd,uint8_t is_offset_zero)144 static inline int32_t viddec_pm_utils_getbytes(viddec_pm_utils_bstream_buf_cxt_t *bstream,
145                                                viddec_pm_utils_getbits_t *data,/* gets populated with read bytes*/
146                                                uint32_t *act_bytes, /* actual number of bytes read can be more due to emulation prev bytes*/
147                                                uint32_t *phase,    /* Phase for emulation */
148                                                uint32_t num_bytes,/* requested number of bytes*/
149                                                uint32_t emul_reqd, /* On true we look for emulation prevention */
150                                                uint8_t is_offset_zero /* Are we on aligned byte position for first byte*/
151                                                )
152 {
153     int32_t ret = 1;
154     uint8_t cur_byte = 0, valid_bytes_read = 0;
155     *act_bytes = 0;
156 
157     while(valid_bytes_read < num_bytes)
158     {
159         cur_byte = bstream->buf[bstream->buf_index + *act_bytes];
160         if((cur_byte == 0x3) &&(*phase == 2))
161         {/* skip emulation byte. we update the phase only if emulation prevention is enabled */
162             *phase = 0;
163         }
164         else
165         {
166             data->byte[valid_bytes_read] = cur_byte;
167             /*
168               We only update phase for first byte if bit offset is 0. If its not 0 then it was already accounted for in the past.
169               From second byte onwards we always look to update phase.
170              */
171             if((*act_bytes != 0) || (is_offset_zero))
172             {
173                 if(cur_byte == 0)
174                 {
175                     /* Update phase only if emulation prevention is required */
176                     *phase +=( ((*phase < 2) && emul_reqd ) ? 1: 0 );
177                 }
178                 else
179                 {
180                     *phase=0;
181                 }
182             }
183             valid_bytes_read++;
184         }
185         *act_bytes +=1;
186     }
187    /* Check to see if we reached end during above operation. We might be out of range buts it safe since our array
188       has at least MIN_DATA extra bytes and the maximum out of bounds we will go is 5 bytes */
189     if((bstream->buf_index + *act_bytes -1) >= bstream->buf_end)
190     {
191         ret = -1;
192     }
193     return ret;
194 }
195 
196 /*
197   This function checks to see if we have minimum amount of data else tries to reload as much as it can.
198   Always returns the data left in current buffer in parameter.
199 */
viddec_pm_utils_check_bstream_reload(viddec_pm_utils_bstream_cxt_t * cxt,uint32_t * data_left)200 static inline void viddec_pm_utils_check_bstream_reload(viddec_pm_utils_bstream_cxt_t *cxt, uint32_t *data_left)
201 {
202 #ifdef VBP
203 	*data_left = viddec_pm_utils_bstream_bytesincubby(&(cxt->bstrm_buf));
204 #else
205     uint8_t isReload=0;
206 
207     *data_left = viddec_pm_utils_bstream_bytesincubby(&(cxt->bstrm_buf));
208     /* If we have minimum data we should continue, else try to read more data */
209     if(*data_left <MIN_DATA)
210     {
211         /* Check to see if we already read last byte of current access unit */
212         isReload = !(viddec_pm_utils_bstream_nomoredata(cxt) == 1);
213         while(isReload)
214         {
215             /* We have more data in access unit so keep reading until we get at least minimum data */
216             viddec_pm_utils_bstream_reload(cxt);
217             *data_left = viddec_pm_utils_bstream_bytesincubby(&(cxt->bstrm_buf));
218             /* Break out of loop if we reached last byte or we have enough data */
219             isReload = !((*data_left >= MIN_DATA) || (viddec_pm_utils_bstream_nomoredata(cxt) == 1));
220         }
221     }
222 #endif
223 }
224 /*
225   This function moves the stream position by N bits(parameter bits). The bytes parameter tells us how many bytes were
226   read for this N bits(can be different due to emulation bytes).
227 */
viddec_pm_utils_update_skipoffsets(viddec_pm_utils_bstream_buf_cxt_t * bstream,uint32_t bits,uint32_t bytes)228 static inline void viddec_pm_utils_update_skipoffsets(viddec_pm_utils_bstream_buf_cxt_t *bstream, uint32_t bits, uint32_t bytes)
229 {
230     if((bits & 0x7) == 0)
231     {
232         bstream->buf_bitoff = 0;
233         bstream->buf_index +=bytes;
234     }
235     else
236     {
237         bstream->buf_bitoff = bits & 0x7;
238         bstream->buf_index +=(bytes - 1);
239     }
240 }
241 
242 /*
243   This function gets physical address of the requested au offset(pos).
244 */
245 
viddec_pm_utils_bstream_getphys(viddec_pm_utils_bstream_cxt_t * cxt,uint32_t pos,uint32_t lst_index)246 uint32_t viddec_pm_utils_bstream_getphys(viddec_pm_utils_bstream_cxt_t *cxt, uint32_t pos, uint32_t lst_index)
247 {
248     uint32_t ret = 0, last_byte_offst=0;
249     viddec_pm_utils_list_t *list;
250 
251     list = cxt->list;
252     while(lst_index < list->num_items)
253     {
254         last_byte_offst = (list->data[lst_index].edpos <= (uint32_t)list->total_bytes) ? list->data[lst_index].edpos: (uint32_t)list->total_bytes;
255         if(pos < last_byte_offst)
256         {
257 #ifndef MFDBIGENDIAN
258             ret = (uint32_t)list->sc_ibuf[lst_index].buf;
259 #else
260             ret = list->sc_ibuf[lst_index].phys;
261 #endif
262             ret +=(pos - list->data[lst_index].stpos);
263             if(lst_index == 0) ret+=list->start_offset;
264             break;
265         }
266         lst_index++;
267     }
268     return ret;
269 }
270 
271 /*
272   Actual reload function which uses dma to refill bitstream buffer.
273 */
viddec_pm_utils_bstream_reload(viddec_pm_utils_bstream_cxt_t * cxt)274 void viddec_pm_utils_bstream_reload(viddec_pm_utils_bstream_cxt_t *cxt)
275 {
276     viddec_pm_utils_bstream_buf_cxt_t *bstream;
277 
278     bstream = &(cxt->bstrm_buf);
279 
280     /* Update current offset positions */
281     cxt->au_pos +=  (bstream->buf_index - bstream->buf_st);
282     bstream->buf_st = bstream->buf_index;
283     /* copy leftover bytes into scratch */
284     {
285         int32_t cur_bytes=0;
286         viddec_pm_utils_bstream_scratch_init(&(cxt->scratch));
287         cur_bytes = viddec_pm_utils_bstream_bytesincubby(&(cxt->bstrm_buf));
288         if(cur_bytes > 0)
289         {
290             viddec_pm_utils_bstream_scratch_copyto(&(cxt->scratch), &(bstream->buf[bstream->buf_index]), cur_bytes);
291             cxt->scratch.bitoff = bstream->buf_bitoff;
292         }
293     }
294     /* Initiate DMA and copyback scratch data */
295     {
296         uint32_t data_left = 0, ddr_mask=0;
297         /* calculate necesary aligmnets and copy data */
298         {
299             uint32_t ddr_addr=0, data_wrote=0;
300             uint32_t byte_pos;
301             /* byte pos points to the position from where we want to read data.*/
302             byte_pos = cxt->au_pos + cxt->scratch.size;
303             data_left = viddec_pm_utils_bstream_maxbytes_from_index(cxt, &(cxt->list_off), byte_pos, &ddr_addr);
304             if(data_left > CUBBY_SIZE)
305             {
306                 data_left = CUBBY_SIZE;
307             }
308             if(data_left != 0)
309             {
310                 ddr_mask = ddr_addr & 0x3;
311                 ddr_addr = ddr_addr & ~0x3;
312                 data_wrote = cp_using_dma(ddr_addr, (uint32_t)&(bstream->buf[MIN_DATA]), (data_left + ddr_mask), 0, 1);
313             }
314         }
315         /* copy scratch data back to buffer and update offsets */
316         {
317             uint32_t index=0;
318             index = MIN_DATA + ddr_mask;
319             index -= cxt->scratch.size;
320             viddec_pm_utils_bstream_scratch_copyfrom(&(cxt->scratch), &(bstream->buf[index]));
321             bstream->buf_st = bstream->buf_index = index;
322             bstream->buf_end = data_left + cxt->scratch.size + bstream->buf_st;
323             bstream->buf_bitoff = cxt->scratch.bitoff;
324         }
325     }
326 }
327 
328 /*
329   Init function called by parser manager after sc code detected.
330 */
viddec_pm_utils_bstream_init(viddec_pm_utils_bstream_cxt_t * cxt,viddec_pm_utils_list_t * list,uint32_t is_emul)331 void viddec_pm_utils_bstream_init(viddec_pm_utils_bstream_cxt_t *cxt, viddec_pm_utils_list_t *list, uint32_t is_emul)
332 {
333 #ifdef VBP
334 	cxt->emulation_byte_counter = 0;
335 #endif
336 
337     cxt->au_pos = 0;
338     cxt->list = list;
339     cxt->list_off = 0;
340     cxt->phase = 0;
341     cxt->is_emul_reqd = is_emul;
342     cxt->bstrm_buf.buf_st = cxt->bstrm_buf.buf_end = cxt->bstrm_buf.buf_index = cxt->bstrm_buf.buf_bitoff = 0;
343 }
344 
345 /* Get the requested byte position. If the byte is already present in cubby its returned
346    else we seek forward and get the requested byte.
347    Limitation:Once we seek forward we can't return back.
348 */
viddec_pm_utils_bstream_get_current_byte(viddec_pm_utils_bstream_cxt_t * cxt,uint8_t * byte)349 int32_t viddec_pm_utils_bstream_get_current_byte(viddec_pm_utils_bstream_cxt_t *cxt, uint8_t *byte)
350 {
351     int32_t ret = -1;
352     uint32_t data_left=0;
353     viddec_pm_utils_bstream_buf_cxt_t *bstream;
354 
355     bstream = &(cxt->bstrm_buf);
356     viddec_pm_utils_check_bstream_reload(cxt, &data_left);
357     if(data_left != 0)
358     {
359         *byte = bstream->buf[bstream->buf_index];
360         ret = 1;
361     }
362     return ret;
363 }
364 
365 /*
366   Function to skip N bits ( N<= 32).
367 */
viddec_pm_utils_bstream_skipbits(viddec_pm_utils_bstream_cxt_t * cxt,uint32_t num_bits)368 int32_t viddec_pm_utils_bstream_skipbits(viddec_pm_utils_bstream_cxt_t *cxt, uint32_t num_bits)
369 {
370     int32_t ret = -1;
371     uint32_t data_left=0;
372     viddec_pm_utils_bstream_buf_cxt_t *bstream;
373 
374     bstream = &(cxt->bstrm_buf);
375     viddec_pm_utils_check_bstream_reload(cxt, &data_left);
376     if((num_bits <= 32) && (num_bits > 0) && (data_left != 0))
377     {
378         uint8_t bytes_required=0;
379 
380         bytes_required = (bstream->buf_bitoff + num_bits + 7)>>3;
381         if(bytes_required <= data_left)
382         {
383             viddec_pm_utils_getbits_t data;
384             uint32_t act_bytes =0;
385             if(viddec_pm_utils_getbytes(bstream, &data,  &act_bytes, &(cxt->phase), bytes_required, cxt->is_emul_reqd, (bstream->buf_bitoff == 0)) != -1)
386             {
387                 uint32_t total_bits=0;
388                 total_bits=num_bits+bstream->buf_bitoff;
389                 viddec_pm_utils_update_skipoffsets(bstream, total_bits, act_bytes);
390                 ret=1;
391 
392                 if (act_bytes > bytes_required)
393                 {
394                 	cxt->emulation_byte_counter = act_bytes - bytes_required;
395                 }
396             }
397         }
398     }
399     return ret;
400 }
401 
402 /*
403   Function to get N bits ( N<= 32).
404 */
viddec_pm_utils_bstream_peekbits(viddec_pm_utils_bstream_cxt_t * cxt,uint32_t * out,uint32_t num_bits,uint8_t skip)405 int32_t viddec_pm_utils_bstream_peekbits(viddec_pm_utils_bstream_cxt_t *cxt, uint32_t *out, uint32_t num_bits, uint8_t skip)
406 {
407     uint32_t data_left=0;
408     int32_t ret = -1;
409     /* STEP 1: Make sure that we have at least minimum data before we calculate bits */
410     viddec_pm_utils_check_bstream_reload(cxt, &data_left);
411 
412     if((num_bits <= 32) && (num_bits > 0) && (data_left != 0))
413     {
414         uint32_t bytes_required=0;
415         viddec_pm_utils_bstream_buf_cxt_t *bstream;
416 
417         bstream = &(cxt->bstrm_buf);
418         bytes_required = (bstream->buf_bitoff + num_bits + 7)>>3;
419 
420         /* Step 2: Make sure we have bytes for requested bits */
421         if(bytes_required <= data_left)
422         {
423             uint32_t act_bytes, phase;
424             viddec_pm_utils_getbits_t data;
425             phase = cxt->phase;
426             /* Step 3: Due to emualtion prevention bytes sometimes the bytes_required > actual_required bytes */
427             if(viddec_pm_utils_getbytes(bstream, &data, &act_bytes, &phase, bytes_required, cxt->is_emul_reqd, (bstream->buf_bitoff == 0)) != -1)
428             {
429                 uint32_t total_bits=0;
430                 uint32_t shift_by=0;
431                 /* zero out upper bits */
432                 /* LIMITATION:For some reason compiler is optimizing it to NOP if i do both shifts
433                    in single statement */
434                 data.byte[0] <<= bstream->buf_bitoff;
435                 data.byte[0] >>= bstream->buf_bitoff;
436 
437 #ifndef MFDBIGENDIAN
438                 data.word[0] = SWAP_WORD(data.word[0]);
439                 data.word[1] = SWAP_WORD(data.word[1]);
440 #endif
441                 total_bits = num_bits+bstream->buf_bitoff;
442                 if(total_bits > 32)
443                 {
444                     /* We have to use both the words to get required data */
445                     shift_by = total_bits - 32;
446                     data.word[0] = (data.word[0] << shift_by) | ( data.word[1] >> (32 - shift_by));
447                     //total_bits -= shift_by;/* BUG */
448                 }
449                 else
450                 {
451                     shift_by = 32 - total_bits;
452                     data.word[0] = data.word[0] >> shift_by;
453                 }
454                 *out = data.word[0];
455                 if(skip)
456                 {
457                     /* update au byte position if needed */
458                     viddec_pm_utils_update_skipoffsets(bstream, total_bits, act_bytes);
459                     cxt->phase = phase;
460 
461                     if (act_bytes > bytes_required)
462                     {
463                     	cxt->emulation_byte_counter += act_bytes - bytes_required;
464                     }
465                 }
466 
467                 ret =1;
468             }
469         }
470     }
471     return ret;
472 }
473