1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2015, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "precomp.hpp"
43 #include <vector>
44 
45 #if CV_NEON
46 #define WITH_NEON
47 #endif
48 
49 namespace cv
50 {
51 namespace mjpeg
52 {
53 
54 enum { COLORSPACE_GRAY=0, COLORSPACE_RGBA=1, COLORSPACE_BGR=2, COLORSPACE_YUV444P=3 };
55 
56 #define fourCC(a,b,c,d)   ((int)((uchar(d)<<24) | (uchar(c)<<16) | (uchar(b)<<8) | uchar(a)))
57 
58 static const int AVIH_STRH_SIZE = 56;
59 static const int STRF_SIZE = 40;
60 static const int AVI_DWFLAG = 0x00000910;
61 static const int AVI_DWSCALE = 1;
62 static const int AVI_DWQUALITY = -1;
63 static const int JUNK_SEEK = 4096;
64 static const int AVIIF_KEYFRAME = 0x10;
65 static const int MAX_BYTES_PER_SEC = 99999999;
66 static const int SUG_BUFFER_SIZE = 1048576;
67 
68 static const unsigned bit_mask[] =
69 {
70     0,
71     0x00000001, 0x00000003, 0x00000007, 0x0000000F,
72     0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
73     0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
74     0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
75     0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
76     0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
77     0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
78     0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
79 };
80 
81 class BitStream
82 {
83 public:
84     enum
85     {
86         DEFAULT_BLOCK_SIZE = (1 << 15),
87         huff_val_shift = 20,
88         huff_code_mask = (1 << huff_val_shift) - 1
89     };
90 
BitStream()91     BitStream()
92     {
93         m_buf.resize(DEFAULT_BLOCK_SIZE + 1024);
94         m_start = &m_buf[0];
95         m_end = m_start + DEFAULT_BLOCK_SIZE;
96         m_is_opened = false;
97         m_f = 0;
98     }
99 
~BitStream()100     ~BitStream()
101     {
102         close();
103     }
104 
open(const String & filename)105     bool open(const String& filename)
106     {
107         close();
108         m_f = fopen(filename.c_str(), "wb");
109         if( !m_f )
110             return false;
111         m_current = m_start;
112         m_pos = 0;
113         return true;
114     }
115 
isOpened() const116     bool isOpened() const { return m_f != 0; }
117 
close()118     void close()
119     {
120         writeBlock();
121         if( m_f )
122             fclose(m_f);
123         m_f = 0;
124     }
125 
writeBlock()126     void writeBlock()
127     {
128         size_t wsz0 = m_current - m_start;
129         if( wsz0 > 0 && m_f )
130         {
131             size_t wsz = fwrite(m_start, 1, wsz0, m_f);
132             CV_Assert( wsz == wsz0 );
133         }
134         m_pos += wsz0;
135         m_current = m_start;
136     }
137 
getPos() const138     size_t getPos() const
139     {
140         return (size_t)(m_current - m_start) + m_pos;
141     }
142 
putByte(int val)143     void putByte(int val)
144     {
145         *m_current++ = (uchar)val;
146         if( m_current >= m_end )
147             writeBlock();
148     }
149 
putBytes(const uchar * buf,int count)150     void putBytes(const uchar* buf, int count)
151     {
152         uchar* data = (uchar*)buf;
153         CV_Assert(m_f && data && m_current && count >= 0);
154         if( m_current >= m_end )
155             writeBlock();
156 
157         while( count )
158         {
159             int l = (int)(m_end - m_current);
160 
161             if (l > count)
162                 l = count;
163 
164             if( l > 0 )
165             {
166                 memcpy(m_current, data, l);
167                 m_current += l;
168                 data += l;
169                 count -= l;
170             }
171             if( m_current >= m_end )
172                 writeBlock();
173         }
174     }
175 
putShort(int val)176     void putShort(int val)
177     {
178         m_current[0] = (uchar)val;
179         m_current[1] = (uchar)(val >> 8);
180         m_current += 2;
181         if( m_current >= m_end )
182             writeBlock();
183     }
184 
putInt(int val)185     void putInt(int val)
186     {
187         m_current[0] = (uchar)val;
188         m_current[1] = (uchar)(val >> 8);
189         m_current[2] = (uchar)(val >> 16);
190         m_current[3] = (uchar)(val >> 24);
191         m_current += 4;
192         if( m_current >= m_end )
193             writeBlock();
194     }
195 
jputShort(int val)196     void jputShort(int val)
197     {
198         m_current[0] = (uchar)(val >> 8);
199         m_current[1] = (uchar)val;
200         m_current += 2;
201         if( m_current >= m_end )
202             writeBlock();
203     }
204 
patchInt(int val,size_t pos)205     void patchInt(int val, size_t pos)
206     {
207         if( pos >= m_pos )
208         {
209             ptrdiff_t delta = pos - m_pos;
210             CV_Assert( delta < m_current - m_start );
211             m_start[delta] = (uchar)val;
212             m_start[delta+1] = (uchar)(val >> 8);
213             m_start[delta+2] = (uchar)(val >> 16);
214             m_start[delta+3] = (uchar)(val >> 24);
215         }
216         else
217         {
218             long fpos = ftell(m_f);
219             fseek(m_f, (long)pos, SEEK_SET);
220             uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
221             fwrite(buf, 1, 4, m_f);
222             fseek(m_f, fpos, SEEK_SET);
223         }
224     }
225 
jput(unsigned currval)226     void jput(unsigned currval)
227     {
228         uchar v;
229         uchar* ptr = m_current;
230         v = (uchar)(currval >> 24);
231         *ptr++ = v;
232         if( v == 255 )
233             *ptr++ = 0;
234         v = (uchar)(currval >> 16);
235         *ptr++ = v;
236         if( v == 255 )
237             *ptr++ = 0;
238         v = (uchar)(currval >> 8);
239         *ptr++ = v;
240         if( v == 255 )
241             *ptr++ = 0;
242         v = (uchar)currval;
243         *ptr++ = v;
244         if( v == 255 )
245             *ptr++ = 0;
246         m_current = ptr;
247         if( m_current >= m_end )
248             writeBlock();
249     }
250 
jflush(unsigned currval,int bitIdx)251     void jflush(unsigned currval, int bitIdx)
252     {
253         uchar v;
254         uchar* ptr = m_current;
255         currval |= (1 << bitIdx)-1;
256         while( bitIdx < 32 )
257         {
258             v = (uchar)(currval >> 24);
259             *ptr++ = v;
260             if( v == 255 )
261                 *ptr++ = 0;
262             currval <<= 8;
263             bitIdx += 8;
264         }
265         m_current = ptr;
266         if( m_current >= m_end )
267             writeBlock();
268     }
269 
createEncodeHuffmanTable(const int * src,unsigned * table,int max_size)270     static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
271     {
272         int  i, k;
273         int  min_val = INT_MAX, max_val = INT_MIN;
274         int  size;
275 
276         /* calc min and max values in the table */
277         for( i = 1, k = 1; src[k] >= 0; i++ )
278         {
279             int code_count = src[k++];
280 
281             for( code_count += k; k < code_count; k++ )
282             {
283                 int  val = src[k] >> huff_val_shift;
284                 if( val < min_val )
285                     min_val = val;
286                 if( val > max_val )
287                     max_val = val;
288             }
289         }
290 
291         size = max_val - min_val + 3;
292 
293         if( size > max_size )
294         {
295             CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
296             return false;
297         }
298 
299         memset( table, 0, size*sizeof(table[0]));
300 
301         table[0] = min_val;
302         table[1] = size - 2;
303 
304         for( i = 1, k = 1; src[k] >= 0; i++ )
305         {
306             int code_count = src[k++];
307 
308             for( code_count += k; k < code_count; k++ )
309             {
310                 int  val = src[k] >> huff_val_shift;
311                 int  code = src[k] & huff_code_mask;
312 
313                 table[val - min_val + 2] = (code << 8) | i;
314             }
315         }
316         return true;
317     }
318 
createSourceHuffmanTable(const uchar * src,int * dst,int max_bits,int first_bits)319     static int* createSourceHuffmanTable(const uchar* src, int* dst,
320                                          int max_bits, int first_bits)
321     {
322         int   i, val_idx, code = 0;
323         int*  table = dst;
324         *dst++ = first_bits;
325         for (i = 1, val_idx = max_bits; i <= max_bits; i++)
326         {
327             int code_count = src[i - 1];
328             dst[0] = code_count;
329             code <<= 1;
330             for (int k = 0; k < code_count; k++)
331             {
332                 dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
333             }
334             code += code_count;
335             dst += code_count + 1;
336             val_idx += code_count;
337         }
338         dst[0] = -1;
339         return  table;
340     }
341 
342 protected:
343     std::vector<uchar> m_buf;
344     uchar*  m_start;
345     uchar*  m_end;
346     uchar*  m_current;
347     size_t  m_pos;
348     bool    m_is_opened;
349     FILE*   m_f;
350 };
351 
352 
353 class MotionJpegWriter : public IVideoWriter
354 {
355 public:
MotionJpegWriter()356     MotionJpegWriter() { rawstream = false; }
MotionJpegWriter(const String & filename,double fps,Size size,bool iscolor)357     MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
358     {
359         rawstream = false;
360         open(filename, fps, size, iscolor);
361     }
~MotionJpegWriter()362     ~MotionJpegWriter() { close(); }
363 
close()364     void close()
365     {
366         if( !strm.isOpened() )
367             return;
368 
369         if( !frameOffset.empty() && !rawstream )
370         {
371             endWriteChunk(); // end LIST 'movi'
372             writeIndex();
373             finishWriteAVI();
374         }
375         strm.close();
376         frameOffset.clear();
377         frameSize.clear();
378         AVIChunkSizeIndex.clear();
379         frameNumIndexes.clear();
380     }
381 
open(const String & filename,double fps,Size size,bool iscolor)382     bool open(const String& filename, double fps, Size size, bool iscolor)
383     {
384         close();
385 
386         if( filename.empty() )
387             return false;
388         const char* ext = strrchr(filename.c_str(), '.');
389         if( !ext )
390             return false;
391         if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
392             return false;
393 
394         bool ok = strm.open(filename);
395         if( !ok )
396             return false;
397 
398         CV_Assert(fps >= 1);
399         outfps = cvRound(fps);
400         width = size.width;
401         height = size.height;
402         quality = 75;
403         rawstream = false;
404         channels = iscolor ? 3 : 1;
405 
406         if( !rawstream )
407         {
408             startWriteAVI();
409             writeStreamHeader();
410         }
411         //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
412         return true;
413     }
414 
isOpened() const415     bool isOpened() const { return strm.isOpened(); }
416 
startWriteAVI()417     void startWriteAVI()
418     {
419         startWriteChunk(fourCC('R', 'I', 'F', 'F'));
420 
421         strm.putInt(fourCC('A', 'V', 'I', ' '));
422 
423         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
424 
425         strm.putInt(fourCC('h', 'd', 'r', 'l'));
426         strm.putInt(fourCC('a', 'v', 'i', 'h'));
427         strm.putInt(AVIH_STRH_SIZE);
428         strm.putInt(cvRound(1e6 / outfps));
429         strm.putInt(MAX_BYTES_PER_SEC);
430         strm.putInt(0);
431         strm.putInt(AVI_DWFLAG);
432 
433         frameNumIndexes.push_back(strm.getPos());
434 
435         strm.putInt(0);
436         strm.putInt(0);
437         strm.putInt(1); // number of streams
438         strm.putInt(SUG_BUFFER_SIZE);
439         strm.putInt(width);
440         strm.putInt(height);
441         strm.putInt(0);
442         strm.putInt(0);
443         strm.putInt(0);
444         strm.putInt(0);
445     }
446 
writeStreamHeader()447     void writeStreamHeader()
448     {
449         // strh
450         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
451 
452         strm.putInt(fourCC('s', 't', 'r', 'l'));
453         strm.putInt(fourCC('s', 't', 'r', 'h'));
454         strm.putInt(AVIH_STRH_SIZE);
455         strm.putInt(fourCC('v', 'i', 'd', 's'));
456         strm.putInt(fourCC('M', 'J', 'P', 'G'));
457         strm.putInt(0);
458         strm.putInt(0);
459         strm.putInt(0);
460         strm.putInt(AVI_DWSCALE);
461         strm.putInt(outfps);
462         strm.putInt(0);
463 
464         frameNumIndexes.push_back(strm.getPos());
465 
466         strm.putInt(0);
467         strm.putInt(SUG_BUFFER_SIZE);
468         strm.putInt(AVI_DWQUALITY);
469         strm.putInt(0);
470         strm.putShort(0);
471         strm.putShort(0);
472         strm.putShort(width);
473         strm.putShort(height);
474 
475         // strf (use the BITMAPINFOHEADER for video)
476         startWriteChunk(fourCC('s', 't', 'r', 'f'));
477 
478         strm.putInt(STRF_SIZE);
479         strm.putInt(width);
480         strm.putInt(height);
481         strm.putShort(1); // planes (1 means interleaved data (after decompression))
482 
483         strm.putShort(channels); // bits per pixel
484         strm.putInt(fourCC('M', 'J', 'P', 'G'));
485         strm.putInt(width * height * channels);
486         strm.putInt(0);
487         strm.putInt(0);
488         strm.putInt(0);
489         strm.putInt(0);
490         // Must be indx chunk
491         endWriteChunk(); // end strf
492         endWriteChunk(); // end strl
493 
494         // odml
495         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
496         strm.putInt(fourCC('o', 'd', 'm', 'l'));
497         startWriteChunk(fourCC('d', 'm', 'l', 'h'));
498 
499         frameNumIndexes.push_back(strm.getPos());
500 
501         strm.putInt(0);
502         strm.putInt(0);
503 
504         endWriteChunk(); // end dmlh
505         endWriteChunk(); // end odml
506 
507         endWriteChunk(); // end hdrl
508 
509         // JUNK
510         startWriteChunk(fourCC('J', 'U', 'N', 'K'));
511         size_t pos = strm.getPos();
512         for( ; pos < (size_t)JUNK_SEEK; pos += 4 )
513             strm.putInt(0);
514         endWriteChunk(); // end JUNK
515         // movi
516         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
517         moviPointer = strm.getPos();
518         strm.putInt(fourCC('m', 'o', 'v', 'i'));
519     }
520 
startWriteChunk(int fourcc)521     void startWriteChunk(int fourcc)
522     {
523         CV_Assert(fourcc != 0);
524         strm.putInt(fourcc);
525 
526         AVIChunkSizeIndex.push_back(strm.getPos());
527         strm.putInt(0);
528     }
529 
endWriteChunk()530     void endWriteChunk()
531     {
532         if( !AVIChunkSizeIndex.empty() )
533         {
534             size_t currpos = strm.getPos();
535             size_t pospos = AVIChunkSizeIndex.back();
536             AVIChunkSizeIndex.pop_back();
537             int chunksz = (int)(currpos - (pospos + 4));
538             strm.patchInt(chunksz, pospos);
539         }
540     }
541 
writeIndex()542     void writeIndex()
543     {
544         // old style AVI index. Must be Open-DML index
545         startWriteChunk(fourCC('i', 'd', 'x', '1'));
546         int nframes = (int)frameOffset.size();
547         for( int i = 0; i < nframes; i++ )
548         {
549             strm.putInt(fourCC('0', '0', 'd', 'c'));
550             strm.putInt(AVIIF_KEYFRAME);
551             strm.putInt((int)frameOffset[i]);
552             strm.putInt((int)frameSize[i]);
553         }
554         endWriteChunk(); // End idx1
555     }
556 
finishWriteAVI()557     void finishWriteAVI()
558     {
559         int nframes = (int)frameOffset.size();
560         // Record frames numbers to AVI Header
561         while (!frameNumIndexes.empty())
562         {
563             size_t ppos = frameNumIndexes.back();
564             frameNumIndexes.pop_back();
565             strm.patchInt(nframes, ppos);
566         }
567         endWriteChunk(); // end RIFF
568     }
569 
write(InputArray _img)570     void write(InputArray _img)
571     {
572         Mat img = _img.getMat();
573         size_t chunkPointer = strm.getPos();
574         int input_channels = img.channels();
575         int colorspace = -1;
576 
577         if( input_channels == 1 && channels == 1 )
578         {
579             CV_Assert( img.cols == width && img.rows == height );
580             colorspace = COLORSPACE_GRAY;
581         }
582         else if( input_channels == 4 )
583         {
584             CV_Assert( img.cols == width && img.rows == height && channels == 3 );
585             colorspace = COLORSPACE_RGBA;
586         }
587         else if( input_channels == 3 )
588         {
589             CV_Assert( img.cols == width && img.rows == height && channels == 3 );
590             colorspace = COLORSPACE_BGR;
591         }
592         else if( input_channels == 1 && channels == 3 )
593         {
594             CV_Assert( img.cols == width && img.rows == height*3 );
595             colorspace = COLORSPACE_YUV444P;
596         }
597         else
598             CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
599 
600         if( !rawstream )
601             startWriteChunk(fourCC('0', '0', 'd', 'c'));
602 
603         writeFrameData(img.data, (int)img.step, colorspace, input_channels);
604 
605         if( !rawstream )
606         {
607             frameOffset.push_back(chunkPointer - moviPointer);
608             frameSize.push_back(strm.getPos() - chunkPointer - 8);       // Size excludes '00dc' and size field
609             endWriteChunk(); // end '00dc'
610         }
611     }
612 
getProperty(int propId) const613     double getProperty(int propId) const
614     {
615         if( propId == VIDEOWRITER_PROP_QUALITY )
616             return quality;
617         if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
618             return frameSize.empty() ? 0. : (double)frameSize.back();
619         return 0.;
620     }
621 
setProperty(int propId,double value)622     bool setProperty(int propId, double value)
623     {
624         if( propId == VIDEOWRITER_PROP_QUALITY )
625         {
626             quality = value;
627             return true;
628         }
629         return false;
630     }
631 
632     void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
633 
634 protected:
635     int outfps;
636     int width, height, channels;
637     double quality;
638     size_t moviPointer;
639     std::vector<size_t> frameOffset, frameSize, AVIChunkSizeIndex, frameNumIndexes;
640     bool rawstream;
641 
642     BitStream strm;
643 };
644 
645 #define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
646 #define fix(x, n)   (int)((x)*(1 << (n)) + .5);
647 
648 enum
649 {
650     fixb = 14,
651     fixc = 12,
652     postshift = 14
653 };
654 
655 static const int C0_707 = fix(0.707106781f, fixb);
656 static const int C0_541 = fix(0.541196100f, fixb);
657 static const int C0_382 = fix(0.382683432f, fixb);
658 static const int C1_306 = fix(1.306562965f, fixb);
659 
660 static const int y_r = fix(0.299, fixc);
661 static const int y_g = fix(0.587, fixc);
662 static const int y_b = fix(0.114, fixc);
663 
664 static const int cb_r = -fix(0.1687, fixc);
665 static const int cb_g = -fix(0.3313, fixc);
666 static const int cb_b = fix(0.5, fixc);
667 
668 static const int cr_r = fix(0.5, fixc);
669 static const int cr_g = -fix(0.4187, fixc);
670 static const int cr_b = -fix(0.0813, fixc);
671 
672 // Standard JPEG quantization tables
673 static const uchar jpegTableK1_T[] =
674 {
675     16, 12, 14, 14,  18,  24,  49,  72,
676     11, 12, 13, 17,  22,  35,  64,  92,
677     10, 14, 16, 22,  37,  55,  78,  95,
678     16, 19, 24, 29,  56,  64,  87,  98,
679     24, 26, 40, 51,  68,  81, 103, 112,
680     40, 58, 57, 87, 109, 104, 121, 100,
681     51, 60, 69, 80, 103, 113, 120, 103,
682     61, 55, 56, 62,  77,  92, 101,  99
683 };
684 
685 static const uchar jpegTableK2_T[] =
686 {
687     17, 18, 24, 47, 99, 99, 99, 99,
688     18, 21, 26, 66, 99, 99, 99, 99,
689     24, 26, 56, 99, 99, 99, 99, 99,
690     47, 66, 99, 99, 99, 99, 99, 99,
691     99, 99, 99, 99, 99, 99, 99, 99,
692     99, 99, 99, 99, 99, 99, 99, 99,
693     99, 99, 99, 99, 99, 99, 99, 99,
694     99, 99, 99, 99, 99, 99, 99, 99
695 };
696 
697 // Standard Huffman tables
698 
699 // ... for luma DCs.
700 static const uchar jpegTableK3[] =
701 {
702     0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
703     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
704 };
705 
706 // ... for chroma DCs.
707 static const uchar jpegTableK4[] =
708 {
709     0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
710     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
711 };
712 
713 // ... for luma ACs.
714 static const uchar jpegTableK5[] =
715 {
716     0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
717     0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
718     0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
719     0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
720     0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
721     0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
722     0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
723     0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
724     0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
725     0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
726     0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
727     0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
728     0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
729     0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
730     0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
731     0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
732     0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
733     0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
734     0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
735     0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
736     0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
737     0xf9, 0xfa
738 };
739 
740 // ... for chroma ACs
741 static const uchar jpegTableK6[] =
742 {
743     0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
744     0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
745     0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
746     0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
747     0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
748     0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
749     0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
750     0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
751     0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
752     0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
753     0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
754     0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
755     0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
756     0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
757     0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
758     0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
759     0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
760     0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
761     0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
762     0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
763     0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
764     0xf9, 0xfa
765 };
766 
767 static const uchar zigzag[] =
768 {
769     0,  8,  1,  2,  9, 16, 24, 17, 10,  3,  4, 11, 18, 25, 32, 40,
770     33, 26, 19, 12,  5,  6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
771     28, 21, 14,  7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
772     23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
773     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
774 };
775 
776 
777 static const int idct_prescale[] =
778 {
779     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
780     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
781     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
782     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
783     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
784     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
785     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
786     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
787 };
788 
789 static const char jpegHeader[] =
790 "\xFF\xD8"  // SOI  - start of image
791 "\xFF\xE0"  // APP0 - jfif extention
792 "\x00\x10"  // 2 bytes: length of APP0 segment
793 "JFIF\x00"  // JFIF signature
794 "\x01\x02"  // version of JFIF
795 "\x00"      // units = pixels ( 1 - inch, 2 - cm )
796 "\x00\x01\x00\x01" // 2 2-bytes values: x density & y density
797 "\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
798 
799 #ifdef WITH_NEON
800 // FDCT with postscaling
aan_fdct8x8(const short * src,short * dst,int step,const short * postscale)801 static void aan_fdct8x8( const short *src, short *dst,
802                         int step, const short *postscale )
803 {
804     // Pass 1: process rows
805     int16x8_t x0 = vld1q_s16(src);    int16x8_t x1 = vld1q_s16(src + step*7);
806     int16x8_t x2 = vld1q_s16(src + step*3);    int16x8_t x3 = vld1q_s16(src + step*4);
807 
808     int16x8_t x4 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
809     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
810 
811     int16x8_t t1 = x0; int16x8_t t2 = x2;
812 
813     x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
814 
815     x0 = vld1q_s16(src + step);    x3 = vld1q_s16(src + step*6);
816 
817     x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
818     int16x8_t t3 = x0;
819 
820     x0 = vld1q_s16(src + step*2);    x3 = vld1q_s16(src + step*5);
821 
822     int16x8_t t4 = vsubq_s16(x0, x3);
823 
824     x0 = vaddq_s16(x0, x3);
825     x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
826     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
827 
828     int16x8_t res0 = x1;
829     int16x8_t res4 = x2;
830     x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
831     x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
832 
833     int16x8_t res2 = x4;
834     int16x8_t res6 = x1;
835 
836     x0 = t2;    x1 = t4;
837     x2 = t3;    x3 = t1;
838     x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
839     x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
840 
841     x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
842     x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
843     x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
844     x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
845 
846     x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
847     x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
848 
849     int16x8_t res1 = x0;
850     int16x8_t res3 = x3;
851     int16x8_t res5 = x1;
852     int16x8_t res7 = x4;
853 
854     //transpose a matrix
855     /*
856      res0 00 01 02 03 04 05 06 07
857      res1 10 11 12 13 14 15 16 17
858      res2 20 21 22 23 24 25 26 27
859      res3 30 31 32 33 34 35 36 37
860      res4 40 41 42 43 44 45 46 47
861      res5 50 51 52 53 54 55 56 57
862      res6 60 61 62 63 64 65 66 67
863      res7 70 71 72 73 74 75 76 77
864      */
865 
866     //transpose elements 00-33
867     int16x4_t res0_0 = vget_low_s16(res0);
868     int16x4_t res1_0 = vget_low_s16(res1);
869     int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
870     int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
871 
872     res0_0 = vget_low_s16(res2);
873     res1_0 = vget_low_s16(res3);
874     tres = vtrn_s16(res0_0, res1_0);
875     int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
876 
877     int32x4x2_t tres1 = vtrnq_s32(l0, l1);
878 
879     // transpose elements 40-73
880     res0_0 = vget_low_s16(res4);
881     res1_0 = vget_low_s16(res5);
882     tres = vtrn_s16(res0_0, res1_0);
883     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
884 
885     res0_0 = vget_low_s16(res6);
886     res1_0 = vget_low_s16(res7);
887 
888     tres = vtrn_s16(res0_0, res1_0);
889     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
890 
891     int32x4x2_t tres2 = vtrnq_s32(l0, l1);
892 
893     //combine into 0-3
894     int16x8_t transp_res0 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
895     int16x8_t transp_res1 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
896     int16x8_t transp_res2 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
897     int16x8_t transp_res3 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
898 
899     // transpose elements 04-37
900     res0_0 = vget_high_s16(res0);
901     res1_0 = vget_high_s16(res1);
902     tres = vtrn_s16(res0_0, res1_0);
903     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
904 
905     res0_0 = vget_high_s16(res2);
906     res1_0 = vget_high_s16(res3);
907 
908     tres = vtrn_s16(res0_0, res1_0);
909     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
910 
911     tres1 = vtrnq_s32(l0, l1);
912 
913     // transpose elements 44-77
914     res0_0 = vget_high_s16(res4);
915     res1_0 = vget_high_s16(res5);
916     tres = vtrn_s16(res0_0, res1_0);
917     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
918 
919     res0_0 = vget_high_s16(res6);
920     res1_0 = vget_high_s16(res7);
921 
922     tres = vtrn_s16(res0_0, res1_0);
923     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
924 
925     tres2 = vtrnq_s32(l0, l1);
926 
927     //combine into 4-7
928     int16x8_t transp_res4 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
929     int16x8_t transp_res5 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
930     int16x8_t transp_res6 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
931     int16x8_t transp_res7 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
932 
933     //special hack for vqdmulhq_s16 command that is producing -1 instead of 0
934 #define STORE_DESCALED(addr, reg, mul_addr)            postscale_line = vld1q_s16((mul_addr)); \
935 mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
936 reg = vabsq_s16(reg); \
937 reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
938 reg = vsubq_s16(veorq_s16(reg, mask), mask); \
939 vst1q_s16((addr), reg);
940 
941     int16x8_t z = vdupq_n_s16(0), postscale_line, mask;
942 
943     // pass 2: process columns
944     x0 = transp_res0;    x1 = transp_res7;
945     x2 = transp_res3;    x3 = transp_res4;
946 
947     x4 = vaddq_s16(x0, x1);   x0 = vsubq_s16(x0, x1);
948     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
949 
950     t1 = x0; t2 = x2;
951 
952     x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
953 
954     x0 = transp_res1;
955     x3 = transp_res6;
956 
957     x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
958 
959     t3 = x0;
960 
961     x0 = transp_res2; x3 = transp_res5;
962 
963     t4 = vsubq_s16(x0, x3);
964 
965     x0 = vaddq_s16(x0, x3);
966 
967     x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
968     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
969 
970     STORE_DESCALED(dst, x1, postscale);
971     STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);
972 
973     x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
974 
975     x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
976 
977     STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
978     STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);
979 
980     x0 = t2; x1 = t4;
981     x2 = t3; x3 = t1;
982 
983     x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
984 
985     x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
986 
987     x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
988 
989     x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
990     x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
991     x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
992 
993     x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
994     x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
995 
996     STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
997     STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
998     STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
999     STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
1000 }
1001 
1002 #else
1003 // FDCT with postscaling
aan_fdct8x8(const short * src,short * dst,int step,const short * postscale)1004 static void aan_fdct8x8( const short *src, short *dst,
1005                         int step, const short *postscale )
1006 {
1007     int workspace[64], *work = workspace;
1008     int  i;
1009 
1010     // Pass 1: process rows
1011     for( i = 8; i > 0; i--, src += step, work += 8 )
1012     {
1013         int x0 = src[0], x1 = src[7];
1014         int x2 = src[3], x3 = src[4];
1015 
1016         int x4 = x0 + x1; x0 -= x1;
1017         x1 = x2 + x3; x2 -= x3;
1018 
1019         work[7] = x0; work[1] = x2;
1020         x2 = x4 + x1; x4 -= x1;
1021 
1022         x0 = src[1]; x3 = src[6];
1023         x1 = x0 + x3; x0 -= x3;
1024         work[5] = x0;
1025 
1026         x0 = src[2]; x3 = src[5];
1027         work[3] = x0 - x3; x0 += x3;
1028 
1029         x3 = x0 + x1; x0 -= x1;
1030         x1 = x2 + x3; x2 -= x3;
1031 
1032         work[0] = x1; work[4] = x2;
1033 
1034         x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
1035         x1 = x4 + x0; x4 -= x0;
1036         work[2] = x4; work[6] = x1;
1037 
1038         x0 = work[1]; x1 = work[3];
1039         x2 = work[5]; x3 = work[7];
1040 
1041         x0 += x1; x1 += x2; x2 += x3;
1042         x1 = DCT_DESCALE(x1*C0_707, fixb);
1043 
1044         x4 = x1 + x3; x3 -= x1;
1045         x1 = (x0 - x2)*C0_382;
1046         x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
1047         x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
1048 
1049         x1 = x0 + x3; x3 -= x0;
1050         x0 = x4 + x2; x4 -= x2;
1051 
1052         work[5] = x1; work[1] = x0;
1053         work[7] = x4; work[3] = x3;
1054     }
1055 
1056     work = workspace;
1057     // pass 2: process columns
1058     for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
1059     {
1060         int  x0 = work[8*0], x1 = work[8*7];
1061         int  x2 = work[8*3], x3 = work[8*4];
1062 
1063         int  x4 = x0 + x1; x0 -= x1;
1064         x1 = x2 + x3; x2 -= x3;
1065 
1066         work[8*7] = x0; work[8*0] = x2;
1067         x2 = x4 + x1; x4 -= x1;
1068 
1069         x0 = work[8*1]; x3 = work[8*6];
1070         x1 = x0 + x3; x0 -= x3;
1071         work[8*4] = x0;
1072 
1073         x0 = work[8*2]; x3 = work[8*5];
1074         work[8*3] = x0 - x3; x0 += x3;
1075 
1076         x3 = x0 + x1; x0 -= x1;
1077         x1 = x2 + x3; x2 -= x3;
1078 
1079         dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
1080         dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);
1081 
1082         x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
1083         x1 = x4 + x0; x4 -= x0;
1084 
1085         dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
1086         dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);
1087 
1088         x0 = work[8*0]; x1 = work[8*3];
1089         x2 = work[8*4]; x3 = work[8*7];
1090 
1091         x0 += x1; x1 += x2; x2 += x3;
1092         x1 = DCT_DESCALE(x1*C0_707, fixb);
1093 
1094         x4 = x1 + x3; x3 -= x1;
1095         x1 = (x0 - x2)*C0_382;
1096         x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
1097         x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
1098 
1099         x1 = x0 + x3; x3 -= x0;
1100         x0 = x4 + x2; x4 -= x2;
1101 
1102         dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
1103         dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
1104         dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
1105         dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
1106     }
1107 }
1108 #endif
1109 
writeFrameData(const uchar * data,int step,int colorspace,int input_channels)1110 void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
1111 {
1112     //double total_cvt = 0, total_dct = 0;
1113     static bool init_cat_table = false;
1114     const int CAT_TAB_SIZE = 4096;
1115     static uchar cat_table[CAT_TAB_SIZE*2+1];
1116     if( !init_cat_table )
1117     {
1118         for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
1119         {
1120             Cv32suf a;
1121             a.f = (float)i;
1122             cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
1123         }
1124         init_cat_table = true;
1125     }
1126 
1127     //double total_dct = 0, total_cvt = 0;
1128     CV_Assert( data && width > 0 && height > 0 );
1129 
1130     // encode the header and tables
1131     // for each mcu:
1132     //   convert rgb to yuv with downsampling (if color).
1133     //   for every block:
1134     //     calc dct and quantize
1135     //     encode block.
1136     int x, y;
1137     int i, j;
1138     const int max_quality = 12;
1139     short fdct_qtab[2][64];
1140     unsigned huff_dc_tab[2][16];
1141     unsigned huff_ac_tab[2][256];
1142 
1143     int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1144     int  dc_pred[] = { 0, 0, 0 };
1145     int  x_step = x_scale * 8;
1146     int  y_step = y_scale * 8;
1147     short  block[6][64];
1148     short  buffer[4096];
1149     int*   hbuffer = (int*)buffer;
1150     int  luma_count = x_scale*y_scale;
1151     int  block_count = luma_count + channels - 1;
1152     int  Y_step = x_scale*8;
1153     const int UV_step = 16;
1154     int u_plane_ofs = step*height;
1155     int v_plane_ofs = u_plane_ofs + step*height;
1156     double _quality = quality*0.01*max_quality;
1157 
1158     if( _quality < 1. ) _quality = 1.;
1159     if( _quality > max_quality ) _quality = max_quality;
1160 
1161     double inv_quality = 1./_quality;
1162 
1163     // Encode header
1164     strm.putBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
1165 
1166     // Encode quantization tables
1167     for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
1168     {
1169         const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
1170         int chroma_scale = i > 0 ? luma_count : 1;
1171 
1172         strm.jputShort( 0xffdb );   // DQT marker
1173         strm.jputShort( 2 + 65*1 ); // put single qtable
1174         strm.putByte( 0*16 + i );   // 8-bit table
1175 
1176         // put coefficients
1177         for( j = 0; j < 64; j++ )
1178         {
1179             int idx = zigzag[j];
1180             int qval = cvRound(qtable[idx]*inv_quality);
1181             if( qval < 1 )
1182                 qval = 1;
1183             if( qval > 255 )
1184                 qval = 255;
1185             fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
1186                                 (qval*chroma_scale*idct_prescale[idx]));
1187             strm.putByte( qval );
1188         }
1189     }
1190 
1191     // Encode huffman tables
1192     for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
1193     {
1194         const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
1195         i == 2 ? jpegTableK4 : jpegTableK6;
1196         int is_ac_tab = i & 1;
1197         int idx = i >= 2;
1198         int tableSize = 16 + (is_ac_tab ? 162 : 12);
1199 
1200         strm.jputShort( 0xFFC4 );      // DHT marker
1201         strm.jputShort( 3 + tableSize ); // define one huffman table
1202         strm.putByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
1203         strm.putBytes( htable, tableSize ); // put table
1204 
1205         BitStream::createEncodeHuffmanTable( BitStream::createSourceHuffmanTable(
1206                                             htable, hbuffer, 16, 9 ), is_ac_tab ? huff_ac_tab[idx] :
1207                                             huff_dc_tab[idx], is_ac_tab ? 256 : 16 );
1208     }
1209 
1210     // put frame header
1211     strm.jputShort( 0xFFC0 );          // SOF0 marker
1212     strm.jputShort( 8 + 3*channels );  // length of frame header
1213     strm.putByte( 8 );               // sample precision
1214     strm.jputShort( height );
1215     strm.jputShort( width );
1216     strm.putByte( channels );        // number of components
1217 
1218     for( i = 0; i < channels; i++ )
1219     {
1220         strm.putByte( i + 1 );  // (i+1)-th component id (Y,U or V)
1221         if( i == 0 )
1222             strm.putByte(x_scale*16 + y_scale); // chroma scale factors
1223         else
1224             strm.putByte(1*16 + 1);
1225         strm.putByte( i > 0 ); // quantization table idx
1226     }
1227 
1228     // put scan header
1229     strm.jputShort( 0xFFDA );          // SOS marker
1230     strm.jputShort( 6 + 2*channels );  // length of scan header
1231     strm.putByte( channels );          // number of components in the scan
1232 
1233     for( i = 0; i < channels; i++ )
1234     {
1235         strm.putByte( i+1 );             // component id
1236         strm.putByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
1237     }
1238 
1239     strm.jputShort(0*256 + 63); // start and end of spectral selection - for
1240     // sequential DCT start is 0 and end is 63
1241 
1242     strm.putByte( 0 );  // successive approximation bit position
1243     // high & low - (0,0) for sequential DCT
1244     unsigned currval = 0, code = 0, tempval = 0;
1245     int bit_idx = 32;
1246 
1247 #define JPUT_BITS(val, bits) \
1248     bit_idx -= (bits); \
1249     tempval = (val) & bit_mask[(bits)]; \
1250     if( bit_idx <= 0 ) \
1251     {  \
1252         strm.jput(currval | ((unsigned)tempval >> -bit_idx)); \
1253         bit_idx += 32; \
1254         currval = bit_idx < 32 ? (tempval << bit_idx) : 0; \
1255     } \
1256     else \
1257         currval |= (tempval << bit_idx)
1258 
1259 #define JPUT_HUFF(val, table) \
1260     code = table[(val) + 2]; \
1261     JPUT_BITS(code >> 8, (int)(code & 255))
1262 
1263     // encode data
1264     for( y = 0; y < height; y += y_step, data += y_step*step )
1265     {
1266         for( x = 0; x < width; x += x_step )
1267         {
1268             int x_limit = x_step;
1269             int y_limit = y_step;
1270             const uchar* pix_data = data + x*input_channels;
1271             short* Y_data = block[0];
1272 
1273             if( x + x_limit > width ) x_limit = width - x;
1274             if( y + y_limit > height ) y_limit = height - y;
1275 
1276             memset( block, 0, block_count*64*sizeof(block[0][0]));
1277 
1278             if( channels > 1 )
1279             {
1280                 short* UV_data = block[luma_count];
1281                 // double t = (double)cv::getTickCount();
1282 
1283                 if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
1284                 {
1285                     for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
1286                     {
1287 #ifdef WITH_NEON
1288                         {
1289                             uint16x8_t masklo = vdupq_n_u16(255);
1290                             uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
1291                             uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1292                             lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
1293                             uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1294                             t1 = vaddq_u16(t1, t2);
1295                             vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1296 
1297                             lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
1298                             t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1299                             lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
1300                             t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1301                             t1 = vaddq_u16(t1, t2);
1302                             vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1303                         }
1304 
1305                         {
1306                             int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
1307                             int16x8_t delta = vdupq_n_s16(128);
1308                             lane = vsubq_s16(lane, delta);
1309                             vst1q_s16(Y_data, lane);
1310 
1311                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
1312                             lane = vsubq_s16(lane, delta);
1313                             vst1q_s16(Y_data + 8, lane);
1314 
1315                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
1316                             lane = vsubq_s16(lane, delta);
1317                             vst1q_s16(Y_data+Y_step, lane);
1318 
1319                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
1320                             lane = vsubq_s16(lane, delta);
1321                             vst1q_s16(Y_data+Y_step + 8, lane);
1322                         }
1323 #else
1324                         for( j = 0; j < x_limit; j += 2, pix_data += 2 )
1325                         {
1326                             Y_data[j] = pix_data[0] - 128;
1327                             Y_data[j+1] = pix_data[1] - 128;
1328                             Y_data[j+Y_step] = pix_data[step] - 128;
1329                             Y_data[j+Y_step+1] = pix_data[step+1] - 128;
1330 
1331                             UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
1332                                 pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
1333                             UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
1334                                 pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
1335 
1336                         }
1337 
1338                         pix_data -= x_limit*input_channels;
1339 #endif
1340                     }
1341                 }
1342                 else
1343                 {
1344                     for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1345                     {
1346                         for( j = 0; j < x_limit; j++, pix_data += input_channels )
1347                         {
1348                             int Y, U, V;
1349 
1350                             if( colorspace == COLORSPACE_BGR )
1351                             {
1352                                 int r = pix_data[2];
1353                                 int g = pix_data[1];
1354                                 int b = pix_data[0];
1355 
1356                                 Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1357                                 U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1358                                 V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1359                             }
1360                             else if( colorspace == COLORSPACE_RGBA )
1361                             {
1362                                 int r = pix_data[0];
1363                                 int g = pix_data[1];
1364                                 int b = pix_data[2];
1365 
1366                                 Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1367                                 U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1368                                 V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1369                             }
1370                             else
1371                             {
1372                                 Y = pix_data[0] - 128;
1373                                 U = pix_data[v_plane_ofs] - 128;
1374                                 V = pix_data[u_plane_ofs] - 128;
1375                             }
1376 
1377                             int j2 = j >> (x_scale - 1);
1378                             Y_data[j] = (short)Y;
1379                             UV_data[j2] = (short)(UV_data[j2] + U);
1380                             UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
1381                         }
1382 
1383                         pix_data -= x_limit*input_channels;
1384                         if( ((i+1) & (y_scale - 1)) == 0 )
1385                         {
1386                             UV_data += UV_step;
1387                         }
1388                     }
1389                 }
1390 
1391                 // total_cvt += (double)cv::getTickCount() - t;
1392             }
1393             else
1394             {
1395                 for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1396                 {
1397                     for( j = 0; j < x_limit; j++ )
1398                         Y_data[j] = (short)(pix_data[j]*4 - 128*4);
1399                 }
1400             }
1401 
1402             for( i = 0; i < block_count; i++ )
1403             {
1404                 int is_chroma = i >= luma_count;
1405                 int src_step = x_scale * 8;
1406                 int run = 0, val;
1407                 const short* src_ptr = block[i & -2] + (i & 1)*8;
1408                 const unsigned* htable = huff_ac_tab[is_chroma];
1409 
1410                 //double t = (double)cv::getTickCount();
1411                 aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1412                 //total_dct += (double)cv::getTickCount() - t;
1413 
1414                 j = is_chroma + (i > luma_count);
1415                 val = buffer[0] - dc_pred[j];
1416                 dc_pred[j] = buffer[0];
1417 
1418                 {
1419                     int cat = cat_table[val + CAT_TAB_SIZE];
1420 
1421                     //CV_Assert( cat <= 11 );
1422                     JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
1423                     JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
1424                 }
1425 
1426                 for( j = 1; j < 64; j++ )
1427                 {
1428                     val = buffer[zigzag[j]];
1429 
1430                     if( val == 0 )
1431                     {
1432                         run++;
1433                     }
1434                     else
1435                     {
1436                         while( run >= 16 )
1437                         {
1438                             JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
1439                             run -= 16;
1440                         }
1441 
1442                         {
1443                             int cat = cat_table[val + CAT_TAB_SIZE];
1444                             //CV_Assert( cat <= 10 );
1445                             JPUT_HUFF( cat + run*16, htable );
1446                             JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
1447                         }
1448 
1449                         run = 0;
1450                     }
1451                 }
1452 
1453                 if( run )
1454                 {
1455                     JPUT_HUFF( 0x00, htable ); // encode EOB
1456                 }
1457             }
1458         }
1459     }
1460 
1461     // Flush
1462     strm.jflush(currval, bit_idx);
1463     strm.jputShort( 0xFFD9 ); // EOI marker
1464     /*printf("total dct = %.1fms, total cvt = %.1fms\n",
1465      total_dct*1000./cv::getTickFrequency(),
1466      total_cvt*1000./cv::getTickFrequency());*/
1467     size_t pos = strm.getPos();
1468     size_t pos1 = (pos + 3) & ~3;
1469     for( ; pos < pos1; pos++ )
1470         strm.putByte(0);
1471 }
1472 
1473 }
1474 
createMotionJpegWriter(const String & filename,double fps,Size frameSize,bool iscolor)1475 Ptr<IVideoWriter> createMotionJpegWriter( const String& filename, double fps, Size frameSize, bool iscolor )
1476 {
1477     Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
1478     if( !iwriter->isOpened() )
1479         iwriter.release();
1480     return iwriter;
1481 }
1482 
1483 }
1484