1Enabling SSE support 2 3Copyright (c) 2016 Google, Inc. 4Written by Mike Klein, Matt Sarett 5 6This INSTALL file written by Glenn Randers-Pehrson, 2016. 7 8If you have moved intel_init.c and filter_sse2_intrinsics.c to a different 9directory, be sure to update the '#include "../../pngpriv.h"' line in both 10files if necessary to point to the correct relative location of pngpriv.h 11with respect to the new location of those files. 12 13To enable SSE support in libpng, follow the instructions in I, II, or III, 14below: 15 16I. Using patched "configure" scripts: 17 18First, apply intel_sse.patch in your build directory. 19 20 patch -i contrib/intel/intel_sse.patch -p1 21 22Then, if you are not building in a new GIT clone, e.g., in a tar 23distribution, remove any existing pre-built configure scripts: 24 25 ./configure --enable-maintainer-mode 26 make maintainer-clean 27 ./autogen.sh --maintainer --clean 28 29Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS: 30 31 ./autogen.sh --maintainer 32 CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options] 33 make CPPFLAGS="-DPNG_INTEL_SSE" [options] 34 make 35 36II. Using a custom makefile: 37 38If you are using a custom makefile, you will have to update it 39manually to include contrib/intel/*.o in the dependencies, and to define 40PNG_INTEL_SSE. 41 42III. Using manually updated "configure" scripts: 43 44If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am, 45following the instructions below, then follow the instructions in 46section II of INSTALL in the main libpng directory, then configure libpng 47with -DPNG_INTEL_SSE in CPPFLAGS. 48 491. Add the following code to configure.ac under HOST SPECIFIC OPTIONS 50directly beneath the section for ARM: 51 52-----------------cut---------------- 53# INTEL 54# ===== 55# 56# INTEL SSE (SIMD) support. 
57 58AC_ARG_ENABLE([intel-sse], 59 AS_HELP_STRING([[[--enable-intel-sse]]], 60 [Enable Intel SSE optimizations: =no/off, yes/on:] 61 [no/off: disable the optimizations;] 62 [yes/on: enable the optimizations.] 63 [If not specified: determined by the compiler.]), 64 [case "$enableval" in 65 no|off) 66 # disable the default enabling: 67 AC_DEFINE([PNG_INTEL_SSE_OPT], [0], 68 [Disable Intel SSE optimizations]) 69 # Prevent inclusion of the assembler files below: 70 enable_intel_sse=no;; 71 yes|on) 72 AC_DEFINE([PNG_INTEL_SSE_OPT], [1], 73 [Enable Intel SSE optimizations]);; 74 *) 75 AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value]) 76 esac]) 77 78# Add Intel specific files to all builds where the host_cpu is Intel ('x86*') 79# or where Intel optimizations were explicitly requested (this allows a 80# fallback if a future host CPU does not match 'x86*') 81AM_CONDITIONAL([PNG_INTEL_SSE], 82 [test "$enable_intel_sse" != 'no' && 83 case "$host_cpu" in 84 i?86|x86_64) :;; 85 *) test "$enable_intel_sse" != '';; 86 esac]) 87-----------------cut---------------- 88 892. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS 90directly beneath the "if PNG_ARM_NEON ... endif" statement: 91 92-----------------cut---------------- 93if PNG_INTEL_SSE 94libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ 95 contrib/intel/filter_sse2_intrinsics.c 96endif 97-----------------cut---------------- 98 993. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT 100code: 101 102-----------------cut---------------- 103#ifndef PNG_INTEL_SSE_OPT 104# ifdef PNG_INTEL_SSE 105 /* Only check for SSE if the build configuration has been modified to 106 * enable SSE optimizations. This means that these optimizations will 107 * be off by default. See contrib/intel for more details. 
108 */ 109# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ 110 defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ 111 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) 112# define PNG_INTEL_SSE_OPT 1 113# endif 114# endif 115#endif 116 117#if PNG_INTEL_SSE_OPT > 0 118# ifndef PNG_INTEL_SSE_IMPLEMENTATION 119# if defined(__SSE4_1__) || defined(__AVX__) 120 /* We are not actually using AVX, but checking for AVX is the best 121 way we can detect SSE4.1 and SSSE3 on MSVC. 122 */ 123# define PNG_INTEL_SSE_IMPLEMENTATION 3 124# elif defined(__SSSE3__) 125# define PNG_INTEL_SSE_IMPLEMENTATION 2 126# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ 127 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) 128# define PNG_INTEL_SSE_IMPLEMENTATION 1 129# else 130# define PNG_INTEL_SSE_IMPLEMENTATION 0 131# endif 132# endif 133 134# if PNG_INTEL_SSE_IMPLEMENTATION > 0 135# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 136# endif 137#endif 138 139-----------------cut---------------- 140 1414. 
Add the following lines to pngpriv.h, following the prototype for 142png_read_filter_row_paeth4_neon: 143 144-----------------cut---------------- 145PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop 146 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 147PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop 148 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 149PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop 150 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 151PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop 152 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 153PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop 154 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 155PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop 156 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 157 158-----------------cut---------------- 159