• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1Enabling SSE support
2
3Copyright (c) 2016 Google, Inc.
4Written by Mike Klein, Matt Sarett
5
6This INSTALL file written by Glenn Randers-Pehrson, 2016.
7
8If you have moved intel_init.c and filter_sse2_intrinsics.c to a different
9directory, be sure to update the '#include "../../pngpriv.h"' line in both
10files if necessary to point to the correct relative location of pngpriv.h
11with respect to the new location of those files.
12
13To enable SSE support in libpng, follow the instructions in I, II, or III,
14below:
15
16I. Using patched "configure" scripts:
17
18First, apply intel_sse.patch in your build directory.
19
20   patch -i contrib/intel/intel_sse.patch -p1
21
22Then, if you are not building in a new GIT clone, e.g., in a tar
23distribution, remove any existing pre-built configure scripts:
24
25   ./configure --enable-maintainer-mode
26   make maintainer-clean
27   ./autogen.sh --maintainer --clean
28
29Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS:
30
31   ./autogen.sh --maintainer
32   CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options]
33   make CPPFLAGS="-DPNG_INTEL_SSE" [options]
34   make
35
36II. Using a custom makefile:
37
38If you are using a custom makefile, you will have to update it
39manually to include contrib/intel/*.o in the dependencies, and to define
40PNG_INTEL_SSE.
41
42III. Using manually updated "configure" scripts:
43
44If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am,
45following the instructions below, then follow the instructions in
46section II of INSTALL in the main libpng directory, then configure libpng
47with -DPNG_INTEL_SSE in CPPFLAGS.
48
491. Insert the following lines above the copyright line near the top of
50configure.ac:
51
52-----------------cut----------------
53# Copyright (c) 2016 Google, Inc.
54# Written by Mike Klein and Matt Sarett
55# Derived from the ARM supporting code in libpng/configure.ac, which was
56-----------------cut----------------
57
582. Add the following code to configure.ac under HOST SPECIFIC OPTIONS
59directly beneath the section for ARM:
60
61-----------------cut----------------
62# INTEL
63# =====
64#
65# INTEL SSE (SIMD) support.
66
67AC_ARG_ENABLE([intel-sse],
68   AS_HELP_STRING([[[--enable-intel-sse]]],
69      [Enable Intel SSE optimizations: =no/off, yes/on:]
70      [no/off: disable the optimizations;]
71      [yes/on: enable the optimizations.]
72      [If not specified: determined by the compiler.]),
73   [case "$enableval" in
74      no|off)
75         # disable the default enabling:
76         AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
77                   [Disable Intel SSE optimizations])
78         # Prevent inclusion of the Intel-specific source files below:
79         enable_intel_sse=no;;
80      yes|on)
81         AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
82                   [Enable Intel SSE optimizations]);;
83      *)
84         AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
85   esac])
86
87# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
88# or where Intel optimizations were explicitly requested (this allows a
89# fallback if a future host CPU does not match 'x86*')
90AM_CONDITIONAL([PNG_INTEL_SSE],
91   [test "$enable_intel_sse" != 'no' &&
92    case "$host_cpu" in
93      i?86|x86_64) :;;
94      *)    test "$enable_intel_sse" != '';;
95    esac])
96-----------------cut----------------
97
983. Insert the following lines above the copyright line near the top of
99Makefile.am:
100
101-----------------cut----------------
102# Copyright (c) 2016 Google, Inc.
103# Written by Mike Klein and Matt Sarett
104# Derived from the ARM supporting code in libpng/Makefile.am, which was
105-----------------cut----------------
106
1074. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS
108directly beneath the "if PNG_ARM_NEON ... endif" statement:
109
110-----------------cut----------------
111if PNG_INTEL_SSE
112libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
113    contrib/intel/filter_sse2_intrinsics.c
114endif
115-----------------cut----------------
116
1175. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT
118code:
119
120-----------------cut----------------
121#ifndef PNG_INTEL_SSE_OPT
122#   ifdef PNG_INTEL_SSE
123      /* Only check for SSE if the build configuration has been modified to
124       * enable SSE optimizations.  This means that these optimizations will
125       * be off by default.  See contrib/intel for more details.
126       */
127#     if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
128       defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
129       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
130#         define PNG_INTEL_SSE_OPT 1
131#      endif
132#   endif
133#endif
134
135#if PNG_INTEL_SSE_OPT > 0
136#   ifndef PNG_INTEL_SSE_IMPLEMENTATION
137#      if defined(__SSE4_1__) || defined(__AVX__)
138          /* We are not actually using AVX, but checking for AVX is the best
139             way we can detect SSE4.1 and SSSE3 on MSVC.
140          */
141#         define PNG_INTEL_SSE_IMPLEMENTATION 3
142#      elif defined(__SSSE3__)
143#         define PNG_INTEL_SSE_IMPLEMENTATION 2
144#      elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
145       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
146#         define PNG_INTEL_SSE_IMPLEMENTATION 1
147#      else
148#         define PNG_INTEL_SSE_IMPLEMENTATION 0
149#      endif
150#   endif
151
152#   if PNG_INTEL_SSE_IMPLEMENTATION > 0
153#      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
154#   endif
155#endif
156
157-----------------cut----------------
158
1596. Add the following lines to pngpriv.h, following the prototype for
160png_read_filter_row_paeth4_neon:
161
162-----------------cut----------------
163PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
164    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
165PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
166    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
167PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
168    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
169PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
170    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
171PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
172    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
173PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
174    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
175
176-----------------cut----------------
177