1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #endif
34 
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37     runtime_cpu_detection = false;
38 }
39 
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46 
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49 
50 static const union {
51     uint32_t bits;
52     float    f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55 
56 #if defined(__clang__) || defined(__GNUC__)
57     #define small_memcpy __builtin_memcpy
58 #else
59     #define small_memcpy memcpy
60 #endif
61 
log2f_(float x)62 static float log2f_(float x) {
63     // The first approximation of log2(x) is its exponent 'e', minus 127.
64     int32_t bits;
65     small_memcpy(&bits, &x, sizeof(bits));
66 
67     float e = (float)bits * (1.0f / (1<<23));
68 
69     // If we use the mantissa too we can refine the error signficantly.
70     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71     float m;
72     small_memcpy(&m, &m_bits, sizeof(m));
73 
74     return (e - 124.225514990f
75               -   1.498030302f*m
76               -   1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79     const float ln2 = 0.69314718f;
80     return ln2*log2f_(x);
81 }
82 
exp2f_(float x)83 static float exp2f_(float x) {
84     float fract = x - floorf_(x);
85 
86     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87                                         -   1.490129070f*fract
88                                         +  27.728023300f/(4.84252568f - fract));
89 
90     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92     // Negative values are effectively underflow - we'll end up returning a (different) negative
93     // value, which makes no sense. So clamp to zero.
94     if (fbits >= (float)INT_MAX) {
95         return INFINITY_;
96     } else if (fbits < 0) {
97         return 0;
98     }
99 
100     int32_t bits = (int32_t)fbits;
101     small_memcpy(&x, &bits, sizeof(x));
102     return x;
103 }
104 
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107     assert (x >= 0);
108     return (x == 0) || (x == 1) ? x
109                                 : exp2f_(log2f_(x) * y);
110 }
111 
expf_(float x)112 static float expf_(float x) {
113     const float log2_e = 1.4426950408889634074f;
114     return exp2f_(log2_e * x);
115 }
116 
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119 
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121 
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123     int32_t bits;
124     memcpy(&bits, &x, sizeof(bits));
125     bits = bits - 1;
126     memcpy(&x, &bits, sizeof(bits));
127     return x;
128 }
129 
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
134 struct TF_PQish  { float A,B,C,D,E,F; };
135 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
136 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
137 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
138 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
139 
TFKind_marker(TFKind kind)140 static float TFKind_marker(TFKind kind) {
141     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
142     return -(float)kind;
143 }
144 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)145 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
146                                                        , TF_HLGish* hlg = nullptr) {
147     if (tf.g < 0 && (int)tf.g == tf.g) {
148         // TODO: soundness checks for PQ/HLG like we do for sRGBish?
149         switch ((int)tf.g) {
150             case -PQish:     if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
151             case -HLGish:    if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
152             case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
153         }
154         return Bad;
155     }
156 
157     // Basic soundness checks for sRGBish transfer functions.
158     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
159             // a,c,d,g should be non-negative to make any sense.
160             && tf.a >= 0
161             && tf.c >= 0
162             && tf.d >= 0
163             && tf.g >= 0
164             // Raising a negative value to a fractional tf->g produces complex numbers.
165             && tf.a * tf.d + tf.b >= 0) {
166         return sRGBish;
167     }
168 
169     return Bad;
170 }
171 
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)172 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
173     return classify(*tf) == sRGBish;
174 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)175 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
176     return classify(*tf) == PQish;
177 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)178 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
179     return classify(*tf) == HLGish;
180 }
181 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)182 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
183                                       float A, float B, float C,
184                                       float D, float E, float F) {
185     *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
186     assert(skcms_TransferFunction_isPQish(tf));
187     return true;
188 }
189 
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)190 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
191                                              float K, float R, float G,
192                                              float a, float b, float c) {
193     *tf = { TFKind_marker(HLGish), R,G, a,b,c, K-1.0f };
194     assert(skcms_TransferFunction_isHLGish(tf));
195     return true;
196 }
197 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)198 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
199     float sign = x < 0 ? -1.0f : 1.0f;
200     x *= sign;
201 
202     TF_PQish  pq;
203     TF_HLGish hlg;
204     switch (classify(*tf, &pq, &hlg)) {
205         case Bad:       break;
206 
207         case HLGish: {
208             const float K = hlg.K_minus_1 + 1.0f;
209             return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
210                                             : expf_((x-hlg.c)*hlg.a) + hlg.b);
211         }
212 
213         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
214         case HLGinvish: {
215             const float K = hlg.K_minus_1 + 1.0f;
216             x /= K;
217             return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
218                                   : hlg.a * logf_(x - hlg.b) + hlg.c);
219         }
220 
221 
222         case sRGBish: return sign * (x < tf->d ?       tf->c * x + tf->f
223                                                : powf_(tf->a * x + tf->b, tf->g) + tf->e);
224 
225         case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
226                                             / (pq.D + pq.E * powf_(x, pq.C)),
227                                         pq.F);
228     }
229     return 0;
230 }
231 
232 
eval_curve(const skcms_Curve * curve,float x)233 static float eval_curve(const skcms_Curve* curve, float x) {
234     if (curve->table_entries == 0) {
235         return skcms_TransferFunction_eval(&curve->parametric, x);
236     }
237 
238     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
239     int   lo = (int)                   ix        ,
240           hi = (int)(float)minus_1_ulp(ix + 1.0f);
241     float t = ix - (float)lo;
242 
243     float l, h;
244     if (curve->table_8) {
245         l = curve->table_8[lo] * (1/255.0f);
246         h = curve->table_8[hi] * (1/255.0f);
247     } else {
248         uint16_t be_l, be_h;
249         memcpy(&be_l, curve->table_16 + 2*lo, 2);
250         memcpy(&be_h, curve->table_16 + 2*hi, 2);
251         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
252         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
253         l = le_l * (1/65535.0f);
254         h = le_h * (1/65535.0f);
255     }
256     return l + (h-l)*t;
257 }
258 
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)259 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
260     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
261     const float dx = 1.0f / (N - 1);
262     float err = 0;
263     for (uint32_t i = 0; i < N; i++) {
264         float x = i * dx,
265               y = eval_curve(curve, x);
266         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
267     }
268     return err;
269 }
270 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)271 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
272     return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
273 }
274 
275 // Additional ICC signature values that are only used internally
276 enum {
277     // File signature
278     skcms_Signature_acsp = 0x61637370,
279 
280     // Tag signatures
281     skcms_Signature_rTRC = 0x72545243,
282     skcms_Signature_gTRC = 0x67545243,
283     skcms_Signature_bTRC = 0x62545243,
284     skcms_Signature_kTRC = 0x6B545243,
285 
286     skcms_Signature_rXYZ = 0x7258595A,
287     skcms_Signature_gXYZ = 0x6758595A,
288     skcms_Signature_bXYZ = 0x6258595A,
289 
290     skcms_Signature_A2B0 = 0x41324230,
291     skcms_Signature_B2A0 = 0x42324130,
292 
293     skcms_Signature_CHAD = 0x63686164,
294     skcms_Signature_WTPT = 0x77747074,
295 
296     // Type signatures
297     skcms_Signature_curv = 0x63757276,
298     skcms_Signature_mft1 = 0x6D667431,
299     skcms_Signature_mft2 = 0x6D667432,
300     skcms_Signature_mAB  = 0x6D414220,
301     skcms_Signature_mBA  = 0x6D424120,
302     skcms_Signature_para = 0x70617261,
303     skcms_Signature_sf32 = 0x73663332,
304     // XYZ is also a PCS signature, so it's defined in skcms.h
305     // skcms_Signature_XYZ = 0x58595A20,
306 };
307 
read_big_u16(const uint8_t * ptr)308 static uint16_t read_big_u16(const uint8_t* ptr) {
309     uint16_t be;
310     memcpy(&be, ptr, sizeof(be));
311 #if defined(_MSC_VER)
312     return _byteswap_ushort(be);
313 #else
314     return __builtin_bswap16(be);
315 #endif
316 }
317 
read_big_u32(const uint8_t * ptr)318 static uint32_t read_big_u32(const uint8_t* ptr) {
319     uint32_t be;
320     memcpy(&be, ptr, sizeof(be));
321 #if defined(_MSC_VER)
322     return _byteswap_ulong(be);
323 #else
324     return __builtin_bswap32(be);
325 #endif
326 }
327 
read_big_i32(const uint8_t * ptr)328 static int32_t read_big_i32(const uint8_t* ptr) {
329     return (int32_t)read_big_u32(ptr);
330 }
331 
read_big_fixed(const uint8_t * ptr)332 static float read_big_fixed(const uint8_t* ptr) {
333     return read_big_i32(ptr) * (1.0f / 65536.0f);
334 }
335 
336 // Maps to an in-memory profile so that fields line up to the locations specified
337 // in ICC.1:2010, section 7.2
338 typedef struct {
339     uint8_t size                [ 4];
340     uint8_t cmm_type            [ 4];
341     uint8_t version             [ 4];
342     uint8_t profile_class       [ 4];
343     uint8_t data_color_space    [ 4];
344     uint8_t pcs                 [ 4];
345     uint8_t creation_date_time  [12];
346     uint8_t signature           [ 4];
347     uint8_t platform            [ 4];
348     uint8_t flags               [ 4];
349     uint8_t device_manufacturer [ 4];
350     uint8_t device_model        [ 4];
351     uint8_t device_attributes   [ 8];
352     uint8_t rendering_intent    [ 4];
353     uint8_t illuminant_X        [ 4];
354     uint8_t illuminant_Y        [ 4];
355     uint8_t illuminant_Z        [ 4];
356     uint8_t creator             [ 4];
357     uint8_t profile_id          [16];
358     uint8_t reserved            [28];
359     uint8_t tag_count           [ 4]; // Technically not part of header, but required
360 } header_Layout;
361 
362 typedef struct {
363     uint8_t signature [4];
364     uint8_t offset    [4];
365     uint8_t size      [4];
366 } tag_Layout;
367 
get_tag_table(const skcms_ICCProfile * profile)368 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
369     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
370 }
371 
372 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
373 // use of the type is for the CHAD tag that stores exactly nine values.
374 typedef struct {
375     uint8_t type     [ 4];
376     uint8_t reserved [ 4];
377     uint8_t values   [36];
378 } sf32_Layout;
379 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)380 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
381     skcms_ICCTag tag;
382     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
383         return false;
384     }
385 
386     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
387         return false;
388     }
389 
390     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
391     const uint8_t* values = sf32Tag->values;
392     for (int r = 0; r < 3; ++r)
393     for (int c = 0; c < 3; ++c, values += 4) {
394         m->vals[r][c] = read_big_fixed(values);
395     }
396     return true;
397 }
398 
399 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
400 // the type are for tags/data that store exactly one triple.
401 typedef struct {
402     uint8_t type     [4];
403     uint8_t reserved [4];
404     uint8_t X        [4];
405     uint8_t Y        [4];
406     uint8_t Z        [4];
407 } XYZ_Layout;
408 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)409 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
410     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
411         return false;
412     }
413 
414     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
415 
416     *x = read_big_fixed(xyzTag->X);
417     *y = read_big_fixed(xyzTag->Y);
418     *z = read_big_fixed(xyzTag->Z);
419     return true;
420 }
421 
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])422 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
423     skcms_ICCTag tag;
424     return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
425            read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
426 }
427 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)428 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
429                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
430     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
431            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
432            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
433 }
434 
435 typedef struct {
436     uint8_t type          [4];
437     uint8_t reserved_a    [4];
438     uint8_t function_type [2];
439     uint8_t reserved_b    [2];
440     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
441 } para_Layout;
442 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)443 static bool read_curve_para(const uint8_t* buf, uint32_t size,
444                             skcms_Curve* curve, uint32_t* curve_size) {
445     if (size < SAFE_FIXED_SIZE(para_Layout)) {
446         return false;
447     }
448 
449     const para_Layout* paraTag = (const para_Layout*)buf;
450 
451     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
452     uint16_t function_type = read_big_u16(paraTag->function_type);
453     if (function_type > kGABCDEF) {
454         return false;
455     }
456 
457     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
458     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
459         return false;
460     }
461 
462     if (curve_size) {
463         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
464     }
465 
466     curve->table_entries = 0;
467     curve->parametric.a  = 1.0f;
468     curve->parametric.b  = 0.0f;
469     curve->parametric.c  = 0.0f;
470     curve->parametric.d  = 0.0f;
471     curve->parametric.e  = 0.0f;
472     curve->parametric.f  = 0.0f;
473     curve->parametric.g  = read_big_fixed(paraTag->variable);
474 
475     switch (function_type) {
476         case kGAB:
477             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
478             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
479             if (curve->parametric.a == 0) {
480                 return false;
481             }
482             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
483             break;
484         case kGABC:
485             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
486             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
487             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
488             if (curve->parametric.a == 0) {
489                 return false;
490             }
491             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
492             curve->parametric.f = curve->parametric.e;
493             break;
494         case kGABCD:
495             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
496             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
497             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
498             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
499             break;
500         case kGABCDEF:
501             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
502             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
503             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
504             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
505             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
506             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
507             break;
508     }
509     return skcms_TransferFunction_isSRGBish(&curve->parametric);
510 }
511 
512 typedef struct {
513     uint8_t type          [4];
514     uint8_t reserved      [4];
515     uint8_t value_count   [4];
516     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
517 } curv_Layout;
518 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)519 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
520                             skcms_Curve* curve, uint32_t* curve_size) {
521     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
522         return false;
523     }
524 
525     const curv_Layout* curvTag = (const curv_Layout*)buf;
526 
527     uint32_t value_count = read_big_u32(curvTag->value_count);
528     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
529         return false;
530     }
531 
532     if (curve_size) {
533         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
534     }
535 
536     if (value_count < 2) {
537         curve->table_entries = 0;
538         curve->parametric.a  = 1.0f;
539         curve->parametric.b  = 0.0f;
540         curve->parametric.c  = 0.0f;
541         curve->parametric.d  = 0.0f;
542         curve->parametric.e  = 0.0f;
543         curve->parametric.f  = 0.0f;
544         if (value_count == 0) {
545             // Empty tables are a shorthand for an identity curve
546             curve->parametric.g = 1.0f;
547         } else {
548             // Single entry tables are a shorthand for simple gamma
549             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
550         }
551     } else {
552         curve->table_8       = nullptr;
553         curve->table_16      = curvTag->variable;
554         curve->table_entries = value_count;
555     }
556 
557     return true;
558 }
559 
560 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
561 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)562 static bool read_curve(const uint8_t* buf, uint32_t size,
563                        skcms_Curve* curve, uint32_t* curve_size) {
564     if (!buf || size < 4 || !curve) {
565         return false;
566     }
567 
568     uint32_t type = read_big_u32(buf);
569     if (type == skcms_Signature_para) {
570         return read_curve_para(buf, size, curve, curve_size);
571     } else if (type == skcms_Signature_curv) {
572         return read_curve_curv(buf, size, curve, curve_size);
573     }
574 
575     return false;
576 }
577 
578 // mft1 and mft2 share a large chunk of data
579 typedef struct {
580     uint8_t type                 [ 4];
581     uint8_t reserved_a           [ 4];
582     uint8_t input_channels       [ 1];
583     uint8_t output_channels      [ 1];
584     uint8_t grid_points          [ 1];
585     uint8_t reserved_b           [ 1];
586     uint8_t matrix               [36];
587 } mft_CommonLayout;
588 
589 typedef struct {
590     mft_CommonLayout common      [1];
591 
592     uint8_t variable             [1/*variable*/];
593 } mft1_Layout;
594 
595 typedef struct {
596     mft_CommonLayout common      [1];
597 
598     uint8_t input_table_entries  [2];
599     uint8_t output_table_entries [2];
600     uint8_t variable             [1/*variable*/];
601 } mft2_Layout;
602 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)603 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
604     // MFT matrices are applied before the first set of curves, but must be identity unless the
605     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
606     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
607     // field/flag.
608     a2b->matrix_channels = 0;
609     a2b-> input_channels = mftTag-> input_channels[0];
610     a2b->output_channels = mftTag->output_channels[0];
611 
612     // We require exactly three (ie XYZ/Lab/RGB) output channels
613     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
614         return false;
615     }
616     // We require at least one, and no more than four (ie CMYK) input channels
617     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
618         return false;
619     }
620 
621     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
622         a2b->grid_points[i] = mftTag->grid_points[0];
623     }
624     // The grid only makes sense with at least two points along each axis
625     if (a2b->grid_points[0] < 2) {
626         return false;
627     }
628     return true;
629 }
630 
631 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)632 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
633     // Same as A2B.
634     b2a->matrix_channels = 0;
635     b2a-> input_channels = mftTag-> input_channels[0];
636     b2a->output_channels = mftTag->output_channels[0];
637 
638 
639     // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
640     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
641         return false;
642     }
643     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
644         return false;
645     }
646 
647     // Same as A2B.
648     for (uint32_t i = 0; i < b2a->input_channels; ++i) {
649         b2a->grid_points[i] = mftTag->grid_points[0];
650     }
651     if (b2a->grid_points[0] < 2) {
652         return false;
653     }
654     return true;
655 }
656 
657 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)658 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
659                         uint32_t input_table_entries, uint32_t output_table_entries,
660                         A2B_or_B2A* out) {
661     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
662     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
663     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
664 
665     // [input|output]_channels are <= 4, so still no overflow
666     uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
667     uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
668 
669     uint64_t grid_size = out->output_channels * byte_width;
670     for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
671         grid_size *= out->grid_points[axis];
672     }
673 
674     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
675         return false;
676     }
677 
678     for (uint32_t i = 0; i < out->input_channels; ++i) {
679         out->input_curves[i].table_entries = input_table_entries;
680         if (byte_width == 1) {
681             out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
682             out->input_curves[i].table_16 = nullptr;
683         } else {
684             out->input_curves[i].table_8  = nullptr;
685             out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
686         }
687     }
688 
689     if (byte_width == 1) {
690         out->grid_8  = table_base + byte_len_all_input_tables;
691         out->grid_16 = nullptr;
692     } else {
693         out->grid_8  = nullptr;
694         out->grid_16 = table_base + byte_len_all_input_tables;
695     }
696 
697     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
698     for (uint32_t i = 0; i < out->output_channels; ++i) {
699         out->output_curves[i].table_entries = output_table_entries;
700         if (byte_width == 1) {
701             out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
702             out->output_curves[i].table_16 = nullptr;
703         } else {
704             out->output_curves[i].table_8  = nullptr;
705             out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
706         }
707     }
708 
709     return true;
710 }
711 
712 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)713 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
714     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
715         return false;
716     }
717 
718     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
719     if (!read_mft_common(mftTag->common, out)) {
720         return false;
721     }
722 
723     uint32_t input_table_entries  = 256;
724     uint32_t output_table_entries = 256;
725 
726     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
727                        input_table_entries, output_table_entries, out);
728 }
729 
730 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)731 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
732     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
733         return false;
734     }
735 
736     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
737     if (!read_mft_common(mftTag->common, out)) {
738         return false;
739     }
740 
741     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
742     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
743 
744     // ICC spec mandates that 2 <= table_entries <= 4096
745     if (input_table_entries < 2 || input_table_entries > 4096 ||
746         output_table_entries < 2 || output_table_entries > 4096) {
747         return false;
748     }
749 
750     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
751                        input_table_entries, output_table_entries, out);
752 }
753 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)754 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
755                         uint32_t num_curves, skcms_Curve* curves) {
756     for (uint32_t i = 0; i < num_curves; ++i) {
757         if (curve_offset > size) {
758             return false;
759         }
760 
761         uint32_t curve_bytes;
762         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
763             return false;
764         }
765 
766         if (curve_bytes > UINT32_MAX - 3) {
767             return false;
768         }
769         curve_bytes = (curve_bytes + 3) & ~3U;
770 
771         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
772         curve_offset = (uint32_t)new_offset_64;
773         if (new_offset_64 != curve_offset) {
774             return false;
775         }
776     }
777 
778     return true;
779 }
780 
781 // mAB and mBA tags use the same encoding, including color lookup tables.
782 typedef struct {
783     uint8_t type                 [ 4];
784     uint8_t reserved_a           [ 4];
785     uint8_t input_channels       [ 1];
786     uint8_t output_channels      [ 1];
787     uint8_t reserved_b           [ 2];
788     uint8_t b_curve_offset       [ 4];
789     uint8_t matrix_offset        [ 4];
790     uint8_t m_curve_offset       [ 4];
791     uint8_t clut_offset          [ 4];
792     uint8_t a_curve_offset       [ 4];
793 } mAB_or_mBA_Layout;
794 
795 typedef struct {
796     uint8_t grid_points          [16];
797     uint8_t grid_byte_width      [ 1];
798     uint8_t reserved             [ 3];
799     uint8_t variable             [1/*variable*/];
800 } CLUT_Layout;
801 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)802 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
803     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
804         return false;
805     }
806 
807     const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
808 
809     a2b->input_channels  = mABTag->input_channels[0];
810     a2b->output_channels = mABTag->output_channels[0];
811 
812     // We require exactly three (ie XYZ/Lab/RGB) output channels
813     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
814         return false;
815     }
816     // We require no more than four (ie CMYK) input channels
817     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
818         return false;
819     }
820 
821     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
822     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
823     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
824     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
825     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
826 
827     // "B" curves must be present
828     if (0 == b_curve_offset) {
829         return false;
830     }
831 
832     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
833                      a2b->output_curves)) {
834         return false;
835     }
836 
837     // "M" curves and Matrix must be used together
838     if (0 != m_curve_offset) {
839         if (0 == matrix_offset) {
840             return false;
841         }
842         a2b->matrix_channels = a2b->output_channels;
843         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
844                          a2b->matrix_curves)) {
845             return false;
846         }
847 
848         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
849         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
850             return false;
851         }
852         float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
853         const uint8_t* mtx_buf = tag->buf + matrix_offset;
854         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
855         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
856         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
857         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
858         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
859         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
860         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
861         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
862         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
863         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
864         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
865         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
866     } else {
867         if (0 != matrix_offset) {
868             return false;
869         }
870         a2b->matrix_channels = 0;
871     }
872 
873     // "A" curves and CLUT must be used together
874     if (0 != a_curve_offset) {
875         if (0 == clut_offset) {
876             return false;
877         }
878         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
879                          a2b->input_curves)) {
880             return false;
881         }
882 
883         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
884             return false;
885         }
886         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
887 
888         if (clut->grid_byte_width[0] == 1) {
889             a2b->grid_8  = clut->variable;
890             a2b->grid_16 = nullptr;
891         } else if (clut->grid_byte_width[0] == 2) {
892             a2b->grid_8  = nullptr;
893             a2b->grid_16 = clut->variable;
894         } else {
895             return false;
896         }
897 
898         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
899         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
900             a2b->grid_points[i] = clut->grid_points[i];
901             // The grid only makes sense with at least two points along each axis
902             if (a2b->grid_points[i] < 2) {
903                 return false;
904             }
905             grid_size *= a2b->grid_points[i];
906         }
907         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
908             return false;
909         }
910     } else {
911         if (0 != clut_offset) {
912             return false;
913         }
914 
915         // If there is no CLUT, the number of input and output channels must match
916         if (a2b->input_channels != a2b->output_channels) {
917             return false;
918         }
919 
920         // Zero out the number of input channels to signal that we're skipping this stage
921         a2b->input_channels = 0;
922     }
923 
924     return true;
925 }
926 
927 // Exactly the same as read_tag_mab(), except where there are comments.
928 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)929 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
930     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
931         return false;
932     }
933 
934     const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
935 
936     b2a->input_channels  = mBATag->input_channels[0];
937     b2a->output_channels = mBATag->output_channels[0];
938 
939     // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
940     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
941         return false;
942     }
943     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
944         return false;
945     }
946 
947     uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
948     uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
949     uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
950     uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
951     uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
952 
953     if (0 == b_curve_offset) {
954         return false;
955     }
956 
957     // "B" curves are our inputs, not outputs.
958     if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
959                      b2a->input_curves)) {
960         return false;
961     }
962 
963     if (0 != m_curve_offset) {
964         if (0 == matrix_offset) {
965             return false;
966         }
967         // Matrix channels is tied to input_channels (3), not output_channels.
968         b2a->matrix_channels = b2a->input_channels;
969 
970         if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
971                          b2a->matrix_curves)) {
972             return false;
973         }
974 
975         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
976             return false;
977         }
978         float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
979         const uint8_t* mtx_buf = tag->buf + matrix_offset;
980         b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
981         b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
982         b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
983         b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
984         b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
985         b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
986         b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
987         b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
988         b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
989         b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
990         b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
991         b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
992     } else {
993         if (0 != matrix_offset) {
994             return false;
995         }
996         b2a->matrix_channels = 0;
997     }
998 
999     if (0 != a_curve_offset) {
1000         if (0 == clut_offset) {
1001             return false;
1002         }
1003 
1004         // "A" curves are our output, not input.
1005         if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1006                          b2a->output_curves)) {
1007             return false;
1008         }
1009 
1010         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1011             return false;
1012         }
1013         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1014 
1015         if (clut->grid_byte_width[0] == 1) {
1016             b2a->grid_8  = clut->variable;
1017             b2a->grid_16 = nullptr;
1018         } else if (clut->grid_byte_width[0] == 2) {
1019             b2a->grid_8  = nullptr;
1020             b2a->grid_16 = clut->variable;
1021         } else {
1022             return false;
1023         }
1024 
1025         uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1026         for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1027             b2a->grid_points[i] = clut->grid_points[i];
1028             if (b2a->grid_points[i] < 2) {
1029                 return false;
1030             }
1031             grid_size *= b2a->grid_points[i];
1032         }
1033         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1034             return false;
1035         }
1036     } else {
1037         if (0 != clut_offset) {
1038             return false;
1039         }
1040 
1041         if (b2a->input_channels != b2a->output_channels) {
1042             return false;
1043         }
1044 
1045         // Zero out *output* channels to skip this stage.
1046         b2a->output_channels = 0;
1047     }
1048     return true;
1049 }
1050 
1051 // If you pass f, we'll fit a possibly-non-zero value for *f.
1052 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1053 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1054                       float* c, float* d, float* f = nullptr) {
1055     assert(N > 1);
1056     // We iteratively fit the first points to the TF's linear piece.
1057     // We want the cx + f line to pass through the first and last points we fit exactly.
1058     //
1059     // As we walk along the points we find the minimum and maximum slope of the line before the
1060     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
1061     // emtpy, when we definitely can't add any more points.
1062     //
1063     // Some points' error intervals may intersect the running interval but not lie fully
1064     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
1065     // and once the search is done, back up to build the line through *that* point.
1066     const float dx = 1.0f / (N - 1);
1067 
1068     int lin_points = 1;
1069 
1070     float f_zero = 0.0f;
1071     if (f) {
1072         *f = eval_curve(curve, 0);
1073     } else {
1074         f = &f_zero;
1075     }
1076 
1077 
1078     float slope_min = -INFINITY_;
1079     float slope_max = +INFINITY_;
1080     for (int i = 1; i < N; ++i) {
1081         float x = i * dx;
1082         float y = eval_curve(curve, x);
1083 
1084         float slope_max_i = (y + tol - *f) / x,
1085               slope_min_i = (y - tol - *f) / x;
1086         if (slope_max_i < slope_min || slope_max < slope_min_i) {
1087             // Slope intervals would no longer overlap.
1088             break;
1089         }
1090         slope_max = fminf_(slope_max, slope_max_i);
1091         slope_min = fmaxf_(slope_min, slope_min_i);
1092 
1093         float cur_slope = (y - *f) / x;
1094         if (slope_min <= cur_slope && cur_slope <= slope_max) {
1095             lin_points = i + 1;
1096             *c = cur_slope;
1097         }
1098     }
1099 
1100     // Set D to the last point that met our tolerance.
1101     *d = (lin_points - 1) * dx;
1102     return lin_points;
1103 }
1104 
1105 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1106 static void canonicalize_identity(skcms_Curve* curve) {
1107     if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1108         int N = (int)curve->table_entries;
1109 
1110         float c = 0.0f, d = 0.0f, f = 0.0f;
1111         if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
1112             && c == 1.0f
1113             && f == 0.0f) {
1114             curve->table_entries = 0;
1115             curve->table_8       = nullptr;
1116             curve->table_16      = nullptr;
1117             curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
1118         }
1119     }
1120 }
1121 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1122 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1123     bool ok = false;
1124     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1125     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1126     if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1127     if (!ok) {
1128         return false;
1129     }
1130 
1131     if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1132     if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1133     if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1134     if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1135 
1136     if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1137     if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1138     if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1139 
1140     if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1141     if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1142     if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1143 
1144     return true;
1145 }
1146 
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1147 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1148     bool ok = false;
1149     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1150     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1151     if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1152     if (!ok) {
1153         return false;
1154     }
1155 
1156     if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1157     if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1158     if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1159 
1160     if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1161     if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1162     if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1163 
1164     if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1165     if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1166     if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1167     if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1168 
1169     return true;
1170 }
1171 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1172 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1173     if (!profile || !profile->buffer || !tag) { return; }
1174     if (idx > profile->tag_count) { return; }
1175     const tag_Layout* tags = get_tag_table(profile);
1176     tag->signature = read_big_u32(tags[idx].signature);
1177     tag->size      = read_big_u32(tags[idx].size);
1178     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
1179     tag->type      = read_big_u32(tag->buf);
1180 }
1181 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1182 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1183     if (!profile || !profile->buffer || !tag) { return false; }
1184     const tag_Layout* tags = get_tag_table(profile);
1185     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1186         if (read_big_u32(tags[i].signature) == sig) {
1187             tag->signature = sig;
1188             tag->size      = read_big_u32(tags[i].size);
1189             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1190             tag->type      = read_big_u32(tag->buf);
1191             return true;
1192         }
1193     }
1194     return false;
1195 }
1196 
usable_as_src(const skcms_ICCProfile * profile)1197 static bool usable_as_src(const skcms_ICCProfile* profile) {
1198     return profile->has_A2B
1199        || (profile->has_trc && profile->has_toXYZD50);
1200 }
1201 
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1202 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1203                                 const int priority[], const int priorities,
1204                                 skcms_ICCProfile* profile) {
1205     assert(SAFE_SIZEOF(header_Layout) == 132);
1206 
1207     if (!profile) {
1208         return false;
1209     }
1210     memset(profile, 0, SAFE_SIZEOF(*profile));
1211 
1212     if (len < SAFE_SIZEOF(header_Layout)) {
1213         return false;
1214     }
1215 
1216     // Byte-swap all header fields
1217     const header_Layout* header  = (const header_Layout*)buf;
1218     profile->buffer              = (const uint8_t*)buf;
1219     profile->size                = read_big_u32(header->size);
1220     uint32_t version             = read_big_u32(header->version);
1221     profile->data_color_space    = read_big_u32(header->data_color_space);
1222     profile->pcs                 = read_big_u32(header->pcs);
1223     uint32_t signature           = read_big_u32(header->signature);
1224     float illuminant_X           = read_big_fixed(header->illuminant_X);
1225     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1226     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1227     profile->tag_count           = read_big_u32(header->tag_count);
1228 
1229     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1230     // and major version
1231     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1232     if (signature != skcms_Signature_acsp ||
1233         profile->size > len ||
1234         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1235         (version >> 24) > 4) {
1236         return false;
1237     }
1238 
1239     // Validate that illuminant is D50 white
1240     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1241         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1242         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1243         return false;
1244     }
1245 
1246     // Validate that all tag entries have sane offset + size
1247     const tag_Layout* tags = get_tag_table(profile);
1248     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1249         uint32_t tag_offset = read_big_u32(tags[i].offset);
1250         uint32_t tag_size   = read_big_u32(tags[i].size);
1251         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1252         if (tag_size < 4 || tag_end > profile->size) {
1253             return false;
1254         }
1255     }
1256 
1257     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1258         return false;
1259     }
1260 
1261     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1262 
1263     // Pre-parse commonly used tags.
1264     skcms_ICCTag kTRC;
1265     if (profile->data_color_space == skcms_Signature_Gray &&
1266         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1267         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1268             // Malformed tag
1269             return false;
1270         }
1271         profile->trc[1] = profile->trc[0];
1272         profile->trc[2] = profile->trc[0];
1273         profile->has_trc = true;
1274 
1275         if (pcs_is_xyz) {
1276             profile->toXYZD50.vals[0][0] = illuminant_X;
1277             profile->toXYZD50.vals[1][1] = illuminant_Y;
1278             profile->toXYZD50.vals[2][2] = illuminant_Z;
1279             profile->has_toXYZD50 = true;
1280         }
1281     } else {
1282         skcms_ICCTag rTRC, gTRC, bTRC;
1283         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1284             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1285             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1286             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1287                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1288                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1289                 // Malformed TRC tags
1290                 return false;
1291             }
1292             profile->has_trc = true;
1293         }
1294 
1295         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1296         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1297             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1298             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1299             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1300                 // Malformed XYZ tags
1301                 return false;
1302             }
1303             profile->has_toXYZD50 = true;
1304         }
1305     }
1306 
1307     for (int i = 0; i < priorities; i++) {
1308         // enum { perceptual, relative_colormetric, saturation }
1309         if (priority[i] < 0 || priority[i] > 2) {
1310             return false;
1311         }
1312         uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1313         skcms_ICCTag tag;
1314         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1315             if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1316                 // Malformed A2B tag
1317                 return false;
1318             }
1319             profile->has_A2B = true;
1320             break;
1321         }
1322     }
1323 
1324     for (int i = 0; i < priorities; i++) {
1325         // enum { perceptual, relative_colormetric, saturation }
1326         if (priority[i] < 0 || priority[i] > 2) {
1327             return false;
1328         }
1329         uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1330         skcms_ICCTag tag;
1331         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1332             if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1333                 // Malformed B2A tag
1334                 return false;
1335             }
1336             profile->has_B2A = true;
1337             break;
1338         }
1339     }
1340 
1341     return usable_as_src(profile);
1342 }
1343 
1344 
skcms_sRGB_profile()1345 const skcms_ICCProfile* skcms_sRGB_profile() {
1346     static const skcms_ICCProfile sRGB_profile = {
1347         nullptr,               // buffer, moot here
1348 
1349         0,                     // size, moot here
1350         skcms_Signature_RGB,   // data_color_space
1351         skcms_Signature_XYZ,   // pcs
1352         0,                     // tag count, moot here
1353 
1354         // We choose to represent sRGB with its canonical transfer function,
1355         // and with its canonical XYZD50 gamut matrix.
1356         true,  // has_trc, followed by the 3 trc curves
1357         {
1358             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1359             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1360             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1361         },
1362 
1363         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1364         {{
1365             { 0.436065674f, 0.385147095f, 0.143066406f },
1366             { 0.222488403f, 0.716873169f, 0.060607910f },
1367             { 0.013916016f, 0.097076416f, 0.714096069f },
1368         }},
1369 
1370         false, // has_A2B, followed by A2B itself, which we don't care about.
1371         {
1372             0,
1373             {
1374                 {{0, {0,0, 0,0,0,0,0}}},
1375                 {{0, {0,0, 0,0,0,0,0}}},
1376                 {{0, {0,0, 0,0,0,0,0}}},
1377                 {{0, {0,0, 0,0,0,0,0}}},
1378             },
1379             {0,0,0,0},
1380             nullptr,
1381             nullptr,
1382 
1383             0,
1384             {
1385                 {{0, {0,0, 0,0,0,0,0}}},
1386                 {{0, {0,0, 0,0,0,0,0}}},
1387                 {{0, {0,0, 0,0,0,0,0}}},
1388             },
1389             {{
1390                 { 0,0,0,0 },
1391                 { 0,0,0,0 },
1392                 { 0,0,0,0 },
1393             }},
1394 
1395             0,
1396             {
1397                 {{0, {0,0, 0,0,0,0,0}}},
1398                 {{0, {0,0, 0,0,0,0,0}}},
1399                 {{0, {0,0, 0,0,0,0,0}}},
1400             },
1401         },
1402 
1403         false, // has_B2A, followed by B2A itself, which we also don't care about.
1404         {
1405             0,
1406             {
1407                 {{0, {0,0, 0,0,0,0,0}}},
1408                 {{0, {0,0, 0,0,0,0,0}}},
1409                 {{0, {0,0, 0,0,0,0,0}}},
1410             },
1411 
1412             0,
1413             {{
1414                 { 0,0,0,0 },
1415                 { 0,0,0,0 },
1416                 { 0,0,0,0 },
1417             }},
1418             {
1419                 {{0, {0,0, 0,0,0,0,0}}},
1420                 {{0, {0,0, 0,0,0,0,0}}},
1421                 {{0, {0,0, 0,0,0,0,0}}},
1422             },
1423 
1424             0,
1425             {0,0,0,0},
1426             nullptr,
1427             nullptr,
1428             {
1429                 {{0, {0,0, 0,0,0,0,0}}},
1430                 {{0, {0,0, 0,0,0,0,0}}},
1431                 {{0, {0,0, 0,0,0,0,0}}},
1432                 {{0, {0,0, 0,0,0,0,0}}},
1433             },
1434         },
1435     };
1436     return &sRGB_profile;
1437 }
1438 
skcms_XYZD50_profile()1439 const skcms_ICCProfile* skcms_XYZD50_profile() {
1440     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1441     static const skcms_ICCProfile XYZD50_profile = {
1442         nullptr,               // buffer, moot here
1443 
1444         0,                     // size, moot here
1445         skcms_Signature_RGB,   // data_color_space
1446         skcms_Signature_XYZ,   // pcs
1447         0,                     // tag count, moot here
1448 
1449         true,  // has_trc, followed by the 3 trc curves
1450         {
1451             {{0, {1,1, 0,0,0,0,0}}},
1452             {{0, {1,1, 0,0,0,0,0}}},
1453             {{0, {1,1, 0,0,0,0,0}}},
1454         },
1455 
1456         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1457         {{
1458             { 1,0,0 },
1459             { 0,1,0 },
1460             { 0,0,1 },
1461         }},
1462 
1463         false, // has_A2B, followed by A2B itself, which we don't care about.
1464         {
1465             0,
1466             {
1467                 {{0, {0,0, 0,0,0,0,0}}},
1468                 {{0, {0,0, 0,0,0,0,0}}},
1469                 {{0, {0,0, 0,0,0,0,0}}},
1470                 {{0, {0,0, 0,0,0,0,0}}},
1471             },
1472             {0,0,0,0},
1473             nullptr,
1474             nullptr,
1475 
1476             0,
1477             {
1478                 {{0, {0,0, 0,0,0,0,0}}},
1479                 {{0, {0,0, 0,0,0,0,0}}},
1480                 {{0, {0,0, 0,0,0,0,0}}},
1481             },
1482             {{
1483                 { 0,0,0,0 },
1484                 { 0,0,0,0 },
1485                 { 0,0,0,0 },
1486             }},
1487 
1488             0,
1489             {
1490                 {{0, {0,0, 0,0,0,0,0}}},
1491                 {{0, {0,0, 0,0,0,0,0}}},
1492                 {{0, {0,0, 0,0,0,0,0}}},
1493             },
1494         },
1495 
1496         false, // has_B2A, followed by B2A itself, which we also don't care about.
1497         {
1498             0,
1499             {
1500                 {{0, {0,0, 0,0,0,0,0}}},
1501                 {{0, {0,0, 0,0,0,0,0}}},
1502                 {{0, {0,0, 0,0,0,0,0}}},
1503             },
1504 
1505             0,
1506             {{
1507                 { 0,0,0,0 },
1508                 { 0,0,0,0 },
1509                 { 0,0,0,0 },
1510             }},
1511             {
1512                 {{0, {0,0, 0,0,0,0,0}}},
1513                 {{0, {0,0, 0,0,0,0,0}}},
1514                 {{0, {0,0, 0,0,0,0,0}}},
1515             },
1516 
1517             0,
1518             {0,0,0,0},
1519             nullptr,
1520             nullptr,
1521             {
1522                 {{0, {0,0, 0,0,0,0,0}}},
1523                 {{0, {0,0, 0,0,0,0,0}}},
1524                 {{0, {0,0, 0,0,0,0,0}}},
1525                 {{0, {0,0, 0,0,0,0,0}}},
1526             },
1527         },
1528     };
1529 
1530     return &XYZD50_profile;
1531 }
1532 
skcms_sRGB_TransferFunction()1533 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1534     return &skcms_sRGB_profile()->trc[0].parametric;
1535 }
1536 
skcms_sRGB_Inverse_TransferFunction()1537 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1538     static const skcms_TransferFunction sRGB_inv =
1539         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1540     return &sRGB_inv;
1541 }
1542 
skcms_Identity_TransferFunction()1543 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1544     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1545     return &identity;
1546 }
1547 
1548 const uint8_t skcms_252_random_bytes[] = {
1549     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1550     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1551     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1552     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1553     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1554     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1555     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1556     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1557     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1558     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1559     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1560     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1561     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1562     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1563     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1564     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1565 };
1566 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1567 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1568     // Test for exactly equal profiles first.
1569     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1570         return true;
1571     }
1572 
1573     // For now this is the essentially the same strategy we use in test_only.c
1574     // for our skcms_Transform() smoke tests:
1575     //    1) transform A to XYZD50
1576     //    2) transform B to XYZD50
1577     //    3) return true if they're similar enough
1578     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1579 
1580     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1581     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1582 
1583     // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1584     // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1585     const auto CMYK = skcms_Signature_CMYK;
1586     if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1587         return false;
1588     }
1589 
1590     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1591     // TODO: working with RGBA_8888 either way is probably fastest.
1592     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1593     size_t npixels = 84;
1594     if (A->data_color_space == skcms_Signature_CMYK) {
1595         fmt = skcms_PixelFormat_RGBA_8888;
1596         npixels = 63;
1597     }
1598 
1599     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1600     // use pre-canned results and skip that skcms_Transform() call?
1601     uint8_t dstA[252],
1602             dstB[252];
1603     if (!skcms_Transform(
1604                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1605                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1606                 npixels)) {
1607         return false;
1608     }
1609     if (!skcms_Transform(
1610                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1611                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1612                 npixels)) {
1613         return false;
1614     }
1615 
1616     // TODO: make sure this final check has reasonable codegen.
1617     for (size_t i = 0; i < 252; i++) {
1618         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1619             return false;
1620         }
1621     }
1622     return true;
1623 }
1624 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1625 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1626                                       const skcms_TransferFunction* inv_tf) {
1627     if (!profile || !profile->has_trc) {
1628         return false;
1629     }
1630 
1631     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1632            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1633            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1634 }
1635 
is_zero_to_one(float x)1636 static bool is_zero_to_one(float x) {
1637     return 0 <= x && x <= 1;
1638 }
1639 
1640 typedef struct { float vals[3]; } skcms_Vector3;
1641 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1642 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1643     skcms_Vector3 dst = {{0,0,0}};
1644     for (int row = 0; row < 3; ++row) {
1645         dst.vals[row] = m->vals[row][0] * v->vals[0]
1646                       + m->vals[row][1] * v->vals[1]
1647                       + m->vals[row][2] * v->vals[2];
1648     }
1649     return dst;
1650 }
1651 
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1652 bool skcms_AdaptToXYZD50(float wx, float wy,
1653                          skcms_Matrix3x3* toXYZD50) {
1654     if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1655         !toXYZD50) {
1656         return false;
1657     }
1658 
1659     // Assumes that Y is 1.0f.
1660     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1661 
1662     // Now convert toXYZ matrix to toXYZD50.
1663     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1664 
1665     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1666     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1667     // to be the best.
1668     skcms_Matrix3x3 xyz_to_lms = {{
1669         {  0.8951f,  0.2664f, -0.1614f },
1670         { -0.7502f,  1.7135f,  0.0367f },
1671         {  0.0389f, -0.0685f,  1.0296f },
1672     }};
1673     skcms_Matrix3x3 lms_to_xyz = {{
1674         {  0.9869929f, -0.1470543f, 0.1599627f },
1675         {  0.4323053f,  0.5183603f, 0.0492912f },
1676         { -0.0085287f,  0.0400428f, 0.9684867f },
1677     }};
1678 
1679     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1680     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1681 
1682     *toXYZD50 = {{
1683         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1684         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1685         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1686     }};
1687     *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1688     *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1689 
1690     return true;
1691 }
1692 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1693 bool skcms_PrimariesToXYZD50(float rx, float ry,
1694                              float gx, float gy,
1695                              float bx, float by,
1696                              float wx, float wy,
1697                              skcms_Matrix3x3* toXYZD50) {
1698     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1699         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1700         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1701         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1702         !toXYZD50) {
1703         return false;
1704     }
1705 
1706     // First, we need to convert xy values (primaries) to XYZ.
1707     skcms_Matrix3x3 primaries = {{
1708         { rx, gx, bx },
1709         { ry, gy, by },
1710         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1711     }};
1712     skcms_Matrix3x3 primaries_inv;
1713     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1714         return false;
1715     }
1716 
1717     // Assumes that Y is 1.0f.
1718     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1719     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1720 
1721     skcms_Matrix3x3 toXYZ = {{
1722         { XYZ.vals[0],           0,           0 },
1723         {           0, XYZ.vals[1],           0 },
1724         {           0,           0, XYZ.vals[2] },
1725     }};
1726     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1727 
1728     skcms_Matrix3x3 DXtoD50;
1729     if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1730         return false;
1731     }
1732 
1733     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1734     return true;
1735 }
1736 
1737 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1738 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1739     double a00 = src->vals[0][0],
1740            a01 = src->vals[1][0],
1741            a02 = src->vals[2][0],
1742            a10 = src->vals[0][1],
1743            a11 = src->vals[1][1],
1744            a12 = src->vals[2][1],
1745            a20 = src->vals[0][2],
1746            a21 = src->vals[1][2],
1747            a22 = src->vals[2][2];
1748 
1749     double b0 = a00*a11 - a01*a10,
1750            b1 = a00*a12 - a02*a10,
1751            b2 = a01*a12 - a02*a11,
1752            b3 = a20,
1753            b4 = a21,
1754            b5 = a22;
1755 
1756     double determinant = b0*b5
1757                        - b1*b4
1758                        + b2*b3;
1759 
1760     if (determinant == 0) {
1761         return false;
1762     }
1763 
1764     double invdet = 1.0 / determinant;
1765     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1766         return false;
1767     }
1768 
1769     b0 *= invdet;
1770     b1 *= invdet;
1771     b2 *= invdet;
1772     b3 *= invdet;
1773     b4 *= invdet;
1774     b5 *= invdet;
1775 
1776     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1777     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1778     dst->vals[2][0] = (float)(        +     b2 );
1779     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1780     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1781     dst->vals[2][1] = (float)(        -     b1 );
1782     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1783     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1784     dst->vals[2][2] = (float)(        +     b0 );
1785 
1786     for (int r = 0; r < 3; ++r)
1787     for (int c = 0; c < 3; ++c) {
1788         if (!isfinitef_(dst->vals[r][c])) {
1789             return false;
1790         }
1791     }
1792     return true;
1793 }
1794 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1795 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1796     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1797     for (int r = 0; r < 3; r++)
1798         for (int c = 0; c < 3; c++) {
1799             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1800                          + A->vals[r][1] * B->vals[1][c]
1801                          + A->vals[r][2] * B->vals[2][c];
1802         }
1803     return m;
1804 }
1805 
1806 #if defined(__clang__)
1807     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1808 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1809 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1810     TF_PQish  pq;
1811     TF_HLGish hlg;
1812     switch (classify(*src, &pq, &hlg)) {
1813         case Bad: return false;
1814         case sRGBish: break;  // handled below
1815 
1816         case PQish:
1817             *dst = { TFKind_marker(PQish), -pq.A,  pq.D, 1.0f/pq.F
1818                                          ,  pq.B, -pq.E, 1.0f/pq.C};
1819             return true;
1820 
1821         case HLGish:
1822             *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1823                                              , 1.0f/hlg.a, hlg.b, hlg.c
1824                                              , hlg.K_minus_1 };
1825             return true;
1826 
1827         case HLGinvish:
1828             *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1829                                           , 1.0f/hlg.a, hlg.b, hlg.c
1830                                           , hlg.K_minus_1 };
1831             return true;
1832     }
1833 
1834     assert (classify(*src) == sRGBish);
1835 
1836     // We're inverting this function, solving for x in terms of y.
1837     //   y = (cx + f)         x < d
1838     //       (ax + b)^g + e   x ≥ d
1839     // The inverse of this function can be expressed in the same piecewise form.
1840     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1841 
1842     // We'll start by finding the new threshold inv.d.
1843     // In principle we should be able to find that by solving for y at x=d from either side.
1844     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1845     float d_l =       src->c * src->d + src->f,
1846           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1847     if (fabsf_(d_l - d_r) > 1/512.0f) {
1848         return false;
1849     }
1850     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1851 
1852     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1853     if (inv.d > 0) {
1854         // Inverting the linear section is pretty straightfoward:
1855         //        y       = cx + f
1856         //        y - f   = cx
1857         //   (1/c)y - f/c = x
1858         inv.c =    1.0f/src->c;
1859         inv.f = -src->f/src->c;
1860     }
1861 
1862     // The interesting part is inverting the nonlinear section:
1863     //         y                = (ax + b)^g + e.
1864     //         y - e            = (ax + b)^g
1865     //        (y - e)^1/g       =  ax + b
1866     //        (y - e)^1/g - b   =  ax
1867     //   (1/a)(y - e)^1/g - b/a =   x
1868     //
1869     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1870     //   let k = (1/a)^g
1871     //   (1/a)( y -  e)^1/g - b/a = x
1872     //        (ky - ke)^1/g - b/a = x
1873 
1874     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1875     inv.g = 1.0f / src->g;
1876     inv.a = k;
1877     inv.b = -k * src->e;
1878     inv.e = -src->b / src->a;
1879 
1880     // We need to enforce the same constraints here that we do when fitting a curve,
1881     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1882     // of the source function if we're here.
1883 
1884     // Just like when fitting the curve, there's really no way to rescue a < 0.
1885     if (inv.a < 0) {
1886         return false;
1887     }
1888     // On the other hand we can rescue an ad+b that's gone slightly negative here.
1889     if (inv.a * inv.d + inv.b < 0) {
1890         inv.b = -inv.a * inv.d;
1891     }
1892 
1893     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1894     // where we might still fail here, like non-finite parameter values.
1895     if (classify(inv) != sRGBish) {
1896         return false;
1897     }
1898 
1899     assert (inv.a >= 0);
1900     assert (inv.a * inv.d + inv.b >= 0);
1901 
1902     // Now in principle we're done.
1903     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1904     // e or f of the inverse, depending on which segment contains src(1.0f).
1905     float s = skcms_TransferFunction_eval(src, 1.0f);
1906     if (!isfinitef_(s)) {
1907         return false;
1908     }
1909 
1910     float sign = s < 0 ? -1.0f : 1.0f;
1911     s *= sign;
1912     if (s < inv.d) {
1913         inv.f = 1.0f - sign * inv.c * s;
1914     } else {
1915         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1916     }
1917 
1918     *dst = inv;
1919     return classify(*dst) == sRGBish;
1920 }
1921 
1922 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1923 
1924 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1925 //
1926 //   tf(x) =  cx + f          x < d
1927 //   tf(x) = (ax + b)^g + e   x ≥ d
1928 //
1929 // When fitting, we add the additional constraint that both pieces meet at d:
1930 //
1931 //   cd + f = (ad + b)^g + e
1932 //
1933 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1934 //
1935 //   tf(x) =                           cx + f   x < d
1936 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1937 //
1938 // Our overall strategy is then:
1939 //    For a couple tolerances,
1940 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1941 //       - invert c,d,f
1942 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1943 //                          (and by constraint, inverted e) to the inverse of the table.
1944 //    Return the parameters with least maximum error.
1945 //
1946 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1947 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1948 //
1949 //    let y = Table(x)
1950 //    r(x) = x - f_inv(y)
1951 //
1952 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1953 //          - ln(ad + b)*(ad + b)^g
1954 //    ∂r/∂a = yg(ay + b)^(g-1)
1955 //          - dg(ad + b)^(g-1)
1956 //    ∂r/∂b =  g(ay + b)^(g-1)
1957 //          -  g(ad + b)^(g-1)
1958 
1959 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1960 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1961 static float rg_nonlinear(float x,
1962                           const skcms_Curve* curve,
1963                           const skcms_TransferFunction* tf,
1964                           float dfdP[3]) {
1965     const float y = eval_curve(curve, x);
1966 
1967     const float g = tf->g, a = tf->a, b = tf->b,
1968                 c = tf->c, d = tf->d, f = tf->f;
1969 
1970     const float Y = fmaxf_(a*y + b, 0.0f),
1971                 D =        a*d + b;
1972     assert (D >= 0);
1973 
1974     // The gradient.
1975     dfdP[0] = logf_(Y)*powf_(Y, g)
1976             - logf_(D)*powf_(D, g);
1977     dfdP[1] = y*g*powf_(Y, g-1)
1978             - d*g*powf_(D, g-1);
1979     dfdP[2] =   g*powf_(Y, g-1)
1980             -   g*powf_(D, g-1);
1981 
1982     // The residual.
1983     const float f_inv = powf_(Y, g)
1984                       - powf_(D, g)
1985                       + c*d + f;
1986     return x - f_inv;
1987 }
1988 
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1989 static bool gauss_newton_step(const skcms_Curve* curve,
1990                                     skcms_TransferFunction* tf,
1991                               float x0, float dx, int N) {
1992     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1993     //
1994     // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1995     //
1996     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1997     //   where r(P) is the residual vector
1998     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1999     //
2000     // Let's review the shape of each of these expressions:
2001     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
2002     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
2003     //   Jf^T   is [3 x N], the transpose of Jf
2004     //
2005     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2006     //                                              and so is its inverse (Jf^T Jf)^-1
2007     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2008     //
2009     // Our implementation strategy to get to the final ∆P is
2010     //   1) evaluate Jf^T Jf,   call that lhs
2011     //   2) evaluate Jf^T r(P), call that rhs
2012     //   3) invert lhs
2013     //   4) multiply inverse lhs by rhs
2014     //
2015     // This is a friendly implementation strategy because we don't have to have any
2016     // buffers that scale with N, and equally nice don't have to perform any matrix
2017     // operations that are variable size.
2018     //
2019     // Other implementation strategies could trade this off, e.g. evaluating the
2020     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2021     // the residuals.  That would probably require implementing singular value
2022     // decomposition, and would create a [3 x N] matrix to be multiplied by the
2023     // [N x 1] residual vector, but on the upside I think that'd eliminate the
2024     // possibility of this gauss_newton_step() function ever failing.
2025 
2026     // 0) start off with lhs and rhs safely zeroed.
2027     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2028     skcms_Vector3   rhs = {  {0,0,0} };
2029 
2030     // 1,2) evaluate lhs and evaluate rhs
2031     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2032     //   so we'll have to update lhs and rhs at the same time.
2033     for (int i = 0; i < N; i++) {
2034         float x = x0 + i*dx;
2035 
2036         float dfdP[3] = {0,0,0};
2037         float resid = rg_nonlinear(x,curve,tf, dfdP);
2038 
2039         for (int r = 0; r < 3; r++) {
2040             for (int c = 0; c < 3; c++) {
2041                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2042             }
2043             rhs.vals[r] += dfdP[r] * resid;
2044         }
2045     }
2046 
2047     // If any of the 3 P parameters are unused, this matrix will be singular.
2048     // Detect those cases and fix them up to indentity instead, so we can invert.
2049     for (int k = 0; k < 3; k++) {
2050         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2051             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2052             lhs.vals[k][k] = 1;
2053         }
2054     }
2055 
2056     // 3) invert lhs
2057     skcms_Matrix3x3 lhs_inv;
2058     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2059         return false;
2060     }
2061 
2062     // 4) multiply inverse lhs by rhs
2063     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2064     tf->g += dP.vals[0];
2065     tf->a += dP.vals[1];
2066     tf->b += dP.vals[2];
2067     return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2068 }
2069 
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2070 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2071                                          const skcms_TransferFunction* tf_inv) {
2072     skcms_TransferFunction tf;
2073     if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
2074         return INFINITY_;
2075     }
2076 
2077     skcms_TransferFunction tf_inv_again;
2078     if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2079         return INFINITY_;
2080     }
2081 
2082     return skcms_MaxRoundtripError(curve, &tf_inv_again);
2083 }
2084 
2085 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2086 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2087     // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2088     auto fixup_tf = [tf]() {
2089         // a must be non-negative. That ensures the function is monotonically increasing.
2090         // We don't really know how to fix up a if it goes negative.
2091         if (tf->a < 0) {
2092             return false;
2093         }
2094         // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2095         // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2096         if (tf->a * tf->d + tf->b < 0) {
2097             tf->b = -tf->a * tf->d;
2098         }
2099         assert (tf->a >= 0 &&
2100                 tf->a * tf->d + tf->b >= 0);
2101 
2102         // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2103         // parameter so we can guarantee this.
2104         tf->e =   tf->c*tf->d + tf->f
2105           - powf_(tf->a*tf->d + tf->b, tf->g);
2106 
2107         return true;
2108     };
2109 
2110     if (!fixup_tf()) {
2111         return false;
2112     }
2113 
2114     // No matter where we start, dx should always represent N even steps from 0 to 1.
2115     const float dx = 1.0f / (N-1);
2116 
2117     skcms_TransferFunction best_tf = *tf;
2118     float best_max_error = INFINITY_;
2119 
2120     // Need this or several curves get worse... *sigh*
2121     float init_error = max_roundtrip_error_checked(curve, tf);
2122     if (init_error < best_max_error) {
2123         best_max_error = init_error;
2124         best_tf = *tf;
2125     }
2126 
2127     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2128     for (int j = 0; j < 8; j++) {
2129         if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
2130             *tf = best_tf;
2131             return isfinitef_(best_max_error);
2132         }
2133 
2134         float max_error = max_roundtrip_error_checked(curve, tf);
2135         if (max_error < best_max_error) {
2136             best_max_error = max_error;
2137             best_tf = *tf;
2138         }
2139     }
2140 
2141     *tf = best_tf;
2142     return isfinitef_(best_max_error);
2143 }
2144 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2145 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2146                             skcms_TransferFunction* approx,
2147                             float* max_error) {
2148     if (!curve || !approx || !max_error) {
2149         return false;
2150     }
2151 
2152     if (curve->table_entries == 0) {
2153         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2154         return false;
2155     }
2156 
2157     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2158         // We need at least two points, and must put some reasonable cap on the maximum number.
2159         return false;
2160     }
2161 
2162     int N = (int)curve->table_entries;
2163     const float dx = 1.0f / (N - 1);
2164 
2165     *max_error = INFINITY_;
2166     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2167     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2168         skcms_TransferFunction tf,
2169                                tf_inv;
2170 
2171         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2172         tf.f = 0.0f;
2173         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2174 
2175         if (L == N) {
2176             // If the entire data set was linear, move the coefficients to the nonlinear portion
2177             // with G == 1.  This lets use a canonical representation with d == 0.
2178             tf.g = 1;
2179             tf.a = tf.c;
2180             tf.b = tf.f;
2181             tf.c = tf.d = tf.e = tf.f = 0;
2182         } else if (L == N - 1) {
2183             // Degenerate case with only two points in the nonlinear segment. Solve directly.
2184             tf.g = 1;
2185             tf.a = (eval_curve(curve, (N-1)*dx) -
2186                     eval_curve(curve, (N-2)*dx))
2187                  / dx;
2188             tf.b = eval_curve(curve, (N-2)*dx)
2189                  - tf.a * (N-2)*dx;
2190             tf.e = 0;
2191         } else {
2192             // Start by guessing a gamma-only curve through the midpoint.
2193             int mid = (L + N) / 2;
2194             float mid_x = mid / (N - 1.0f);
2195             float mid_y = eval_curve(curve, mid_x);
2196             tf.g = log2f_(mid_y) / log2f_(mid_x);
2197             tf.a = 1;
2198             tf.b = 0;
2199             tf.e =    tf.c*tf.d + tf.f
2200               - powf_(tf.a*tf.d + tf.b, tf.g);
2201 
2202 
2203             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2204                 !fit_nonlinear(curve, L,N, &tf_inv)) {
2205                 continue;
2206             }
2207 
2208             // We fit tf_inv, so calculate tf to keep in sync.
2209             // fit_nonlinear() should guarantee invertibility.
2210             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2211                 assert(false);
2212                 continue;
2213             }
2214         }
2215 
2216         // We'd better have a sane, sRGB-ish TF by now.
2217         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2218         // anything else is just some accident of math and the way we pun tf.g as a type flag.
2219         // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2220         if (sRGBish != classify(tf)) {
2221             continue;
2222         }
2223 
2224         // We find our error by roundtripping the table through tf_inv.
2225         //
2226         // (The most likely use case for this approximation is to be inverted and
2227         // used as the transfer function for a destination color space.)
2228         //
2229         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2230         // invertible, so re-verify that here (and use the new inverse for testing).
2231         // fit_nonlinear() should guarantee this, but the special cases that don't use
2232         // it may fail this test.
2233         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2234             continue;
2235         }
2236 
2237         float err = skcms_MaxRoundtripError(curve, &tf_inv);
2238         if (*max_error > err) {
2239             *max_error = err;
2240             *approx    = tf;
2241         }
2242     }
2243     return isfinitef_(*max_error);
2244 }
2245 
2246 // ~~~~ Impl. of skcms_Transform() ~~~~
2247 
2248 typedef enum {
2249     Op_load_a8,
2250     Op_load_g8,
2251     Op_load_8888_palette8,
2252     Op_load_4444,
2253     Op_load_565,
2254     Op_load_888,
2255     Op_load_8888,
2256     Op_load_1010102,
2257     Op_load_161616LE,
2258     Op_load_16161616LE,
2259     Op_load_161616BE,
2260     Op_load_16161616BE,
2261     Op_load_hhh,
2262     Op_load_hhhh,
2263     Op_load_fff,
2264     Op_load_ffff,
2265 
2266     Op_swap_rb,
2267     Op_clamp,
2268     Op_invert,
2269     Op_force_opaque,
2270     Op_premul,
2271     Op_unpremul,
2272     Op_matrix_3x3,
2273     Op_matrix_3x4,
2274 
2275     Op_lab_to_xyz,
2276     Op_xyz_to_lab,
2277 
2278     Op_tf_r,
2279     Op_tf_g,
2280     Op_tf_b,
2281     Op_tf_a,
2282 
2283     Op_pq_r,
2284     Op_pq_g,
2285     Op_pq_b,
2286     Op_pq_a,
2287 
2288     Op_hlg_r,
2289     Op_hlg_g,
2290     Op_hlg_b,
2291     Op_hlg_a,
2292 
2293     Op_hlginv_r,
2294     Op_hlginv_g,
2295     Op_hlginv_b,
2296     Op_hlginv_a,
2297 
2298     Op_table_r,
2299     Op_table_g,
2300     Op_table_b,
2301     Op_table_a,
2302 
2303     Op_clut_A2B,
2304     Op_clut_B2A,
2305 
2306     Op_store_a8,
2307     Op_store_g8,
2308     Op_store_4444,
2309     Op_store_565,
2310     Op_store_888,
2311     Op_store_8888,
2312     Op_store_1010102,
2313     Op_store_161616LE,
2314     Op_store_16161616LE,
2315     Op_store_161616BE,
2316     Op_store_16161616BE,
2317     Op_store_hhh,
2318     Op_store_hhhh,
2319     Op_store_fff,
2320     Op_store_ffff,
2321 } Op;
2322 
2323 #if defined(__clang__)
2324     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2325 #elif defined(__GNUC__)
2326     // For some reason GCC accepts this nonsense, but not the more straightforward version,
2327     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2328     template <int N, typename T>
2329     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2330 
2331     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2332 #endif
2333 
2334 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2335 
2336 namespace baseline {
2337 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2338                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2339     #define N 1
2340     template <typename T> using V = T;
2341     using Color = float;
2342 #elif defined(__AVX512F__)
2343     #define N 16
2344     template <typename T> using V = Vec<N,T>;
2345     using Color = float;
2346 #elif defined(__AVX__)
2347     #define N 8
2348     template <typename T> using V = Vec<N,T>;
2349     using Color = float;
2350 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2351     #define N 8
2352     template <typename T> using V = Vec<N,T>;
2353     using Color = _Float16;
2354 #else
2355     #define N 4
2356     template <typename T> using V = Vec<N,T>;
2357     using Color = float;
2358 #endif
2359 
2360     #include "src/Transform_inl.h"
2361     #undef N
2362 }
2363 
2364 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2365 #if !defined(SKCMS_PORTABLE) &&                           \
2366     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
2367         (( defined(__clang__) && __clang_major__ >= 5) || \
2368          (!defined(__clang__) && defined(__GNUC__)))      \
2369      && defined(__x86_64__)
2370 
2371     #if !defined(__AVX2__)
2372         #if defined(__clang__)
2373             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2374         #elif defined(__GNUC__)
2375             #pragma GCC push_options
2376             #pragma GCC target("avx2,f16c")
2377         #endif
2378 
2379         namespace hsw {
2380             #define USING_AVX
2381             #define USING_AVX_F16C
2382             #define USING_AVX2
2383             #define N 8
2384             template <typename T> using V = Vec<N,T>;
2385             using Color = float;
2386 
2387             #include "src/Transform_inl.h"
2388 
2389             // src/Transform_inl.h will undefine USING_* for us.
2390             #undef N
2391         }
2392 
2393         #if defined(__clang__)
2394             #pragma clang attribute pop
2395         #elif defined(__GNUC__)
2396             #pragma GCC pop_options
2397         #endif
2398 
2399         #define TEST_FOR_HSW
2400     #endif
2401 
2402     #if !defined(__AVX512F__)
2403         #if defined(__clang__)
2404             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2405         #elif defined(__GNUC__)
2406             #pragma GCC push_options
2407             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2408         #endif
2409 
2410         namespace skx {
2411             #define USING_AVX512F
2412             #define N 16
2413             template <typename T> using V = Vec<N,T>;
2414             using Color = float;
2415 
2416             #include "src/Transform_inl.h"
2417 
2418             // src/Transform_inl.h will undefine USING_* for us.
2419             #undef N
2420         }
2421 
2422         #if defined(__clang__)
2423             #pragma clang attribute pop
2424         #elif defined(__GNUC__)
2425             #pragma GCC pop_options
2426         #endif
2427 
2428         #define TEST_FOR_SKX
2429     #endif
2430 
2431     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2432         enum class CpuType { None, HSW, SKX };
cpu_type()2433         static CpuType cpu_type() {
2434             static const CpuType type = []{
2435                 if (!runtime_cpu_detection) {
2436                     return CpuType::None;
2437                 }
2438                 // See http://www.sandpile.org/x86/cpuid.htm
2439 
2440                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2441                 uint32_t eax, ebx, ecx, edx;
2442                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2443                                              : "0"(1), "2"(0));
2444                 if ((edx & (1u<<25)) &&  // SSE
2445                     (edx & (1u<<26)) &&  // SSE2
2446                     (ecx & (1u<< 0)) &&  // SSE3
2447                     (ecx & (1u<< 9)) &&  // SSSE3
2448                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2449                     (ecx & (1u<<19)) &&  // SSE4.1
2450                     (ecx & (1u<<20)) &&  // SSE4.2
2451                     (ecx & (1u<<26)) &&  // XSAVE
2452                     (ecx & (1u<<27)) &&  // OSXSAVE
2453                     (ecx & (1u<<28)) &&  // AVX
2454                     (ecx & (1u<<29))) {  // F16C
2455 
2456                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2457                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2458                                                  : "0"(7), "2"(0));
2459                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2460                     uint32_t xcr0, dont_need_edx;
2461                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2462 
2463                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2464                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
2465                         (ebx  & (1u<<5))) {  // AVX2
2466                         // At this point we're at least HSW.  Continue checking for SKX.
2467                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2468                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2469                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2470                             (ebx  & (1u<<16)) && // AVX512F
2471                             (ebx  & (1u<<17)) && // AVX512DQ
2472                             (ebx  & (1u<<28)) && // AVX512CD
2473                             (ebx  & (1u<<30)) && // AVX512BW
2474                             (ebx  & (1u<<31))) { // AVX512VL
2475                             return CpuType::SKX;
2476                         }
2477                         return CpuType::HSW;
2478                     }
2479                 }
2480                 return CpuType::None;
2481             }();
2482             return type;
2483         }
2484     #endif
2485 
2486 #endif
2487 
2488 typedef struct {
2489     Op          op;
2490     const void* arg;
2491 } OpAndArg;
2492 
select_curve_op(const skcms_Curve * curve,int channel)2493 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2494     static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2495         { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2496         { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2497         { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2498         { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2499     };
2500     const auto& op = ops[channel];
2501 
2502     if (curve->table_entries == 0) {
2503         const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2504 
2505         const skcms_TransferFunction& tf = curve->parametric;
2506 
2507         if (tf.g == 1 && tf.a == 1 &&
2508             tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2509             return noop;
2510         }
2511 
2512         switch (classify(tf)) {
2513             case Bad:        return noop;
2514             case sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2515             case PQish:      return OpAndArg{op.PQish,     &tf};
2516             case HLGish:     return OpAndArg{op.HLGish,    &tf};
2517             case HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2518         }
2519     }
2520     return OpAndArg{op.table, curve};
2521 }
2522 
bytes_per_pixel(skcms_PixelFormat fmt)2523 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2524     switch (fmt >> 1) {   // ignore rgb/bgr
2525         case skcms_PixelFormat_A_8                >> 1: return  1;
2526         case skcms_PixelFormat_G_8                >> 1: return  1;
2527         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2528         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2529         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2530         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2531         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2532         case skcms_PixelFormat_RGBA_8888_sRGB     >> 1: return  4;
2533         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2534         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2535         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2536         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2537         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2538         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2539         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2540         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2541         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2542         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2543         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2544     }
2545     assert(false);
2546     return 0;
2547 }
2548 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2549 static bool prep_for_destination(const skcms_ICCProfile* profile,
2550                                  skcms_Matrix3x3* fromXYZD50,
2551                                  skcms_TransferFunction* invR,
2552                                  skcms_TransferFunction* invG,
2553                                  skcms_TransferFunction* invB) {
2554     // skcms_Transform() supports B2A destinations...
2555     if (profile->has_B2A) { return true; }
2556     // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2557     return profile->has_trc
2558         && profile->has_toXYZD50
2559         && profile->trc[0].table_entries == 0
2560         && profile->trc[1].table_entries == 0
2561         && profile->trc[2].table_entries == 0
2562         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2563         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2564         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2565         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2566 }
2567 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2568 bool skcms_Transform(const void*             src,
2569                      skcms_PixelFormat       srcFmt,
2570                      skcms_AlphaFormat       srcAlpha,
2571                      const skcms_ICCProfile* srcProfile,
2572                      void*                   dst,
2573                      skcms_PixelFormat       dstFmt,
2574                      skcms_AlphaFormat       dstAlpha,
2575                      const skcms_ICCProfile* dstProfile,
2576                      size_t                  npixels) {
2577     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2578                                       dst, dstFmt, dstAlpha, dstProfile,
2579                                       npixels, nullptr);
2580 }
2581 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2582 bool skcms_TransformWithPalette(const void*             src,
2583                                 skcms_PixelFormat       srcFmt,
2584                                 skcms_AlphaFormat       srcAlpha,
2585                                 const skcms_ICCProfile* srcProfile,
2586                                 void*                   dst,
2587                                 skcms_PixelFormat       dstFmt,
2588                                 skcms_AlphaFormat       dstAlpha,
2589                                 const skcms_ICCProfile* dstProfile,
2590                                 size_t                  nz,
2591                                 const void*             palette) {
2592     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2593                  src_bpp = bytes_per_pixel(srcFmt);
2594     // Let's just refuse if the request is absurdly big.
2595     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2596         return false;
2597     }
2598     int n = (int)nz;
2599 
2600     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2601     if (!srcProfile) {
2602         srcProfile = skcms_sRGB_profile();
2603     }
2604     if (!dstProfile) {
2605         dstProfile = skcms_sRGB_profile();
2606     }
2607 
2608     // We can't transform in place unless the PixelFormats are the same size.
2609     if (dst == src && dst_bpp != src_bpp) {
2610         return false;
2611     }
2612     // TODO: more careful alias rejection (like, dst == src + 1)?
2613 
2614     if (needs_palette(srcFmt) && !palette) {
2615         return false;
2616     }
2617 
2618     Op          program  [32];
2619     const void* arguments[32];
2620 
2621     Op*          ops  = program;
2622     const void** args = arguments;
2623 
2624     // These are always parametric curves of some sort.
2625     skcms_Curve dst_curves[3];
2626     dst_curves[0].table_entries =
2627     dst_curves[1].table_entries =
2628     dst_curves[2].table_entries = 0;
2629 
2630     skcms_Matrix3x3        from_xyz;
2631 
2632     switch (srcFmt >> 1) {
2633         default: return false;
2634         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2635         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2636         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2637         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2638         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2639         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2640         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2641         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2642         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2643         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2644         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2645         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2646         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2647         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2648         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2649         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2650         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2651 
2652         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2653                                                         *args++ = palette;
2654                                                         break;
2655         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2656             *ops++ = Op_load_8888;
2657             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_TransferFunction();
2658             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_TransferFunction();
2659             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_TransferFunction();
2660             break;
2661     }
2662     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2663         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2664         *ops++ = Op_clamp;
2665     }
2666     if (srcFmt & 1) {
2667         *ops++ = Op_swap_rb;
2668     }
2669     skcms_ICCProfile gray_dst_profile;
2670     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2671         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2672         // luminance (Y) by the destination transfer function.
2673         gray_dst_profile = *dstProfile;
2674         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2675         dstProfile = &gray_dst_profile;
2676     }
2677 
2678     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2679         // Photoshop creates CMYK images as inverse CMYK.
2680         // These happen to be the only ones we've _ever_ seen.
2681         *ops++ = Op_invert;
2682         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2683         srcAlpha = skcms_AlphaFormat_Unpremul;
2684     }
2685 
2686     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2687         *ops++ = Op_force_opaque;
2688     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2689         *ops++ = Op_unpremul;
2690     }
2691 
2692     if (dstProfile != srcProfile) {
2693 
2694         if (!prep_for_destination(dstProfile,
2695                                   &from_xyz,
2696                                   &dst_curves[0].parametric,
2697                                   &dst_curves[1].parametric,
2698                                   &dst_curves[2].parametric)) {
2699             return false;
2700         }
2701 
2702         if (srcProfile->has_A2B) {
2703             if (srcProfile->A2B.input_channels) {
2704                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2705                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2706                     if (oa.arg) {
2707                         *ops++  = oa.op;
2708                         *args++ = oa.arg;
2709                     }
2710                 }
2711                 *ops++  = Op_clamp;
2712                 *ops++  = Op_clut_A2B;
2713                 *args++ = &srcProfile->A2B;
2714             }
2715 
2716             if (srcProfile->A2B.matrix_channels == 3) {
2717                 for (int i = 0; i < 3; i++) {
2718                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2719                     if (oa.arg) {
2720                         *ops++  = oa.op;
2721                         *args++ = oa.arg;
2722                     }
2723                 }
2724 
2725                 static const skcms_Matrix3x4 I = {{
2726                     {1,0,0,0},
2727                     {0,1,0,0},
2728                     {0,0,1,0},
2729                 }};
2730                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2731                     *ops++  = Op_matrix_3x4;
2732                     *args++ = &srcProfile->A2B.matrix;
2733                 }
2734             }
2735 
2736             if (srcProfile->A2B.output_channels == 3) {
2737                 for (int i = 0; i < 3; i++) {
2738                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2739                     if (oa.arg) {
2740                         *ops++  = oa.op;
2741                         *args++ = oa.arg;
2742                     }
2743                 }
2744             }
2745 
2746             if (srcProfile->pcs == skcms_Signature_Lab) {
2747                 *ops++ = Op_lab_to_xyz;
2748             }
2749 
2750         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2751             for (int i = 0; i < 3; i++) {
2752                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2753                 if (oa.arg) {
2754                     *ops++  = oa.op;
2755                     *args++ = oa.arg;
2756                 }
2757             }
2758         } else {
2759             return false;
2760         }
2761 
2762         // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2763         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2764 
2765         if (dstProfile->has_B2A) {
2766             // B2A needs its input in XYZD50, so transform TRC sources now.
2767             if (!srcProfile->has_A2B) {
2768                 *ops++  = Op_matrix_3x3;
2769                 *args++ = &srcProfile->toXYZD50;
2770             }
2771 
2772             if (dstProfile->pcs == skcms_Signature_Lab) {
2773                 *ops++ = Op_xyz_to_lab;
2774             }
2775 
2776             if (dstProfile->B2A.input_channels == 3) {
2777                 for (int i = 0; i < 3; i++) {
2778                     OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
2779                     if (oa.arg) {
2780                         *ops++  = oa.op;
2781                         *args++ = oa.arg;
2782                     }
2783                 }
2784             }
2785 
2786             if (dstProfile->B2A.matrix_channels == 3) {
2787                 static const skcms_Matrix3x4 I = {{
2788                     {1,0,0,0},
2789                     {0,1,0,0},
2790                     {0,0,1,0},
2791                 }};
2792                 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2793                     *ops++  = Op_matrix_3x4;
2794                     *args++ = &dstProfile->B2A.matrix;
2795                 }
2796 
2797                 for (int i = 0; i < 3; i++) {
2798                     OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
2799                     if (oa.arg) {
2800                         *ops++  = oa.op;
2801                         *args++ = oa.arg;
2802                     }
2803                 }
2804             }
2805 
2806             if (dstProfile->B2A.output_channels) {
2807                 *ops++  = Op_clamp;
2808                 *ops++  = Op_clut_B2A;
2809                 *args++ = &dstProfile->B2A;
2810                 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2811                     OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
2812                     if (oa.arg) {
2813                         *ops++  = oa.op;
2814                         *args++ = oa.arg;
2815                     }
2816                 }
2817             }
2818         } else {
2819             // This is a TRC destination.
2820             // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2821             // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2822             static const skcms_Matrix3x3 I = {{
2823                 { 1.0f, 0.0f, 0.0f },
2824                 { 0.0f, 1.0f, 0.0f },
2825                 { 0.0f, 0.0f, 1.0f },
2826             }};
2827             const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2828 
2829             // There's a chance the source and destination gamuts are identical,
2830             // in which case we can skip the gamut transform.
2831             if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2832                 // Concat the entire gamut transform into from_xyz,
2833                 // now slightly misnamed but it's a handy spot to stash the result.
2834                 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2835                 *ops++  = Op_matrix_3x3;
2836                 *args++ = &from_xyz;
2837             }
2838 
2839             // Encode back to dst RGB using its parametric transfer functions.
2840             for (int i = 0; i < 3; i++) {
2841                 OpAndArg oa = select_curve_op(dst_curves+i, i);
2842                 if (oa.arg) {
2843                     assert (oa.op != Op_table_r &&
2844                             oa.op != Op_table_g &&
2845                             oa.op != Op_table_b &&
2846                             oa.op != Op_table_a);
2847                     *ops++  = oa.op;
2848                     *args++ = oa.arg;
2849                 }
2850             }
2851         }
2852     }
2853 
2854     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2855     // not just to values that fit in [0,1].
2856     //
2857     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2858     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2859     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2860         *ops++ = Op_clamp;
2861     }
2862 
2863     if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2864         // Photoshop creates CMYK images as inverse CMYK.
2865         // These happen to be the only ones we've _ever_ seen.
2866         *ops++ = Op_invert;
2867 
2868         // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2869         dstAlpha = skcms_AlphaFormat_Unpremul;
2870     }
2871 
2872     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2873         *ops++ = Op_force_opaque;
2874     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2875         *ops++ = Op_premul;
2876     }
2877     if (dstFmt & 1) {
2878         *ops++ = Op_swap_rb;
2879     }
2880     switch (dstFmt >> 1) {
2881         default: return false;
2882         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2883         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2884         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2885         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2886         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2887         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2888         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2889         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2890         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2891         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2892         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2893         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2894         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2895         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2896         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2897         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2898         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2899 
2900         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2901             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2902             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2903             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2904             *ops++ = Op_store_8888;
2905             break;
2906     }
2907 
2908     auto run = baseline::run_program;
2909 #if defined(TEST_FOR_HSW)
2910     switch (cpu_type()) {
2911         case CpuType::None:                        break;
2912         case CpuType::HSW: run = hsw::run_program; break;
2913         case CpuType::SKX: run = hsw::run_program; break;
2914     }
2915 #endif
2916 #if defined(TEST_FOR_SKX)
2917     switch (cpu_type()) {
2918         case CpuType::None:                        break;
2919         case CpuType::HSW:                         break;
2920         case CpuType::SKX: run = skx::run_program; break;
2921     }
2922 #endif
2923     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2924     return true;
2925 }
2926 
assert_usable_as_destination(const skcms_ICCProfile * profile)2927 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2928 #if defined(NDEBUG)
2929     (void)profile;
2930 #else
2931     skcms_Matrix3x3 fromXYZD50;
2932     skcms_TransferFunction invR, invG, invB;
2933     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2934 #endif
2935 }
2936 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2937 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2938     if (!profile->has_B2A) {
2939         skcms_Matrix3x3 fromXYZD50;
2940         if (!profile->has_trc || !profile->has_toXYZD50
2941             || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2942             return false;
2943         }
2944 
2945         skcms_TransferFunction tf[3];
2946         for (int i = 0; i < 3; i++) {
2947             skcms_TransferFunction inv;
2948             if (profile->trc[i].table_entries == 0
2949                 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2950                 tf[i] = profile->trc[i].parametric;
2951                 continue;
2952             }
2953 
2954             float max_error;
2955             // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2956             if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2957                 return false;
2958             }
2959         }
2960 
2961         for (int i = 0; i < 3; ++i) {
2962             profile->trc[i].table_entries = 0;
2963             profile->trc[i].parametric = tf[i];
2964         }
2965     }
2966     assert_usable_as_destination(profile);
2967     return true;
2968 }
2969 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2970 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2971     // Call skcms_MakeUsableAsDestination() with B2A disabled;
2972     // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2973     skcms_ICCProfile result = *profile;
2974     result.has_B2A = false;
2975     if (!skcms_MakeUsableAsDestination(&result)) {
2976         return false;
2977     }
2978 
2979     // Of the three, pick the transfer function that best fits the other two.
2980     int best_tf = 0;
2981     float min_max_error = INFINITY_;
2982     for (int i = 0; i < 3; i++) {
2983         skcms_TransferFunction inv;
2984         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2985             return false;
2986         }
2987 
2988         float err = 0;
2989         for (int j = 0; j < 3; ++j) {
2990             err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2991         }
2992         if (min_max_error > err) {
2993             min_max_error = err;
2994             best_tf = i;
2995         }
2996     }
2997 
2998     for (int i = 0; i < 3; i++) {
2999         result.trc[i].parametric = result.trc[best_tf].parametric;
3000     }
3001 
3002     *profile = result;
3003     assert_usable_as_destination(profile);
3004     return true;
3005 }
3006