1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef Sk4px_DEFINED 9 #define Sk4px_DEFINED 10 11 #include "SkNx.h" 12 #include "SkColor.h" 13 14 // 1, 2 or 4 SkPMColors, generally vectorized. 15 class Sk4px : public Sk16b { 16 public: Sk4px(SkAlpha a)17 Sk4px(SkAlpha a) : INHERITED(a) {} // Duplicate 16x: a -> aaaa aaaa aaaa aaaa 18 Sk4px(SkPMColor); // Duplicate 4x: argb -> argb argb argb argb Sk4px(const Sk16b & v)19 Sk4px(const Sk16b& v) : INHERITED(v) {} 20 21 Sk4px alphas() const; // ARGB argb XYZW xyzw -> AAAA aaaa XXXX xxxx 22 23 // Mask away color or alpha lanes. 24 Sk4px zeroColors() const; // ARGB argb XYZW xyzw -> A000 a000 X000 x000 25 Sk4px zeroAlphas() const; // ARGB argb XYZW xyzw -> 0RGB 0rgb 0YZW 0yzw 26 inv()27 Sk4px inv() const { return Sk16b(255) - *this; } 28 29 // When loading or storing fewer than 4 SkPMColors, we use the low lanes. 30 static Sk4px Load4(const SkPMColor[4]); // PMColor[4] -> ARGB argb XYZW xyzw 31 static Sk4px Load2(const SkPMColor[2]); // PMColor[2] -> ARGB argb ???? ???? 32 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ???? 33 34 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px. 35 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx 36 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ???? 37 38 void store4(SkPMColor[4]) const; 39 void store2(SkPMColor[2]) const; 40 void store1(SkPMColor[1]) const; 41 42 // 1, 2, or 4 SkPMColors with 16-bit components. 43 // This is most useful as the result of a multiply, e.g. from mulWiden(). 44 class Wide : public Sk16h { 45 public: Wide(const Sk16h & v)46 Wide(const Sk16h& v) : Sk16h(v) {} 47 48 // Pack the top byte of each component back down into 4 SkPMColors. 49 Sk4px addNarrowHi(const Sk16h&) const; 50 div255TruncNarrow()51 Sk4px div255TruncNarrow() const { return this->addNarrowHi(*this >> 8); } div255RoundNarrow()52 Sk4px div255RoundNarrow() const { 53 return Sk4px::Wide(*this + Sk16h(128)).div255TruncNarrow(); 54 } 55 56 private: 57 typedef Sk16h INHERITED; 58 }; 59 60 Wide widenLo() const; // ARGB -> 0A 0R 0G 0B 61 Wide widenHi() const; // ARGB -> A0 R0 G0 B0 62 Wide mulWiden(const Sk16b&) const; // 8-bit x 8-bit -> 16-bit components. mul255Widen()63 Wide mul255Widen() const { 64 // TODO: x*255 = x*256-x, so something like this->widenHi() - this->widenLo()? 65 return this->mulWiden(Sk16b(255)); 66 } 67 68 // A generic driver that maps fn over a src array into a dst array. 69 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels). 70 template <typename Fn> MapSrc(int count,SkPMColor * dst,const SkPMColor * src,Fn fn)71 static void MapSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn) { 72 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn. 73 // Basically, we need to make sure we keep things inside a single loop. 74 while (count > 0) { 75 if (count >= 8) { 76 Sk4px dst0 = fn(Load4(src+0)), 77 dst4 = fn(Load4(src+4)); 78 dst0.store4(dst+0); 79 dst4.store4(dst+4); 80 dst += 8; src += 8; count -= 8; 81 continue; // Keep our stride at 8 pixels as long as possible. 82 } 83 SkASSERT(count <= 7); 84 if (count >= 4) { 85 fn(Load4(src)).store4(dst); 86 dst += 4; src += 4; count -= 4; 87 } 88 if (count >= 2) { 89 fn(Load2(src)).store2(dst); 90 dst += 2; src += 2; count -= 2; 91 } 92 if (count >= 1) { 93 fn(Load1(src)).store1(dst); 94 } 95 break; 96 } 97 } 98 99 // As above, but with dst4' = fn(dst4, src4). 100 template <typename Fn> MapDstSrc(int count,SkPMColor * dst,const SkPMColor * src,Fn fn)101 static void MapDstSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn) { 102 while (count > 0) { 103 if (count >= 8) { 104 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), 105 dst4 = fn(Load4(dst+4), Load4(src+4)); 106 dst0.store4(dst+0); 107 dst4.store4(dst+4); 108 dst += 8; src += 8; count -= 8; 109 continue; // Keep our stride at 8 pixels as long as possible. 110 } 111 SkASSERT(count <= 7); 112 if (count >= 4) { 113 fn(Load4(dst), Load4(src)).store4(dst); 114 dst += 4; src += 4; count -= 4; 115 } 116 if (count >= 2) { 117 fn(Load2(dst), Load2(src)).store2(dst); 118 dst += 2; src += 2; count -= 2; 119 } 120 if (count >= 1) { 121 fn(Load1(dst), Load1(src)).store1(dst); 122 } 123 break; 124 } 125 } 126 127 // As above, but with dst4' = fn(dst4, src4, alpha4). 128 template <typename Fn> MapDstSrcAlpha(int count,SkPMColor * dst,const SkPMColor * src,const SkAlpha * a,Fn fn)129 static void MapDstSrcAlpha( 130 int count, SkPMColor* dst, const SkPMColor* src, const SkAlpha* a, Fn fn) { 131 while (count > 0) { 132 if (count >= 8) { 133 Sk4px alpha0 = Load4Alphas(a+0), 134 alpha4 = Load4Alphas(a+4); 135 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), alpha0), 136 dst4 = fn(Load4(dst+4), Load4(src+4), alpha4); 137 dst0.store4(dst+0); 138 dst4.store4(dst+4); 139 dst += 8; src += 8; a += 8; count -= 8; 140 continue; // Keep our stride at 8 pixels as long as possible. 141 } 142 SkASSERT(count <= 7); 143 if (count >= 4) { 144 Sk4px alpha = Load4Alphas(a); 145 fn(Load4(dst), Load4(src), alpha).store4(dst); 146 dst += 4; src += 4; a += 4; count -= 4; 147 } 148 if (count >= 2) { 149 Sk4px alpha = Load2Alphas(a); 150 fn(Load2(dst), Load2(src), alpha).store2(dst); 151 dst += 2; src += 2; a += 2; count -= 2; 152 } 153 if (count >= 1) { 154 Sk4px alpha(*a); 155 fn(Load1(dst), Load1(src), alpha).store1(dst); 156 } 157 break; 158 } 159 } 160 161 private: 162 typedef Sk16b INHERITED; 163 }; 164 165 #ifdef SKNX_NO_SIMD 166 #include "../opts/Sk4px_none.h" 167 #else 168 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 169 #include "../opts/Sk4px_SSE2.h" 170 #elif defined(SK_ARM_HAS_NEON) 171 #include "../opts/Sk4px_NEON.h" 172 #else 173 #include "../opts/Sk4px_none.h" 174 #endif 175 #endif 176 177 #endif//Sk4px_DEFINED 178