1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Surface.hpp" 16 17 #include "Color.hpp" 18 #include "Context.hpp" 19 #include "ETC_Decoder.hpp" 20 #include "Renderer.hpp" 21 #include "Common/Half.hpp" 22 #include "Common/Memory.hpp" 23 #include "Common/CPUID.hpp" 24 #include "Common/Resource.hpp" 25 #include "Common/Debug.hpp" 26 #include "Reactor/Reactor.hpp" 27 28 #include <xmmintrin.h> 29 #include <emmintrin.h> 30 31 #undef min 32 #undef max 33 34 namespace sw 35 { 36 extern bool quadLayoutEnabled; 37 extern bool complementaryDepthBuffer; 38 extern TranscendentalPrecision logPrecision; 39 40 unsigned int *Surface::palette = 0; 41 unsigned int Surface::paletteID = 0; 42 clip(int minX,int minY,int maxX,int maxY)43 void Rect::clip(int minX, int minY, int maxX, int maxY) 44 { 45 x0 = clamp(x0, minX, maxX); 46 y0 = clamp(y0, minY, maxY); 47 x1 = clamp(x1, minX, maxX); 48 y1 = clamp(y1, minY, maxY); 49 } 50 write(int x,int y,int z,const Color<float> & color)51 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color) 52 { 53 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 54 55 write(element, color); 56 } 57 write(int x,int y,const Color<float> & color)58 void Surface::Buffer::write(int x, int y, const Color<float> &color) 59 { 60 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 61 62 write(element, color); 63 } 64 write(void * element,const Color<float> & color)65 inline void Surface::Buffer::write(void *element, const Color<float> &color) 66 { 67 switch(format) 68 { 69 case FORMAT_A8: 70 *(unsigned char*)element = unorm<8>(color.a); 71 break; 72 case FORMAT_R8I_SNORM: 73 *(char*)element = snorm<8>(color.r); 74 break; 75 case FORMAT_R8: 76 *(unsigned char*)element = unorm<8>(color.r); 77 break; 78 case FORMAT_R8I: 79 *(char*)element = scast<8>(color.r); 80 break; 81 case FORMAT_R8UI: 82 *(unsigned char*)element = ucast<8>(color.r); 83 break; 84 case FORMAT_R16I: 85 *(short*)element = scast<16>(color.r); 86 break; 87 case FORMAT_R16UI: 88 *(unsigned short*)element = ucast<16>(color.r); 89 break; 90 case FORMAT_R32I: 91 *(int*)element = static_cast<int>(color.r); 92 break; 93 case FORMAT_R32UI: 94 *(unsigned int*)element = static_cast<unsigned int>(color.r); 95 break; 96 case FORMAT_R3G3B2: 97 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 98 break; 99 case FORMAT_A8R3G3B2: 100 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 101 break; 102 case FORMAT_X4R4G4B4: 103 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 104 break; 105 case FORMAT_A4R4G4B4: 106 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 107 break; 108 case FORMAT_R4G4B4A4: 109 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0); 110 break; 111 case FORMAT_R5G6B5: 112 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0); 113 break; 114 case FORMAT_A1R5G5B5: 115 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 116 break; 117 case FORMAT_R5G5B5A1: 118 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0); 119 break; 120 case FORMAT_X1R5G5B5: 121 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 122 break; 123 case FORMAT_A8R8G8B8: 124 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 125 break; 126 case FORMAT_X8R8G8B8: 127 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 128 break; 129 case FORMAT_A8B8G8R8I_SNORM: 130 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) | 131 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 132 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 133 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 134 break; 135 case FORMAT_A8B8G8R8: 136 case FORMAT_SRGB8_A8: 137 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 138 break; 139 case FORMAT_A8B8G8R8I: 140 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) | 141 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 142 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 143 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 144 break; 145 case FORMAT_A8B8G8R8UI: 146 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 147 break; 148 case FORMAT_X8B8G8R8I_SNORM: 149 *(unsigned int*)element = 0x7F000000 | 150 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 151 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 152 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 153 break; 154 case FORMAT_X8B8G8R8: 155 case FORMAT_SRGB8_X8: 156 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 157 break; 158 case FORMAT_X8B8G8R8I: 159 *(unsigned int*)element = 0x7F000000 | 160 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 161 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 162 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 163 case FORMAT_X8B8G8R8UI: 164 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 165 break; 166 case FORMAT_A2R10G10B10: 167 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0); 168 break; 169 case FORMAT_A2B10G10R10: 170 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0); 171 break; 172 case FORMAT_G8R8I_SNORM: 173 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) | 174 (static_cast<unsigned short>(snorm<8>(color.r)) << 0); 175 break; 176 case FORMAT_G8R8: 177 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 178 break; 179 case FORMAT_G8R8I: 180 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) | 181 (static_cast<unsigned short>(scast<8>(color.r)) << 0); 182 break; 183 case FORMAT_G8R8UI: 184 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 185 break; 186 case FORMAT_G16R16: 187 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0); 188 break; 189 case FORMAT_G16R16I: 190 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) | 191 (static_cast<unsigned int>(scast<16>(color.r)) << 0); 192 break; 193 case FORMAT_G16R16UI: 194 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0); 195 break; 196 case FORMAT_G32R32I: 197 case FORMAT_G32R32UI: 198 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 199 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 200 break; 201 case FORMAT_A16B16G16R16: 202 ((unsigned short*)element)[0] = unorm<16>(color.r); 203 ((unsigned short*)element)[1] = unorm<16>(color.g); 204 ((unsigned short*)element)[2] = unorm<16>(color.b); 205 ((unsigned short*)element)[3] = unorm<16>(color.a); 206 break; 207 case FORMAT_A16B16G16R16I: 208 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 209 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 210 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 211 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a)); 212 break; 213 case FORMAT_A16B16G16R16UI: 214 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 215 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 216 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 217 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a)); 218 break; 219 case FORMAT_X16B16G16R16I: 220 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 221 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 222 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 223 break; 224 case FORMAT_X16B16G16R16UI: 225 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 226 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 227 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 228 break; 229 case FORMAT_A32B32G32R32I: 230 case FORMAT_A32B32G32R32UI: 231 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 232 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 233 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 234 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a); 235 break; 236 case FORMAT_X32B32G32R32I: 237 case FORMAT_X32B32G32R32UI: 238 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 239 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 240 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 241 break; 242 case FORMAT_V8U8: 243 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 244 break; 245 case FORMAT_L6V5U5: 246 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0); 247 break; 248 case FORMAT_Q8W8V8U8: 249 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 250 break; 251 case FORMAT_X8L8V8U8: 252 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 253 break; 254 case FORMAT_V16U16: 255 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0); 256 break; 257 case FORMAT_A2W10V10U10: 258 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0); 259 break; 260 case FORMAT_A16W16V16U16: 261 ((unsigned short*)element)[0] = snorm<16>(color.r); 262 ((unsigned short*)element)[1] = snorm<16>(color.g); 263 ((unsigned short*)element)[2] = snorm<16>(color.b); 264 ((unsigned short*)element)[3] = unorm<16>(color.a); 265 break; 266 case FORMAT_Q16W16V16U16: 267 ((unsigned short*)element)[0] = snorm<16>(color.r); 268 ((unsigned short*)element)[1] = snorm<16>(color.g); 269 ((unsigned short*)element)[2] = snorm<16>(color.b); 270 ((unsigned short*)element)[3] = snorm<16>(color.a); 271 break; 272 case FORMAT_R8G8B8: 273 ((unsigned char*)element)[0] = unorm<8>(color.b); 274 ((unsigned char*)element)[1] = unorm<8>(color.g); 275 ((unsigned char*)element)[2] = unorm<8>(color.r); 276 break; 277 case FORMAT_B8G8R8: 278 ((unsigned char*)element)[0] = unorm<8>(color.r); 279 ((unsigned char*)element)[1] = unorm<8>(color.g); 280 ((unsigned char*)element)[2] = unorm<8>(color.b); 281 break; 282 case FORMAT_R16F: 283 *(half*)element = (half)color.r; 284 break; 285 case FORMAT_A16F: 286 *(half*)element = (half)color.a; 287 break; 288 case FORMAT_G16R16F: 289 ((half*)element)[0] = (half)color.r; 290 ((half*)element)[1] = (half)color.g; 291 break; 292 case FORMAT_B16G16R16F: 293 ((half*)element)[0] = (half)color.r; 294 ((half*)element)[1] = (half)color.g; 295 ((half*)element)[2] = (half)color.b; 296 break; 297 case FORMAT_A16B16G16R16F: 298 ((half*)element)[0] = (half)color.r; 299 ((half*)element)[1] = (half)color.g; 300 ((half*)element)[2] = (half)color.b; 301 ((half*)element)[3] = (half)color.a; 302 break; 303 case FORMAT_A32F: 304 *(float*)element = color.a; 305 break; 306 case FORMAT_R32F: 307 *(float*)element = color.r; 308 break; 309 case FORMAT_G32R32F: 310 ((float*)element)[0] = color.r; 311 ((float*)element)[1] = color.g; 312 break; 313 case FORMAT_X32B32G32R32F: 314 ((float*)element)[3] = 1.0f; 315 case FORMAT_B32G32R32F: 316 ((float*)element)[0] = color.r; 317 ((float*)element)[1] = color.g; 318 ((float*)element)[2] = color.b; 319 break; 320 case FORMAT_A32B32G32R32F: 321 ((float*)element)[0] = color.r; 322 ((float*)element)[1] = color.g; 323 ((float*)element)[2] = color.b; 324 ((float*)element)[3] = color.a; 325 break; 326 case FORMAT_D32F: 327 case FORMAT_D32F_LOCKABLE: 328 case FORMAT_D32FS8_TEXTURE: 329 case FORMAT_D32FS8_SHADOW: 330 *((float*)element) = color.r; 331 break; 332 case FORMAT_D32F_COMPLEMENTARY: 333 *((float*)element) = 1 - color.r; 334 break; 335 case FORMAT_S8: 336 *((unsigned char*)element) = unorm<8>(color.r); 337 break; 338 case FORMAT_L8: 339 *(unsigned char*)element = unorm<8>(color.r); 340 break; 341 case FORMAT_A4L4: 342 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0); 343 break; 344 case FORMAT_L16: 345 *(unsigned short*)element = unorm<16>(color.r); 346 break; 347 case FORMAT_A8L8: 348 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0); 349 break; 350 case FORMAT_L16F: 351 *(half*)element = (half)color.r; 352 break; 353 case FORMAT_A16L16F: 354 ((half*)element)[0] = (half)color.r; 355 ((half*)element)[1] = (half)color.a; 356 break; 357 case FORMAT_L32F: 358 *(float*)element = color.r; 359 break; 360 case FORMAT_A32L32F: 361 ((float*)element)[0] = color.r; 362 ((float*)element)[1] = color.a; 363 break; 364 default: 365 ASSERT(false); 366 } 367 } 368 read(int x,int y,int z) const369 Color<float> Surface::Buffer::read(int x, int y, int z) const 370 { 371 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 372 373 return read(element); 374 } 375 read(int x,int y) const376 Color<float> Surface::Buffer::read(int x, int y) const 377 { 378 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 379 380 return read(element); 381 } 382 read(void * element) const383 inline Color<float> Surface::Buffer::read(void *element) const 384 { 385 float r = 0.0f; 386 float g = 0.0f; 387 float b = 0.0f; 388 float a = 1.0f; 389 390 switch(format) 391 { 392 case FORMAT_P8: 393 { 394 ASSERT(palette); 395 396 unsigned int abgr = palette[*(unsigned char*)element]; 397 398 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 399 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 400 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 401 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 402 } 403 break; 404 case FORMAT_A8P8: 405 { 406 ASSERT(palette); 407 408 unsigned int bgr = palette[((unsigned char*)element)[0]]; 409 410 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 411 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 412 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 413 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 414 } 415 break; 416 case FORMAT_A8: 417 r = 0; 418 g = 0; 419 b = 0; 420 a = *(unsigned char*)element * (1.0f / 0xFF); 421 break; 422 case FORMAT_R8I_SNORM: 423 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f); 424 break; 425 case FORMAT_R8: 426 r = *(unsigned char*)element * (1.0f / 0xFF); 427 break; 428 case FORMAT_R8I: 429 r = *(signed char*)element; 430 break; 431 case FORMAT_R8UI: 432 r = *(unsigned char*)element; 433 break; 434 case FORMAT_R3G3B2: 435 { 436 unsigned char rgb = *(unsigned char*)element; 437 438 r = (rgb & 0xE0) * (1.0f / 0xE0); 439 g = (rgb & 0x1C) * (1.0f / 0x1C); 440 b = (rgb & 0x03) * (1.0f / 0x03); 441 } 442 break; 443 case FORMAT_A8R3G3B2: 444 { 445 unsigned short argb = *(unsigned short*)element; 446 447 a = (argb & 0xFF00) * (1.0f / 0xFF00); 448 r = (argb & 0x00E0) * (1.0f / 0x00E0); 449 g = (argb & 0x001C) * (1.0f / 0x001C); 450 b = (argb & 0x0003) * (1.0f / 0x0003); 451 } 452 break; 453 case FORMAT_X4R4G4B4: 454 { 455 unsigned short rgb = *(unsigned short*)element; 456 457 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 458 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 459 b = (rgb & 0x000F) * (1.0f / 0x000F); 460 } 461 break; 462 case FORMAT_A4R4G4B4: 463 { 464 unsigned short argb = *(unsigned short*)element; 465 466 a = (argb & 0xF000) * (1.0f / 0xF000); 467 r = (argb & 0x0F00) * (1.0f / 0x0F00); 468 g = (argb & 0x00F0) * (1.0f / 0x00F0); 469 b = (argb & 0x000F) * (1.0f / 0x000F); 470 } 471 break; 472 case FORMAT_R4G4B4A4: 473 { 474 unsigned short rgba = *(unsigned short*)element; 475 476 r = (rgba & 0xF000) * (1.0f / 0xF000); 477 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 478 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 479 a = (rgba & 0x000F) * (1.0f / 0x000F); 480 } 481 break; 482 case FORMAT_R5G6B5: 483 { 484 unsigned short rgb = *(unsigned short*)element; 485 486 r = (rgb & 0xF800) * (1.0f / 0xF800); 487 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 488 b = (rgb & 0x001F) * (1.0f / 0x001F); 489 } 490 break; 491 case FORMAT_A1R5G5B5: 492 { 493 unsigned short argb = *(unsigned short*)element; 494 495 a = (argb & 0x8000) * (1.0f / 0x8000); 496 r = (argb & 0x7C00) * (1.0f / 0x7C00); 497 g = (argb & 0x03E0) * (1.0f / 0x03E0); 498 b = (argb & 0x001F) * (1.0f / 0x001F); 499 } 500 break; 501 case FORMAT_R5G5B5A1: 502 { 503 unsigned short rgba = *(unsigned short*)element; 504 505 r = (rgba & 0xF800) * (1.0f / 0xF800); 506 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 507 b = (rgba & 0x003E) * (1.0f / 0x003E); 508 a = (rgba & 0x0001) * (1.0f / 0x0001); 509 } 510 break; 511 case FORMAT_X1R5G5B5: 512 { 513 unsigned short xrgb = *(unsigned short*)element; 514 515 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 516 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 517 b = (xrgb & 0x001F) * (1.0f / 0x001F); 518 } 519 break; 520 case FORMAT_A8R8G8B8: 521 { 522 unsigned int argb = *(unsigned int*)element; 523 524 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 525 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 526 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 527 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 528 } 529 break; 530 case FORMAT_X8R8G8B8: 531 { 532 unsigned int xrgb = *(unsigned int*)element; 533 534 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 535 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 536 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 537 } 538 break; 539 case FORMAT_A8B8G8R8I_SNORM: 540 { 541 signed char* abgr = (signed char*)element; 542 543 r = max(abgr[0] * (1.0f / 0x7F), -1.0f); 544 g = max(abgr[1] * (1.0f / 0x7F), -1.0f); 545 b = max(abgr[2] * (1.0f / 0x7F), -1.0f); 546 a = max(abgr[3] * (1.0f / 0x7F), -1.0f); 547 } 548 break; 549 case FORMAT_A8B8G8R8: 550 case FORMAT_SRGB8_A8: 551 { 552 unsigned int abgr = *(unsigned int*)element; 553 554 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 555 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 556 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 557 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 558 } 559 break; 560 case FORMAT_A8B8G8R8I: 561 { 562 signed char* abgr = (signed char*)element; 563 564 r = abgr[0]; 565 g = abgr[1]; 566 b = abgr[2]; 567 a = abgr[3]; 568 } 569 break; 570 case FORMAT_A8B8G8R8UI: 571 { 572 unsigned char* abgr = (unsigned char*)element; 573 574 r = abgr[0]; 575 g = abgr[1]; 576 b = abgr[2]; 577 a = abgr[3]; 578 } 579 break; 580 case FORMAT_X8B8G8R8I_SNORM: 581 { 582 signed char* bgr = (signed char*)element; 583 584 r = max(bgr[0] * (1.0f / 0x7F), -1.0f); 585 g = max(bgr[1] * (1.0f / 0x7F), -1.0f); 586 b = max(bgr[2] * (1.0f / 0x7F), -1.0f); 587 } 588 break; 589 case FORMAT_X8B8G8R8: 590 case FORMAT_SRGB8_X8: 591 { 592 unsigned int xbgr = *(unsigned int*)element; 593 594 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 595 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 596 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF); 597 } 598 break; 599 case FORMAT_X8B8G8R8I: 600 { 601 signed char* bgr = (signed char*)element; 602 603 r = bgr[0]; 604 g = bgr[1]; 605 b = bgr[2]; 606 } 607 break; 608 case FORMAT_X8B8G8R8UI: 609 { 610 unsigned char* bgr = (unsigned char*)element; 611 612 r = bgr[0]; 613 g = bgr[1]; 614 b = bgr[2]; 615 } 616 break; 617 case FORMAT_G8R8I_SNORM: 618 { 619 signed char* gr = (signed char*)element; 620 621 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00); 622 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF); 623 } 624 break; 625 case FORMAT_G8R8: 626 { 627 unsigned short gr = *(unsigned short*)element; 628 629 g = (gr & 0xFF00) * (1.0f / 0xFF00); 630 r = (gr & 0x00FF) * (1.0f / 0x00FF); 631 } 632 break; 633 case FORMAT_G8R8I: 634 { 635 signed char* gr = (signed char*)element; 636 637 r = gr[0]; 638 g = gr[1]; 639 } 640 break; 641 case FORMAT_G8R8UI: 642 { 643 unsigned char* gr = (unsigned char*)element; 644 645 r = gr[0]; 646 g = gr[1]; 647 } 648 break; 649 case FORMAT_R16I: 650 r = *((short*)element); 651 break; 652 case FORMAT_R16UI: 653 r = *((unsigned short*)element); 654 break; 655 case FORMAT_G16R16I: 656 { 657 short* gr = (short*)element; 658 659 r = gr[0]; 660 g = gr[1]; 661 } 662 break; 663 case FORMAT_G16R16: 664 { 665 unsigned int gr = *(unsigned int*)element; 666 667 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 668 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 669 } 670 break; 671 case FORMAT_G16R16UI: 672 { 673 unsigned short* gr = (unsigned short*)element; 674 675 r = gr[0]; 676 g = gr[1]; 677 } 678 break; 679 case FORMAT_A2R10G10B10: 680 { 681 unsigned int argb = *(unsigned int*)element; 682 683 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 684 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 685 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 686 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 687 } 688 break; 689 case FORMAT_A2B10G10R10: 690 { 691 unsigned int abgr = *(unsigned int*)element; 692 693 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 694 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 695 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 696 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 697 } 698 break; 699 case FORMAT_A16B16G16R16I: 700 { 701 short* abgr = (short*)element; 702 703 r = abgr[0]; 704 g = abgr[1]; 705 b = abgr[2]; 706 a = abgr[3]; 707 } 708 break; 709 case FORMAT_A16B16G16R16: 710 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 711 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 712 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 713 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 714 break; 715 case FORMAT_A16B16G16R16UI: 716 { 717 unsigned short* abgr = (unsigned short*)element; 718 719 r = abgr[0]; 720 g = abgr[1]; 721 b = abgr[2]; 722 a = abgr[3]; 723 } 724 break; 725 case FORMAT_X16B16G16R16I: 726 { 727 short* bgr = (short*)element; 728 729 r = bgr[0]; 730 g = bgr[1]; 731 b = bgr[2]; 732 } 733 break; 734 case FORMAT_X16B16G16R16UI: 735 { 736 unsigned short* bgr = (unsigned short*)element; 737 738 r = bgr[0]; 739 g = bgr[1]; 740 b = bgr[2]; 741 } 742 break; 743 case FORMAT_A32B32G32R32I: 744 { 745 int* abgr = (int*)element; 746 747 r = static_cast<float>(abgr[0]); 748 g = static_cast<float>(abgr[1]); 749 b = static_cast<float>(abgr[2]); 750 a = static_cast<float>(abgr[3]); 751 } 752 break; 753 case FORMAT_A32B32G32R32UI: 754 { 755 unsigned int* abgr = (unsigned int*)element; 756 757 r = static_cast<float>(abgr[0]); 758 g = static_cast<float>(abgr[1]); 759 b = static_cast<float>(abgr[2]); 760 a = static_cast<float>(abgr[3]); 761 } 762 break; 763 case FORMAT_X32B32G32R32I: 764 { 765 int* bgr = (int*)element; 766 767 r = static_cast<float>(bgr[0]); 768 g = static_cast<float>(bgr[1]); 769 b = static_cast<float>(bgr[2]); 770 } 771 break; 772 case FORMAT_X32B32G32R32UI: 773 { 774 unsigned int* bgr = (unsigned int*)element; 775 776 r = static_cast<float>(bgr[0]); 777 g = static_cast<float>(bgr[1]); 778 b = static_cast<float>(bgr[2]); 779 } 780 break; 781 case FORMAT_G32R32I: 782 { 783 int* gr = (int*)element; 784 785 r = static_cast<float>(gr[0]); 786 g = static_cast<float>(gr[1]); 787 } 788 break; 789 case FORMAT_G32R32UI: 790 { 791 unsigned int* gr = (unsigned int*)element; 792 793 r = static_cast<float>(gr[0]); 794 g = static_cast<float>(gr[1]); 795 } 796 break; 797 case FORMAT_R32I: 798 r = static_cast<float>(*((int*)element)); 799 break; 800 case FORMAT_R32UI: 801 r = static_cast<float>(*((unsigned int*)element)); 802 break; 803 case FORMAT_V8U8: 804 { 805 unsigned short vu = *(unsigned short*)element; 806 807 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 808 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 809 } 810 break; 811 case FORMAT_L6V5U5: 812 { 813 unsigned short lvu = *(unsigned short*)element; 814 815 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 816 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 817 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 818 } 819 break; 820 case FORMAT_Q8W8V8U8: 821 { 822 unsigned int qwvu = *(unsigned int*)element; 823 824 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 825 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 826 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000); 827 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 828 } 829 break; 830 case FORMAT_X8L8V8U8: 831 { 832 unsigned int xlvu = *(unsigned int*)element; 833 834 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 835 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 836 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 837 } 838 break; 839 case FORMAT_R8G8B8: 840 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 841 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 842 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 843 break; 844 case FORMAT_B8G8R8: 845 r = ((unsigned char*)element)[0] * (1.0f / 0xFF); 846 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 847 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 848 break; 849 case FORMAT_V16U16: 850 { 851 unsigned int vu = *(unsigned int*)element; 852 853 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 854 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 855 } 856 break; 857 case FORMAT_A2W10V10U10: 858 { 859 unsigned int awvu = *(unsigned int*)element; 860 861 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 862 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 863 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 864 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 865 } 866 break; 867 case FORMAT_A16W16V16U16: 868 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 869 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 870 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 871 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 872 break; 873 case FORMAT_Q16W16V16U16: 874 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 875 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 876 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 877 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 878 break; 879 case FORMAT_L8: 880 r = 881 g = 882 b = *(unsigned char*)element * (1.0f / 0xFF); 883 break; 884 case FORMAT_A4L4: 885 { 886 unsigned char al = *(unsigned char*)element; 887 888 r = 889 g = 890 b = (al & 0x0F) * (1.0f / 0x0F); 891 a = (al & 0xF0) * (1.0f / 0xF0); 892 } 893 break; 894 case FORMAT_L16: 895 r = 896 g = 897 b = *(unsigned short*)element * (1.0f / 0xFFFF); 898 break; 899 case FORMAT_A8L8: 900 r = 901 g = 902 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 903 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 904 break; 905 case FORMAT_L16F: 906 r = 907 g = 908 b = *(half*)element; 909 break; 910 case FORMAT_A16L16F: 911 r = 912 g = 913 b = ((half*)element)[0]; 914 a = ((half*)element)[1]; 915 break; 916 case FORMAT_L32F: 917 r = 918 g = 919 b = *(float*)element; 920 break; 921 case FORMAT_A32L32F: 922 r = 923 g = 924 b = ((float*)element)[0]; 925 a = ((float*)element)[1]; 926 break; 927 case FORMAT_A16F: 928 a = *(half*)element; 929 break; 930 case FORMAT_R16F: 931 r = *(half*)element; 932 break; 933 case FORMAT_G16R16F: 934 r = ((half*)element)[0]; 935 g = ((half*)element)[1]; 936 break; 937 case FORMAT_B16G16R16F: 938 r = ((half*)element)[0]; 939 g = ((half*)element)[1]; 940 b = ((half*)element)[2]; 941 break; 942 case FORMAT_A16B16G16R16F: 943 r = ((half*)element)[0]; 944 g = ((half*)element)[1]; 945 b = ((half*)element)[2]; 946 a = ((half*)element)[3]; 947 break; 948 case FORMAT_A32F: 949 a = *(float*)element; 950 break; 951 case FORMAT_R32F: 952 r = *(float*)element; 953 break; 954 case FORMAT_G32R32F: 955 r = ((float*)element)[0]; 956 g = ((float*)element)[1]; 957 break; 958 case FORMAT_X32B32G32R32F: 959 case FORMAT_B32G32R32F: 960 r = ((float*)element)[0]; 961 g = ((float*)element)[1]; 962 b = ((float*)element)[2]; 963 break; 964 case FORMAT_A32B32G32R32F: 965 r = ((float*)element)[0]; 966 g = ((float*)element)[1]; 967 b = ((float*)element)[2]; 968 a = ((float*)element)[3]; 969 break; 970 case FORMAT_D32F: 971 case FORMAT_D32F_LOCKABLE: 972 case FORMAT_D32FS8_TEXTURE: 973 case FORMAT_D32FS8_SHADOW: 974 r = *(float*)element; 975 g = r; 976 b = r; 977 a = r; 978 break; 979 case FORMAT_D32F_COMPLEMENTARY: 980 r = 1.0f - *(float*)element; 981 g = r; 982 b = r; 983 a = r; 984 break; 985 case FORMAT_S8: 986 r = *(unsigned char*)element * (1.0f / 0xFF); 987 break; 988 default: 989 ASSERT(false); 990 } 991 992 // if(sRGB) 993 // { 994 // r = sRGBtoLinear(r); 995 // g = sRGBtoLinear(g); 996 // b = sRGBtoLinear(b); 997 // } 998 999 return Color<float>(r, g, b, a); 1000 } 1001 sample(float x,float y,float z) const1002 Color<float> Surface::Buffer::sample(float x, float y, float z) const 1003 { 1004 x -= 0.5f; 1005 y -= 0.5f; 1006 z -= 0.5f; 1007 1008 int x0 = clamp((int)x, 0, width - 1); 1009 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1010 1011 int y0 = clamp((int)y, 0, height - 1); 1012 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1013 1014 int z0 = clamp((int)z, 0, depth - 1); 1015 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1; 1016 1017 Color<float> c000 = read(x0, y0, z0); 1018 Color<float> c100 = read(x1, y0, z0); 1019 Color<float> c010 = read(x0, y1, z0); 1020 Color<float> c110 = read(x1, y1, z0); 1021 Color<float> c001 = read(x0, y0, z1); 1022 Color<float> c101 = read(x1, y0, z1); 1023 Color<float> c011 = read(x0, y1, z1); 1024 Color<float> c111 = read(x1, y1, z1); 1025 1026 float fx = x - x0; 1027 float fy = y - y0; 1028 float fz = z - z0; 1029 1030 c000 *= (1 - fx) * (1 - fy) * (1 - fz); 1031 c100 *= fx * (1 - fy) * (1 - fz); 1032 c010 *= (1 - fx) * fy * (1 - fz); 1033 c110 *= fx * fy * (1 - fz); 1034 c001 *= (1 - fx) * (1 - fy) * fz; 1035 c101 *= fx * (1 - fy) * fz; 1036 c011 *= (1 - fx) * fy * fz; 1037 c111 *= fx * fy * fz; 1038 1039 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111; 1040 } 1041 sample(float x,float y) const1042 Color<float> Surface::Buffer::sample(float x, float y) const 1043 { 1044 x -= 0.5f; 1045 y -= 0.5f; 1046 1047 int x0 = clamp((int)x, 0, width - 1); 1048 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1049 1050 int y0 = clamp((int)y, 0, height - 1); 1051 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1052 1053 Color<float> c00 = read(x0, y0); 1054 Color<float> c10 = read(x1, y0); 1055 Color<float> c01 = read(x0, y1); 1056 Color<float> c11 = read(x1, y1); 1057 1058 float fx = x - x0; 1059 float fy = y - y0; 1060 1061 c00 *= (1 - fx) * (1 - fy); 1062 c10 *= fx * (1 - fy); 1063 c01 *= (1 - fx) * fy; 1064 c11 *= fx * fy; 1065 1066 return c00 + c10 + c01 + c11; 1067 } 1068 lockRect(int x,int y,int z,Lock lock)1069 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock) 1070 { 1071 this->lock = lock; 1072 1073 switch(lock) 1074 { 1075 case LOCK_UNLOCKED: 1076 case LOCK_READONLY: 1077 break; 1078 case LOCK_WRITEONLY: 1079 case LOCK_READWRITE: 1080 case LOCK_DISCARD: 1081 dirty = true; 1082 break; 1083 default: 1084 ASSERT(false); 1085 } 1086 1087 if(buffer) 1088 { 1089 switch(format) 1090 { 1091 #if S3TC_SUPPORT 1092 case FORMAT_DXT1: 1093 #endif 1094 case FORMAT_ATI1: 1095 case FORMAT_ETC1: 1096 case FORMAT_R11_EAC: 1097 case FORMAT_SIGNED_R11_EAC: 1098 case FORMAT_RGB8_ETC2: 1099 case FORMAT_SRGB8_ETC2: 1100 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1101 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1102 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1103 case FORMAT_RG11_EAC: 1104 case FORMAT_SIGNED_RG11_EAC: 1105 case FORMAT_RGBA8_ETC2_EAC: 1106 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1107 case FORMAT_RGBA_ASTC_4x4_KHR: 1108 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1109 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1110 case FORMAT_RGBA_ASTC_5x4_KHR: 1111 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1112 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB; 1113 case FORMAT_RGBA_ASTC_5x5_KHR: 1114 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1115 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB; 1116 case FORMAT_RGBA_ASTC_6x5_KHR: 1117 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1118 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB; 1119 case FORMAT_RGBA_ASTC_6x6_KHR: 1120 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1121 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB; 1122 case FORMAT_RGBA_ASTC_8x5_KHR: 1123 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1124 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB; 1125 case FORMAT_RGBA_ASTC_8x6_KHR: 1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB; 1128 case FORMAT_RGBA_ASTC_8x8_KHR: 1129 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1130 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB; 1131 case FORMAT_RGBA_ASTC_10x5_KHR: 1132 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1133 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB; 1134 case FORMAT_RGBA_ASTC_10x6_KHR: 1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB; 1137 case FORMAT_RGBA_ASTC_10x8_KHR: 1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB; 1140 case FORMAT_RGBA_ASTC_10x10_KHR: 1141 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1142 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB; 1143 case FORMAT_RGBA_ASTC_12x10_KHR: 1144 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1145 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB; 1146 case FORMAT_RGBA_ASTC_12x12_KHR: 1147 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1148 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB; 1149 #if S3TC_SUPPORT 1150 case FORMAT_DXT3: 1151 case FORMAT_DXT5: 1152 #endif 1153 case FORMAT_ATI2: 1154 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1155 default: 1156 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 1157 } 1158 } 1159 1160 return 0; 1161 } 1162 unlockRect()1163 void Surface::Buffer::unlockRect() 1164 { 1165 lock = LOCK_UNLOCKED; 1166 } 1167 Surface(int width,int height,int depth,Format format,void * pixels,int pitch,int slice)1168 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false) 1169 { 1170 resource = new Resource(0); 1171 hasParent = false; 1172 ownExternal = false; 1173 depth = max(1, depth); 1174 1175 external.buffer = pixels; 1176 external.width = width; 1177 external.height = height; 1178 external.depth = depth; 1179 external.format = format; 1180 external.bytes = bytes(external.format); 1181 external.pitchB = pitch; 1182 external.pitchP = external.bytes ? pitch / external.bytes : 0; 1183 external.sliceB = slice; 1184 external.sliceP = external.bytes ? slice / external.bytes : 0; 1185 external.lock = LOCK_UNLOCKED; 1186 external.dirty = true; 1187 1188 internal.buffer = 0; 1189 internal.width = width; 1190 internal.height = height; 1191 internal.depth = depth; 1192 internal.format = selectInternalFormat(format); 1193 internal.bytes = bytes(internal.format); 1194 internal.pitchB = pitchB(internal.width, internal.format, false); 1195 internal.pitchP = pitchP(internal.width, internal.format, false); 1196 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false); 1197 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false); 1198 internal.lock = LOCK_UNLOCKED; 1199 internal.dirty = false; 1200 1201 stencil.buffer = 0; 1202 stencil.width = width; 1203 stencil.height = height; 1204 stencil.depth = depth; 1205 stencil.format = FORMAT_S8; 1206 stencil.bytes = bytes(stencil.format); 1207 stencil.pitchB = pitchB(stencil.width, stencil.format, false); 1208 stencil.pitchP = pitchP(stencil.width, stencil.format, false); 1209 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false); 1210 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false); 1211 stencil.lock = LOCK_UNLOCKED; 1212 stencil.dirty = false; 1213 1214 dirtyMipmaps = true; 1215 paletteUsed = 0; 1216 } 1217 Surface(Resource * texture,int width,int height,int depth,Format format,bool lockable,bool renderTarget,int pitchPprovided)1218 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget) 1219 { 1220 resource = texture ? texture : new Resource(0); 1221 hasParent = texture != 0; 1222 ownExternal = true; 1223 depth = max(1, depth); 1224 1225 external.buffer = 0; 1226 external.width = width; 1227 external.height = height; 1228 external.depth = depth; 1229 external.format = format; 1230 external.bytes = bytes(external.format); 1231 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture); 1232 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture); 1233 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture); 1234 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture); 1235 external.lock = LOCK_UNLOCKED; 1236 external.dirty = false; 1237 1238 internal.buffer = 0; 1239 internal.width = width; 1240 internal.height = height; 1241 internal.depth = depth; 1242 internal.format = selectInternalFormat(format); 1243 internal.bytes = bytes(internal.format); 1244 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes; 1245 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided; 1246 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget); 1247 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget); 1248 internal.lock = LOCK_UNLOCKED; 1249 internal.dirty = false; 1250 1251 stencil.buffer = 0; 1252 stencil.width = width; 1253 stencil.height = height; 1254 stencil.depth = depth; 1255 stencil.format = FORMAT_S8; 1256 stencil.bytes = bytes(stencil.format); 1257 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget); 1258 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget); 1259 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget); 1260 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget); 1261 stencil.lock = LOCK_UNLOCKED; 1262 stencil.dirty = false; 1263 1264 dirtyMipmaps = true; 1265 paletteUsed = 0; 1266 } 1267 ~Surface()1268 Surface::~Surface() 1269 { 1270 // Synchronize so we can deallocate the buffers below 1271 resource->lock(DESTRUCT); 1272 resource->unlock(); 1273 1274 if(!hasParent) 1275 { 1276 resource->destruct(); 1277 } 1278 1279 if(ownExternal) 1280 { 1281 deallocate(external.buffer); 1282 } 1283 1284 if(internal.buffer != external.buffer) 1285 { 1286 deallocate(internal.buffer); 1287 } 1288 1289 deallocate(stencil.buffer); 1290 1291 external.buffer = 0; 1292 internal.buffer = 0; 1293 stencil.buffer = 0; 1294 } 1295 lockExternal(int x,int y,int z,Lock lock,Accessor client)1296 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client) 1297 { 1298 resource->lock(client); 1299 1300 if(!external.buffer) 1301 { 1302 if(internal.buffer && identicalFormats()) 1303 { 1304 external.buffer = internal.buffer; 1305 } 1306 else 1307 { 1308 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format); 1309 } 1310 } 1311 1312 if(internal.dirty) 1313 { 1314 if(lock != LOCK_DISCARD) 1315 { 1316 update(external, internal); 1317 } 1318 1319 internal.dirty = false; 1320 } 1321 1322 switch(lock) 1323 { 1324 case LOCK_READONLY: 1325 break; 1326 case LOCK_WRITEONLY: 1327 case LOCK_READWRITE: 1328 case LOCK_DISCARD: 1329 dirtyMipmaps = true; 1330 break; 1331 default: 1332 ASSERT(false); 1333 } 1334 1335 return external.lockRect(x, y, z, lock); 1336 } 1337 unlockExternal()1338 void Surface::unlockExternal() 1339 { 1340 resource->unlock(); 1341 1342 external.unlockRect(); 1343 } 1344 lockInternal(int x,int y,int z,Lock lock,Accessor client)1345 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client) 1346 { 1347 if(lock != LOCK_UNLOCKED) 1348 { 1349 resource->lock(client); 1350 } 1351 1352 if(!internal.buffer) 1353 { 1354 if(external.buffer && identicalFormats()) 1355 { 1356 internal.buffer = external.buffer; 1357 } 1358 else 1359 { 1360 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format); 1361 } 1362 } 1363 1364 // FIXME: WHQL requires conversion to lower external precision and back 1365 if(logPrecision >= WHQL) 1366 { 1367 if(internal.dirty && renderTarget && internal.format != external.format) 1368 { 1369 if(lock != LOCK_DISCARD) 1370 { 1371 switch(external.format) 1372 { 1373 case FORMAT_R3G3B2: 1374 case FORMAT_A8R3G3B2: 1375 case FORMAT_A1R5G5B5: 1376 case FORMAT_A2R10G10B10: 1377 case FORMAT_A2B10G10R10: 1378 lockExternal(0, 0, 0, LOCK_READWRITE, client); 1379 unlockExternal(); 1380 break; 1381 default: 1382 // Difference passes WHQL 1383 break; 1384 } 1385 } 1386 } 1387 } 1388 1389 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID)) 1390 { 1391 if(lock != LOCK_DISCARD) 1392 { 1393 update(internal, external); 1394 } 1395 1396 external.dirty = false; 1397 paletteUsed = Surface::paletteID; 1398 } 1399 1400 switch(lock) 1401 { 1402 case LOCK_UNLOCKED: 1403 case LOCK_READONLY: 1404 break; 1405 case LOCK_WRITEONLY: 1406 case LOCK_READWRITE: 1407 case LOCK_DISCARD: 1408 dirtyMipmaps = true; 1409 break; 1410 default: 1411 ASSERT(false); 1412 } 1413 1414 if(lock == LOCK_READONLY && client == PUBLIC) 1415 { 1416 resolve(); 1417 } 1418 1419 return internal.lockRect(x, y, z, lock); 1420 } 1421 unlockInternal()1422 void Surface::unlockInternal() 1423 { 1424 resource->unlock(); 1425 1426 internal.unlockRect(); 1427 } 1428 lockStencil(int front,Accessor client)1429 void *Surface::lockStencil(int front, Accessor client) 1430 { 1431 resource->lock(client); 1432 1433 if(!stencil.buffer) 1434 { 1435 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format); 1436 } 1437 1438 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME 1439 } 1440 unlockStencil()1441 void Surface::unlockStencil() 1442 { 1443 resource->unlock(); 1444 1445 stencil.unlockRect(); 1446 } 1447 bytes(Format format)1448 int Surface::bytes(Format format) 1449 { 1450 switch(format) 1451 { 1452 case FORMAT_NULL: return 0; 1453 case FORMAT_P8: return 1; 1454 case FORMAT_A8P8: return 2; 1455 case FORMAT_A8: return 1; 1456 case FORMAT_R8I: return 1; 1457 case FORMAT_R8: return 1; 1458 case FORMAT_R3G3B2: return 1; 1459 case FORMAT_R16I: return 2; 1460 case FORMAT_R16UI: return 2; 1461 case FORMAT_A8R3G3B2: return 2; 1462 case FORMAT_R5G6B5: return 2; 1463 case FORMAT_A1R5G5B5: return 2; 1464 case FORMAT_X1R5G5B5: return 2; 1465 case FORMAT_R5G5B5A1: return 2; 1466 case FORMAT_X4R4G4B4: return 2; 1467 case FORMAT_A4R4G4B4: return 2; 1468 case FORMAT_R4G4B4A4: return 2; 1469 case FORMAT_R8G8B8: return 3; 1470 case FORMAT_B8G8R8: return 3; 1471 case FORMAT_R32I: return 4; 1472 case FORMAT_R32UI: return 4; 1473 case FORMAT_X8R8G8B8: return 4; 1474 // case FORMAT_X8G8R8B8Q: return 4; 1475 case FORMAT_A8R8G8B8: return 4; 1476 // case FORMAT_A8G8R8B8Q: return 4; 1477 case FORMAT_X8B8G8R8I: return 4; 1478 case FORMAT_X8B8G8R8: return 4; 1479 case FORMAT_SRGB8_X8: return 4; 1480 case FORMAT_SRGB8_A8: return 4; 1481 case FORMAT_A8B8G8R8I: return 4; 1482 case FORMAT_R8UI: return 1; 1483 case FORMAT_G8R8UI: return 2; 1484 case FORMAT_X8B8G8R8UI: return 4; 1485 case FORMAT_A8B8G8R8UI: return 4; 1486 case FORMAT_A8B8G8R8: return 4; 1487 case FORMAT_R8I_SNORM: return 1; 1488 case FORMAT_G8R8I_SNORM: return 2; 1489 case FORMAT_X8B8G8R8I_SNORM: return 4; 1490 case FORMAT_A8B8G8R8I_SNORM: return 4; 1491 case FORMAT_A2R10G10B10: return 4; 1492 case FORMAT_A2B10G10R10: return 4; 1493 case FORMAT_G8R8I: return 2; 1494 case FORMAT_G8R8: return 2; 1495 case FORMAT_G16R16I: return 4; 1496 case FORMAT_G16R16UI: return 4; 1497 case FORMAT_G16R16: return 4; 1498 case FORMAT_G32R32I: return 8; 1499 case FORMAT_G32R32UI: return 8; 1500 case FORMAT_X16B16G16R16I: return 8; 1501 case FORMAT_X16B16G16R16UI: return 8; 1502 case FORMAT_A16B16G16R16I: return 8; 1503 case FORMAT_A16B16G16R16UI: return 8; 1504 case FORMAT_A16B16G16R16: return 8; 1505 case FORMAT_X32B32G32R32I: return 16; 1506 case FORMAT_X32B32G32R32UI: return 16; 1507 case FORMAT_A32B32G32R32I: return 16; 1508 case FORMAT_A32B32G32R32UI: return 16; 1509 // Compressed formats 1510 #if S3TC_SUPPORT 1511 case FORMAT_DXT1: return 2; // Column of four pixels 1512 case FORMAT_DXT3: return 4; // Column of four pixels 1513 case FORMAT_DXT5: return 4; // Column of four pixels 1514 #endif 1515 case FORMAT_ATI1: return 2; // Column of four pixels 1516 case FORMAT_ATI2: return 4; // Column of four pixels 1517 case FORMAT_ETC1: return 2; // Column of four pixels 1518 case FORMAT_R11_EAC: return 2; 1519 case FORMAT_SIGNED_R11_EAC: return 2; 1520 case FORMAT_RG11_EAC: return 4; 1521 case FORMAT_SIGNED_RG11_EAC: return 4; 1522 case FORMAT_RGB8_ETC2: return 2; 1523 case FORMAT_SRGB8_ETC2: return 2; 1524 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1525 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1526 case FORMAT_RGBA8_ETC2_EAC: return 4; 1527 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4; 1528 case FORMAT_RGBA_ASTC_4x4_KHR: 1529 case FORMAT_RGBA_ASTC_5x4_KHR: 1530 case FORMAT_RGBA_ASTC_5x5_KHR: 1531 case FORMAT_RGBA_ASTC_6x5_KHR: 1532 case FORMAT_RGBA_ASTC_6x6_KHR: 1533 case FORMAT_RGBA_ASTC_8x5_KHR: 1534 case FORMAT_RGBA_ASTC_8x6_KHR: 1535 case FORMAT_RGBA_ASTC_8x8_KHR: 1536 case FORMAT_RGBA_ASTC_10x5_KHR: 1537 case FORMAT_RGBA_ASTC_10x6_KHR: 1538 case FORMAT_RGBA_ASTC_10x8_KHR: 1539 case FORMAT_RGBA_ASTC_10x10_KHR: 1540 case FORMAT_RGBA_ASTC_12x10_KHR: 1541 case FORMAT_RGBA_ASTC_12x12_KHR: 1542 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1543 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1544 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1545 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1546 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1547 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1548 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1549 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1550 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1551 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1552 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1553 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1554 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1555 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME 1556 // Bumpmap formats 1557 case FORMAT_V8U8: return 2; 1558 case FORMAT_L6V5U5: return 2; 1559 case FORMAT_Q8W8V8U8: return 4; 1560 case FORMAT_X8L8V8U8: return 4; 1561 case FORMAT_A2W10V10U10: return 4; 1562 case FORMAT_V16U16: return 4; 1563 case FORMAT_A16W16V16U16: return 8; 1564 case FORMAT_Q16W16V16U16: return 8; 1565 // Luminance formats 1566 case FORMAT_L8: return 1; 1567 case FORMAT_A4L4: return 1; 1568 case FORMAT_L16: return 2; 1569 case FORMAT_A8L8: return 2; 1570 case FORMAT_L16F: return 2; 1571 case FORMAT_A16L16F: return 4; 1572 case FORMAT_L32F: return 4; 1573 case FORMAT_A32L32F: return 8; 1574 // Floating-point formats 1575 case FORMAT_A16F: return 2; 1576 case FORMAT_R16F: return 2; 1577 case FORMAT_G16R16F: return 4; 1578 case FORMAT_B16G16R16F: return 6; 1579 case FORMAT_A16B16G16R16F: return 8; 1580 case FORMAT_A32F: return 4; 1581 case FORMAT_R32F: return 4; 1582 case FORMAT_G32R32F: return 8; 1583 case FORMAT_B32G32R32F: return 12; 1584 case FORMAT_X32B32G32R32F: return 16; 1585 case FORMAT_A32B32G32R32F: return 16; 1586 // Depth/stencil formats 1587 case FORMAT_D16: return 2; 1588 case FORMAT_D32: return 4; 1589 case FORMAT_D24X8: return 4; 1590 case FORMAT_D24S8: return 4; 1591 case FORMAT_D24FS8: return 4; 1592 case FORMAT_D32F: return 4; 1593 case FORMAT_D32F_COMPLEMENTARY: return 4; 1594 case FORMAT_D32F_LOCKABLE: return 4; 1595 case FORMAT_D32FS8_TEXTURE: return 4; 1596 case FORMAT_D32FS8_SHADOW: return 4; 1597 case FORMAT_DF24S8: return 4; 1598 case FORMAT_DF16S8: return 2; 1599 case FORMAT_INTZ: return 4; 1600 case FORMAT_S8: return 1; 1601 case FORMAT_YV12_BT601: return 1; // Y plane only 1602 case FORMAT_YV12_BT709: return 1; // Y plane only 1603 case FORMAT_YV12_JFIF: return 1; // Y plane only 1604 default: 1605 ASSERT(false); 1606 } 1607 1608 return 0; 1609 } 1610 pitchB(int width,Format format,bool target)1611 int Surface::pitchB(int width, Format format, bool target) 1612 { 1613 if(target || isDepth(format) || isStencil(format)) 1614 { 1615 width = align(width, 2); 1616 } 1617 1618 switch(format) 1619 { 1620 #if S3TC_SUPPORT 1621 case FORMAT_DXT1: 1622 #endif 1623 case FORMAT_ETC1: 1624 case FORMAT_R11_EAC: 1625 case FORMAT_SIGNED_R11_EAC: 1626 case FORMAT_RGB8_ETC2: 1627 case FORMAT_SRGB8_ETC2: 1628 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1629 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1630 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows 1631 case FORMAT_RG11_EAC: 1632 case FORMAT_SIGNED_RG11_EAC: 1633 case FORMAT_RGBA8_ETC2_EAC: 1634 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1635 case FORMAT_RGBA_ASTC_4x4_KHR: 1636 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1637 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows 1638 case FORMAT_RGBA_ASTC_5x4_KHR: 1639 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1640 case FORMAT_RGBA_ASTC_5x5_KHR: 1641 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1642 return 16 * ((width + 4) / 5); 1643 case FORMAT_RGBA_ASTC_6x5_KHR: 1644 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1645 case FORMAT_RGBA_ASTC_6x6_KHR: 1646 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1647 return 16 * ((width + 5) / 6); 1648 case FORMAT_RGBA_ASTC_8x5_KHR: 1649 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1650 case FORMAT_RGBA_ASTC_8x6_KHR: 1651 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1652 case FORMAT_RGBA_ASTC_8x8_KHR: 1653 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1654 return 16 * ((width + 7) / 8); 1655 case FORMAT_RGBA_ASTC_10x5_KHR: 1656 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1657 case FORMAT_RGBA_ASTC_10x6_KHR: 1658 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1659 case FORMAT_RGBA_ASTC_10x8_KHR: 1660 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1661 case FORMAT_RGBA_ASTC_10x10_KHR: 1662 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1663 return 16 * ((width + 9) / 10); 1664 case FORMAT_RGBA_ASTC_12x10_KHR: 1665 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1666 case FORMAT_RGBA_ASTC_12x12_KHR: 1667 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1668 return 16 * ((width + 11) / 12); 1669 #if S3TC_SUPPORT 1670 case FORMAT_DXT3: 1671 case FORMAT_DXT5: 1672 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows 1673 #endif 1674 case FORMAT_ATI1: 1675 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row 1676 case FORMAT_ATI2: 1677 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row 1678 case FORMAT_YV12_BT601: 1679 case FORMAT_YV12_BT709: 1680 case FORMAT_YV12_JFIF: 1681 return align(width, 16); 1682 default: 1683 return bytes(format) * width; 1684 } 1685 } 1686 pitchP(int width,Format format,bool target)1687 int Surface::pitchP(int width, Format format, bool target) 1688 { 1689 int B = bytes(format); 1690 1691 return B > 0 ? pitchB(width, format, target) / B : 0; 1692 } 1693 sliceB(int width,int height,Format format,bool target)1694 int Surface::sliceB(int width, int height, Format format, bool target) 1695 { 1696 if(target || isDepth(format) || isStencil(format)) 1697 { 1698 height = ((height + 1) & ~1); 1699 } 1700 1701 switch(format) 1702 { 1703 #if S3TC_SUPPORT 1704 case FORMAT_DXT1: 1705 case FORMAT_DXT3: 1706 case FORMAT_DXT5: 1707 #endif 1708 case FORMAT_ETC1: 1709 case FORMAT_R11_EAC: 1710 case FORMAT_SIGNED_R11_EAC: 1711 case FORMAT_RG11_EAC: 1712 case FORMAT_SIGNED_RG11_EAC: 1713 case FORMAT_RGB8_ETC2: 1714 case FORMAT_SRGB8_ETC2: 1715 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1716 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1717 case FORMAT_RGBA8_ETC2_EAC: 1718 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1719 case FORMAT_RGBA_ASTC_4x4_KHR: 1720 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1721 case FORMAT_RGBA_ASTC_5x4_KHR: 1722 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1723 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows 1724 case FORMAT_RGBA_ASTC_5x5_KHR: 1725 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1726 case FORMAT_RGBA_ASTC_6x5_KHR: 1727 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1728 case FORMAT_RGBA_ASTC_8x5_KHR: 1729 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1730 case FORMAT_RGBA_ASTC_10x5_KHR: 1731 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1732 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows 1733 case FORMAT_RGBA_ASTC_6x6_KHR: 1734 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1735 case FORMAT_RGBA_ASTC_8x6_KHR: 1736 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1737 case FORMAT_RGBA_ASTC_10x6_KHR: 1738 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1739 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows 1740 case FORMAT_RGBA_ASTC_8x8_KHR: 1741 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1742 case FORMAT_RGBA_ASTC_10x8_KHR: 1743 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1744 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows 1745 case FORMAT_RGBA_ASTC_10x10_KHR: 1746 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1747 case FORMAT_RGBA_ASTC_12x10_KHR: 1748 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1749 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows 1750 case FORMAT_RGBA_ASTC_12x12_KHR: 1751 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1752 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows 1753 case FORMAT_ATI1: 1754 case FORMAT_ATI2: 1755 default: 1756 return pitchB(width, format, target) * height; // Pitch computed per row 1757 } 1758 } 1759 sliceP(int width,int height,Format format,bool target)1760 int Surface::sliceP(int width, int height, Format format, bool target) 1761 { 1762 int B = bytes(format); 1763 1764 return B > 0 ? sliceB(width, height, format, target) / B : 0; 1765 } 1766 update(Buffer & destination,Buffer & source)1767 void Surface::update(Buffer &destination, Buffer &source) 1768 { 1769 // ASSERT(source.lock != LOCK_UNLOCKED); 1770 // ASSERT(destination.lock != LOCK_UNLOCKED); 1771 1772 if(destination.buffer != source.buffer) 1773 { 1774 ASSERT(source.dirty && !destination.dirty); 1775 1776 switch(source.format) 1777 { 1778 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format 1779 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format 1780 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format 1781 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format 1782 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format 1783 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format 1784 #if S3TC_SUPPORT 1785 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format 1786 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format 1787 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format 1788 #endif 1789 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format 1790 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format 1791 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format 1792 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format 1793 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format 1794 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format 1795 case FORMAT_ETC1: 1796 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format 1797 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format 1798 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format 1799 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format 1800 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format 1801 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format 1802 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format 1803 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format 1804 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format 1805 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format 1806 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format 1807 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format 1808 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format 1809 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format 1810 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format 1811 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format 1812 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format 1813 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format 1814 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format 1815 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format 1816 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format 1817 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format 1818 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format 1819 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format 1820 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format 1821 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format 1822 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format 1823 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format 1824 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format 1825 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format 1826 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format 1827 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format 1828 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format 1829 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format 1830 default: genericUpdate(destination, source); break; 1831 } 1832 } 1833 } 1834 genericUpdate(Buffer & destination,Buffer & source)1835 void Surface::genericUpdate(Buffer &destination, Buffer &source) 1836 { 1837 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1838 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1839 1840 int depth = min(destination.depth, source.depth); 1841 int height = min(destination.height, source.height); 1842 int width = min(destination.width, source.width); 1843 int rowBytes = width * source.bytes; 1844 1845 for(int z = 0; z < depth; z++) 1846 { 1847 unsigned char *sourceRow = sourceSlice; 1848 unsigned char *destinationRow = destinationSlice; 1849 1850 for(int y = 0; y < height; y++) 1851 { 1852 if(source.format == destination.format) 1853 { 1854 memcpy(destinationRow, sourceRow, rowBytes); 1855 } 1856 else 1857 { 1858 unsigned char *sourceElement = sourceRow; 1859 unsigned char *destinationElement = destinationRow; 1860 1861 for(int x = 0; x < width; x++) 1862 { 1863 Color<float> color = source.read(sourceElement); 1864 destination.write(destinationElement, color); 1865 1866 sourceElement += source.bytes; 1867 destinationElement += destination.bytes; 1868 } 1869 } 1870 1871 sourceRow += source.pitchB; 1872 destinationRow += destination.pitchB; 1873 } 1874 1875 sourceSlice += source.sliceB; 1876 destinationSlice += destination.sliceB; 1877 } 1878 } 1879 decodeR8G8B8(Buffer & destination,const Buffer & source)1880 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source) 1881 { 1882 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1883 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1884 1885 for(int z = 0; z < destination.depth && z < source.depth; z++) 1886 { 1887 unsigned char *sourceRow = sourceSlice; 1888 unsigned char *destinationRow = destinationSlice; 1889 1890 for(int y = 0; y < destination.height && y < source.height; y++) 1891 { 1892 unsigned char *sourceElement = sourceRow; 1893 unsigned char *destinationElement = destinationRow; 1894 1895 for(int x = 0; x < destination.width && x < source.width; x++) 1896 { 1897 unsigned int b = sourceElement[0]; 1898 unsigned int g = sourceElement[1]; 1899 unsigned int r = sourceElement[2]; 1900 1901 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 1902 1903 sourceElement += source.bytes; 1904 destinationElement += destination.bytes; 1905 } 1906 1907 sourceRow += source.pitchB; 1908 destinationRow += destination.pitchB; 1909 } 1910 1911 sourceSlice += source.sliceB; 1912 destinationSlice += destination.sliceB; 1913 } 1914 } 1915 decodeX1R5G5B5(Buffer & destination,const Buffer & source)1916 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source) 1917 { 1918 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1919 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1920 1921 for(int z = 0; z < destination.depth && z < source.depth; z++) 1922 { 1923 unsigned char *sourceRow = sourceSlice; 1924 unsigned char *destinationRow = destinationSlice; 1925 1926 for(int y = 0; y < destination.height && y < source.height; y++) 1927 { 1928 unsigned char *sourceElement = sourceRow; 1929 unsigned char *destinationElement = destinationRow; 1930 1931 for(int x = 0; x < destination.width && x < source.width; x++) 1932 { 1933 unsigned int xrgb = *(unsigned short*)sourceElement; 1934 1935 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1936 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1937 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 1938 1939 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 1940 1941 sourceElement += source.bytes; 1942 destinationElement += destination.bytes; 1943 } 1944 1945 sourceRow += source.pitchB; 1946 destinationRow += destination.pitchB; 1947 } 1948 1949 sourceSlice += source.sliceB; 1950 destinationSlice += destination.sliceB; 1951 } 1952 } 1953 decodeA1R5G5B5(Buffer & destination,const Buffer & source)1954 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source) 1955 { 1956 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1957 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1958 1959 for(int z = 0; z < destination.depth && z < source.depth; z++) 1960 { 1961 unsigned char *sourceRow = sourceSlice; 1962 unsigned char *destinationRow = destinationSlice; 1963 1964 for(int y = 0; y < destination.height && y < source.height; y++) 1965 { 1966 unsigned char *sourceElement = sourceRow; 1967 unsigned char *destinationElement = destinationRow; 1968 1969 for(int x = 0; x < destination.width && x < source.width; x++) 1970 { 1971 unsigned int argb = *(unsigned short*)sourceElement; 1972 1973 unsigned int a = (argb & 0x8000) * 130560; 1974 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1975 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1976 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 1977 1978 *(unsigned int*)destinationElement = a | r | g | b; 1979 1980 sourceElement += source.bytes; 1981 destinationElement += destination.bytes; 1982 } 1983 1984 sourceRow += source.pitchB; 1985 destinationRow += destination.pitchB; 1986 } 1987 1988 sourceSlice += source.sliceB; 1989 destinationSlice += destination.sliceB; 1990 } 1991 } 1992 decodeX4R4G4B4(Buffer & destination,const Buffer & source)1993 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source) 1994 { 1995 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1996 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1997 1998 for(int z = 0; z < destination.depth && z < source.depth; z++) 1999 { 2000 unsigned char *sourceRow = sourceSlice; 2001 unsigned char *destinationRow = destinationSlice; 2002 2003 for(int y = 0; y < destination.height && y < source.height; y++) 2004 { 2005 unsigned char *sourceElement = sourceRow; 2006 unsigned char *destinationElement = destinationRow; 2007 2008 for(int x = 0; x < destination.width && x < source.width; x++) 2009 { 2010 unsigned int xrgb = *(unsigned short*)sourceElement; 2011 2012 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 2013 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 2014 unsigned int b = (xrgb & 0x000F) * 0x00000011; 2015 2016 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2017 2018 sourceElement += source.bytes; 2019 destinationElement += destination.bytes; 2020 } 2021 2022 sourceRow += source.pitchB; 2023 destinationRow += destination.pitchB; 2024 } 2025 2026 sourceSlice += source.sliceB; 2027 destinationSlice += destination.sliceB; 2028 } 2029 } 2030 decodeA4R4G4B4(Buffer & destination,const Buffer & source)2031 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source) 2032 { 2033 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2034 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2035 2036 for(int z = 0; z < destination.depth && z < source.depth; z++) 2037 { 2038 unsigned char *sourceRow = sourceSlice; 2039 unsigned char *destinationRow = destinationSlice; 2040 2041 for(int y = 0; y < destination.height && y < source.height; y++) 2042 { 2043 unsigned char *sourceElement = sourceRow; 2044 unsigned char *destinationElement = destinationRow; 2045 2046 for(int x = 0; x < destination.width && x < source.width; x++) 2047 { 2048 unsigned int argb = *(unsigned short*)sourceElement; 2049 2050 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2051 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2052 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2053 unsigned int b = (argb & 0x000F) * 0x00000011; 2054 2055 *(unsigned int*)destinationElement = a | r | g | b; 2056 2057 sourceElement += source.bytes; 2058 destinationElement += destination.bytes; 2059 } 2060 2061 sourceRow += source.pitchB; 2062 destinationRow += destination.pitchB; 2063 } 2064 2065 sourceSlice += source.sliceB; 2066 destinationSlice += destination.sliceB; 2067 } 2068 } 2069 decodeP8(Buffer & destination,const Buffer & source)2070 void Surface::decodeP8(Buffer &destination, const Buffer &source) 2071 { 2072 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2073 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2074 2075 for(int z = 0; z < destination.depth && z < source.depth; z++) 2076 { 2077 unsigned char *sourceRow = sourceSlice; 2078 unsigned char *destinationRow = destinationSlice; 2079 2080 for(int y = 0; y < destination.height && y < source.height; y++) 2081 { 2082 unsigned char *sourceElement = sourceRow; 2083 unsigned char *destinationElement = destinationRow; 2084 2085 for(int x = 0; x < destination.width && x < source.width; x++) 2086 { 2087 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2088 2089 unsigned int r = (abgr & 0x000000FF) << 16; 2090 unsigned int g = (abgr & 0x0000FF00) << 0; 2091 unsigned int b = (abgr & 0x00FF0000) >> 16; 2092 unsigned int a = (abgr & 0xFF000000) >> 0; 2093 2094 *(unsigned int*)destinationElement = a | r | g | b; 2095 2096 sourceElement += source.bytes; 2097 destinationElement += destination.bytes; 2098 } 2099 2100 sourceRow += source.pitchB; 2101 destinationRow += destination.pitchB; 2102 } 2103 2104 sourceSlice += source.sliceB; 2105 destinationSlice += destination.sliceB; 2106 } 2107 } 2108 2109 #if S3TC_SUPPORT decodeDXT1(Buffer & internal,const Buffer & external)2110 void Surface::decodeDXT1(Buffer &internal, const Buffer &external) 2111 { 2112 unsigned int *destSlice = (unsigned int*)internal.buffer; 2113 const DXT1 *source = (const DXT1*)external.buffer; 2114 2115 for(int z = 0; z < external.depth; z++) 2116 { 2117 unsigned int *dest = destSlice; 2118 2119 for(int y = 0; y < external.height; y += 4) 2120 { 2121 for(int x = 0; x < external.width; x += 4) 2122 { 2123 Color<byte> c[4]; 2124 2125 c[0] = source->c0; 2126 c[1] = source->c1; 2127 2128 if(source->c0 > source->c1) // No transparency 2129 { 2130 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2131 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2132 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2133 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2134 c[2].a = 0xFF; 2135 2136 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2137 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2138 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2139 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2140 c[3].a = 0xFF; 2141 } 2142 else // c3 transparent 2143 { 2144 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2145 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2146 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2147 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2148 c[2].a = 0xFF; 2149 2150 c[3].r = 0; 2151 c[3].g = 0; 2152 c[3].b = 0; 2153 c[3].a = 0; 2154 } 2155 2156 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2157 { 2158 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2159 { 2160 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2161 } 2162 } 2163 2164 source++; 2165 } 2166 } 2167 2168 (byte*&)destSlice += internal.sliceB; 2169 } 2170 } 2171 decodeDXT3(Buffer & internal,const Buffer & external)2172 void Surface::decodeDXT3(Buffer &internal, const Buffer &external) 2173 { 2174 unsigned int *destSlice = (unsigned int*)internal.buffer; 2175 const DXT3 *source = (const DXT3*)external.buffer; 2176 2177 for(int z = 0; z < external.depth; z++) 2178 { 2179 unsigned int *dest = destSlice; 2180 2181 for(int y = 0; y < external.height; y += 4) 2182 { 2183 for(int x = 0; x < external.width; x += 4) 2184 { 2185 Color<byte> c[4]; 2186 2187 c[0] = source->c0; 2188 c[1] = source->c1; 2189 2190 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2191 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2192 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2193 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2194 2195 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2196 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2197 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2198 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2199 2200 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2201 { 2202 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2203 { 2204 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2205 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2206 2207 dest[(x + i) + (y + j) * internal.width] = color; 2208 } 2209 } 2210 2211 source++; 2212 } 2213 } 2214 2215 (byte*&)destSlice += internal.sliceB; 2216 } 2217 } 2218 decodeDXT5(Buffer & internal,const Buffer & external)2219 void Surface::decodeDXT5(Buffer &internal, const Buffer &external) 2220 { 2221 unsigned int *destSlice = (unsigned int*)internal.buffer; 2222 const DXT5 *source = (const DXT5*)external.buffer; 2223 2224 for(int z = 0; z < external.depth; z++) 2225 { 2226 unsigned int *dest = destSlice; 2227 2228 for(int y = 0; y < external.height; y += 4) 2229 { 2230 for(int x = 0; x < external.width; x += 4) 2231 { 2232 Color<byte> c[4]; 2233 2234 c[0] = source->c0; 2235 c[1] = source->c1; 2236 2237 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2238 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2239 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2240 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2241 2242 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2243 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2244 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2245 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2246 2247 byte a[8]; 2248 2249 a[0] = source->a0; 2250 a[1] = source->a1; 2251 2252 if(a[0] > a[1]) 2253 { 2254 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2255 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2256 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2257 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2258 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2259 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2260 } 2261 else 2262 { 2263 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2264 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2265 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2266 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2267 a[6] = 0; 2268 a[7] = 0xFF; 2269 } 2270 2271 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2272 { 2273 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2274 { 2275 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2276 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2277 2278 dest[(x + i) + (y + j) * internal.width] = color; 2279 } 2280 } 2281 2282 source++; 2283 } 2284 } 2285 2286 (byte*&)destSlice += internal.sliceB; 2287 } 2288 } 2289 #endif 2290 decodeATI1(Buffer & internal,const Buffer & external)2291 void Surface::decodeATI1(Buffer &internal, const Buffer &external) 2292 { 2293 byte *destSlice = (byte*)internal.buffer; 2294 const ATI1 *source = (const ATI1*)external.buffer; 2295 2296 for(int z = 0; z < external.depth; z++) 2297 { 2298 byte *dest = destSlice; 2299 2300 for(int y = 0; y < external.height; y += 4) 2301 { 2302 for(int x = 0; x < external.width; x += 4) 2303 { 2304 byte r[8]; 2305 2306 r[0] = source->r0; 2307 r[1] = source->r1; 2308 2309 if(r[0] > r[1]) 2310 { 2311 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2312 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2313 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2314 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2315 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2316 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2317 } 2318 else 2319 { 2320 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2321 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2322 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2323 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2324 r[6] = 0; 2325 r[7] = 0xFF; 2326 } 2327 2328 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2329 { 2330 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2331 { 2332 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2333 } 2334 } 2335 2336 source++; 2337 } 2338 } 2339 2340 destSlice += internal.sliceB; 2341 } 2342 } 2343 decodeATI2(Buffer & internal,const Buffer & external)2344 void Surface::decodeATI2(Buffer &internal, const Buffer &external) 2345 { 2346 word *destSlice = (word*)internal.buffer; 2347 const ATI2 *source = (const ATI2*)external.buffer; 2348 2349 for(int z = 0; z < external.depth; z++) 2350 { 2351 word *dest = destSlice; 2352 2353 for(int y = 0; y < external.height; y += 4) 2354 { 2355 for(int x = 0; x < external.width; x += 4) 2356 { 2357 byte X[8]; 2358 2359 X[0] = source->x0; 2360 X[1] = source->x1; 2361 2362 if(X[0] > X[1]) 2363 { 2364 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2365 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2366 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2367 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2368 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2369 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2370 } 2371 else 2372 { 2373 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2374 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2375 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2376 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2377 X[6] = 0; 2378 X[7] = 0xFF; 2379 } 2380 2381 byte Y[8]; 2382 2383 Y[0] = source->y0; 2384 Y[1] = source->y1; 2385 2386 if(Y[0] > Y[1]) 2387 { 2388 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2389 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2390 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2391 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2392 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2393 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2394 } 2395 else 2396 { 2397 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2398 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2399 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2400 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2401 Y[6] = 0; 2402 Y[7] = 0xFF; 2403 } 2404 2405 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2406 { 2407 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2408 { 2409 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2410 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2411 2412 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r; 2413 } 2414 } 2415 2416 source++; 2417 } 2418 } 2419 2420 (byte*&)destSlice += internal.sliceB; 2421 } 2422 } 2423 decodeETC2(Buffer & internal,const Buffer & external,int nbAlphaBits,bool isSRGB)2424 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB) 2425 { 2426 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2427 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB)); 2428 2429 if(isSRGB) 2430 { 2431 static byte sRGBtoLinearTable[256]; 2432 static bool sRGBtoLinearTableDirty = true; 2433 if(sRGBtoLinearTableDirty) 2434 { 2435 for(int i = 0; i < 256; i++) 2436 { 2437 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f); 2438 } 2439 sRGBtoLinearTableDirty = false; 2440 } 2441 2442 // Perform sRGB conversion in place after decoding 2443 byte* src = (byte*)internal.buffer; 2444 for(int y = 0; y < internal.height; y++) 2445 { 2446 byte* srcRow = src + y * internal.pitchB; 2447 for(int x = 0; x < internal.width; x++) 2448 { 2449 byte* srcPix = srcRow + x * internal.bytes; 2450 for(int i = 0; i < 3; i++) 2451 { 2452 srcPix[i] = sRGBtoLinearTable[srcPix[i]]; 2453 } 2454 } 2455 } 2456 } 2457 } 2458 decodeEAC(Buffer & internal,const Buffer & external,int nbChannels,bool isSigned)2459 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned) 2460 { 2461 ASSERT(nbChannels == 1 || nbChannels == 2); 2462 2463 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2464 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED)); 2465 2466 // FIXME: We convert signed data to float, until signed integer internal formats are supported 2467 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats 2468 if(isSigned) 2469 { 2470 sbyte* src = (sbyte*)internal.buffer; 2471 2472 for(int y = 0; y < internal.height; y++) 2473 { 2474 sbyte* srcRow = src + y * internal.pitchB; 2475 for(int x = internal.width - 1; x >= 0; x--) 2476 { 2477 int dx = x & 0xFFFFFFFC; 2478 int mx = x - dx; 2479 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels; 2480 float* dstPix = (float*)(srcRow + x * internal.bytes); 2481 for(int c = nbChannels - 1; c >= 0; c--) 2482 { 2483 static const float normalization = 1.0f / 127.875f; 2484 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2485 } 2486 } 2487 } 2488 } 2489 } 2490 decodeASTC(Buffer & internal,const Buffer & external,int xBlockSize,int yBlockSize,int zBlockSize,bool isSRGB)2491 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB) 2492 { 2493 } 2494 size(int width,int height,int depth,Format format)2495 unsigned int Surface::size(int width, int height, int depth, Format format) 2496 { 2497 // Dimensions rounded up to multiples of 4, used for compressed formats 2498 int width4 = align(width, 4); 2499 int height4 = align(height, 4); 2500 2501 switch(format) 2502 { 2503 #if S3TC_SUPPORT 2504 case FORMAT_DXT1: 2505 #endif 2506 case FORMAT_ATI1: 2507 case FORMAT_ETC1: 2508 case FORMAT_R11_EAC: 2509 case FORMAT_SIGNED_R11_EAC: 2510 case FORMAT_RGB8_ETC2: 2511 case FORMAT_SRGB8_ETC2: 2512 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2513 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2514 return width4 * height4 * depth / 2; 2515 #if S3TC_SUPPORT 2516 case FORMAT_DXT3: 2517 case FORMAT_DXT5: 2518 #endif 2519 case FORMAT_ATI2: 2520 case FORMAT_RG11_EAC: 2521 case FORMAT_SIGNED_RG11_EAC: 2522 case FORMAT_RGBA8_ETC2_EAC: 2523 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2524 case FORMAT_RGBA_ASTC_4x4_KHR: 2525 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2526 return width4 * height4 * depth; 2527 case FORMAT_RGBA_ASTC_5x4_KHR: 2528 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2529 return align(width, 5) * height4 * depth; 2530 case FORMAT_RGBA_ASTC_5x5_KHR: 2531 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2532 return align(width, 5) * align(height, 5) * depth; 2533 case FORMAT_RGBA_ASTC_6x5_KHR: 2534 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2535 return align(width, 6) * align(height, 5) * depth; 2536 case FORMAT_RGBA_ASTC_6x6_KHR: 2537 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2538 return align(width, 6) * align(height, 6) * depth; 2539 case FORMAT_RGBA_ASTC_8x5_KHR: 2540 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2541 return align(width, 8) * align(height, 5) * depth; 2542 case FORMAT_RGBA_ASTC_8x6_KHR: 2543 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2544 return align(width, 8) * align(height, 6) * depth; 2545 case FORMAT_RGBA_ASTC_8x8_KHR: 2546 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2547 return align(width, 8) * align(height, 8) * depth; 2548 case FORMAT_RGBA_ASTC_10x5_KHR: 2549 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2550 return align(width, 10) * align(height, 5) * depth; 2551 case FORMAT_RGBA_ASTC_10x6_KHR: 2552 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2553 return align(width, 10) * align(height, 6) * depth; 2554 case FORMAT_RGBA_ASTC_10x8_KHR: 2555 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2556 return align(width, 10) * align(height, 8) * depth; 2557 case FORMAT_RGBA_ASTC_10x10_KHR: 2558 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2559 return align(width, 10) * align(height, 10) * depth; 2560 case FORMAT_RGBA_ASTC_12x10_KHR: 2561 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2562 return align(width, 12) * align(height, 10) * depth; 2563 case FORMAT_RGBA_ASTC_12x12_KHR: 2564 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2565 return align(width, 12) * align(height, 12) * depth; 2566 case FORMAT_YV12_BT601: 2567 case FORMAT_YV12_BT709: 2568 case FORMAT_YV12_JFIF: 2569 { 2570 unsigned int YStride = align(width, 16); 2571 unsigned int YSize = YStride * height; 2572 unsigned int CStride = align(YStride / 2, 16); 2573 unsigned int CSize = CStride * height / 2; 2574 2575 return YSize + 2 * CSize; 2576 } 2577 default: 2578 return bytes(format) * width * height * depth; 2579 } 2580 2581 return 0; 2582 } 2583 isStencil(Format format)2584 bool Surface::isStencil(Format format) 2585 { 2586 switch(format) 2587 { 2588 case FORMAT_D32: 2589 case FORMAT_D16: 2590 case FORMAT_D24X8: 2591 case FORMAT_D32F: 2592 case FORMAT_D32F_COMPLEMENTARY: 2593 case FORMAT_D32F_LOCKABLE: 2594 return false; 2595 case FORMAT_D24S8: 2596 case FORMAT_D24FS8: 2597 case FORMAT_S8: 2598 case FORMAT_DF24S8: 2599 case FORMAT_DF16S8: 2600 case FORMAT_D32FS8_TEXTURE: 2601 case FORMAT_D32FS8_SHADOW: 2602 case FORMAT_INTZ: 2603 return true; 2604 default: 2605 return false; 2606 } 2607 } 2608 isDepth(Format format)2609 bool Surface::isDepth(Format format) 2610 { 2611 switch(format) 2612 { 2613 case FORMAT_D32: 2614 case FORMAT_D16: 2615 case FORMAT_D24X8: 2616 case FORMAT_D24S8: 2617 case FORMAT_D24FS8: 2618 case FORMAT_D32F: 2619 case FORMAT_D32F_COMPLEMENTARY: 2620 case FORMAT_D32F_LOCKABLE: 2621 case FORMAT_DF24S8: 2622 case FORMAT_DF16S8: 2623 case FORMAT_D32FS8_TEXTURE: 2624 case FORMAT_D32FS8_SHADOW: 2625 case FORMAT_INTZ: 2626 return true; 2627 case FORMAT_S8: 2628 return false; 2629 default: 2630 return false; 2631 } 2632 } 2633 isPalette(Format format)2634 bool Surface::isPalette(Format format) 2635 { 2636 switch(format) 2637 { 2638 case FORMAT_P8: 2639 case FORMAT_A8P8: 2640 return true; 2641 default: 2642 return false; 2643 } 2644 } 2645 isFloatFormat(Format format)2646 bool Surface::isFloatFormat(Format format) 2647 { 2648 switch(format) 2649 { 2650 case FORMAT_R5G6B5: 2651 case FORMAT_R8G8B8: 2652 case FORMAT_B8G8R8: 2653 case FORMAT_X8R8G8B8: 2654 case FORMAT_X8B8G8R8I: 2655 case FORMAT_X8B8G8R8: 2656 case FORMAT_A8R8G8B8: 2657 case FORMAT_SRGB8_X8: 2658 case FORMAT_SRGB8_A8: 2659 case FORMAT_A8B8G8R8I: 2660 case FORMAT_R8UI: 2661 case FORMAT_G8R8UI: 2662 case FORMAT_X8B8G8R8UI: 2663 case FORMAT_A8B8G8R8UI: 2664 case FORMAT_A8B8G8R8: 2665 case FORMAT_G8R8I: 2666 case FORMAT_G8R8: 2667 case FORMAT_A2B10G10R10: 2668 case FORMAT_R8I_SNORM: 2669 case FORMAT_G8R8I_SNORM: 2670 case FORMAT_X8B8G8R8I_SNORM: 2671 case FORMAT_A8B8G8R8I_SNORM: 2672 case FORMAT_R16I: 2673 case FORMAT_R16UI: 2674 case FORMAT_G16R16I: 2675 case FORMAT_G16R16UI: 2676 case FORMAT_G16R16: 2677 case FORMAT_X16B16G16R16I: 2678 case FORMAT_X16B16G16R16UI: 2679 case FORMAT_A16B16G16R16I: 2680 case FORMAT_A16B16G16R16UI: 2681 case FORMAT_A16B16G16R16: 2682 case FORMAT_V8U8: 2683 case FORMAT_Q8W8V8U8: 2684 case FORMAT_X8L8V8U8: 2685 case FORMAT_V16U16: 2686 case FORMAT_A16W16V16U16: 2687 case FORMAT_Q16W16V16U16: 2688 case FORMAT_A8: 2689 case FORMAT_R8I: 2690 case FORMAT_R8: 2691 case FORMAT_L8: 2692 case FORMAT_L16: 2693 case FORMAT_A8L8: 2694 case FORMAT_YV12_BT601: 2695 case FORMAT_YV12_BT709: 2696 case FORMAT_YV12_JFIF: 2697 case FORMAT_R32I: 2698 case FORMAT_R32UI: 2699 case FORMAT_G32R32I: 2700 case FORMAT_G32R32UI: 2701 case FORMAT_X32B32G32R32I: 2702 case FORMAT_X32B32G32R32UI: 2703 case FORMAT_A32B32G32R32I: 2704 case FORMAT_A32B32G32R32UI: 2705 return false; 2706 case FORMAT_R32F: 2707 case FORMAT_G32R32F: 2708 case FORMAT_X32B32G32R32F: 2709 case FORMAT_A32B32G32R32F: 2710 case FORMAT_D32F: 2711 case FORMAT_D32F_COMPLEMENTARY: 2712 case FORMAT_D32F_LOCKABLE: 2713 case FORMAT_D32FS8_TEXTURE: 2714 case FORMAT_D32FS8_SHADOW: 2715 case FORMAT_L16F: 2716 case FORMAT_A16L16F: 2717 case FORMAT_L32F: 2718 case FORMAT_A32L32F: 2719 return true; 2720 default: 2721 ASSERT(false); 2722 } 2723 2724 return false; 2725 } 2726 isUnsignedComponent(Format format,int component)2727 bool Surface::isUnsignedComponent(Format format, int component) 2728 { 2729 switch(format) 2730 { 2731 case FORMAT_NULL: 2732 case FORMAT_R5G6B5: 2733 case FORMAT_R8G8B8: 2734 case FORMAT_B8G8R8: 2735 case FORMAT_X8R8G8B8: 2736 case FORMAT_X8B8G8R8: 2737 case FORMAT_A8R8G8B8: 2738 case FORMAT_A8B8G8R8: 2739 case FORMAT_SRGB8_X8: 2740 case FORMAT_SRGB8_A8: 2741 case FORMAT_G8R8: 2742 case FORMAT_A2B10G10R10: 2743 case FORMAT_R16UI: 2744 case FORMAT_G16R16: 2745 case FORMAT_G16R16UI: 2746 case FORMAT_X16B16G16R16UI: 2747 case FORMAT_A16B16G16R16: 2748 case FORMAT_A16B16G16R16UI: 2749 case FORMAT_R32UI: 2750 case FORMAT_G32R32UI: 2751 case FORMAT_X32B32G32R32UI: 2752 case FORMAT_A32B32G32R32UI: 2753 case FORMAT_R8UI: 2754 case FORMAT_G8R8UI: 2755 case FORMAT_X8B8G8R8UI: 2756 case FORMAT_A8B8G8R8UI: 2757 case FORMAT_D32F: 2758 case FORMAT_D32F_COMPLEMENTARY: 2759 case FORMAT_D32F_LOCKABLE: 2760 case FORMAT_D32FS8_TEXTURE: 2761 case FORMAT_D32FS8_SHADOW: 2762 case FORMAT_A8: 2763 case FORMAT_R8: 2764 case FORMAT_L8: 2765 case FORMAT_L16: 2766 case FORMAT_A8L8: 2767 case FORMAT_YV12_BT601: 2768 case FORMAT_YV12_BT709: 2769 case FORMAT_YV12_JFIF: 2770 return true; 2771 case FORMAT_A8B8G8R8I: 2772 case FORMAT_A16B16G16R16I: 2773 case FORMAT_A32B32G32R32I: 2774 case FORMAT_A8B8G8R8I_SNORM: 2775 case FORMAT_Q8W8V8U8: 2776 case FORMAT_Q16W16V16U16: 2777 case FORMAT_A32B32G32R32F: 2778 return false; 2779 case FORMAT_R32F: 2780 case FORMAT_R8I: 2781 case FORMAT_R16I: 2782 case FORMAT_R32I: 2783 case FORMAT_R8I_SNORM: 2784 return component >= 1; 2785 case FORMAT_V8U8: 2786 case FORMAT_X8L8V8U8: 2787 case FORMAT_V16U16: 2788 case FORMAT_G32R32F: 2789 case FORMAT_G8R8I: 2790 case FORMAT_G16R16I: 2791 case FORMAT_G32R32I: 2792 case FORMAT_G8R8I_SNORM: 2793 return component >= 2; 2794 case FORMAT_A16W16V16U16: 2795 case FORMAT_X32B32G32R32F: 2796 case FORMAT_X8B8G8R8I: 2797 case FORMAT_X16B16G16R16I: 2798 case FORMAT_X32B32G32R32I: 2799 case FORMAT_X8B8G8R8I_SNORM: 2800 return component >= 3; 2801 default: 2802 ASSERT(false); 2803 } 2804 2805 return false; 2806 } 2807 isSRGBreadable(Format format)2808 bool Surface::isSRGBreadable(Format format) 2809 { 2810 // Keep in sync with Capabilities::isSRGBreadable 2811 switch(format) 2812 { 2813 case FORMAT_L8: 2814 case FORMAT_A8L8: 2815 case FORMAT_R8G8B8: 2816 case FORMAT_A8R8G8B8: 2817 case FORMAT_X8R8G8B8: 2818 case FORMAT_A8B8G8R8: 2819 case FORMAT_X8B8G8R8: 2820 case FORMAT_SRGB8_X8: 2821 case FORMAT_SRGB8_A8: 2822 case FORMAT_R5G6B5: 2823 case FORMAT_X1R5G5B5: 2824 case FORMAT_A1R5G5B5: 2825 case FORMAT_A4R4G4B4: 2826 #if S3TC_SUPPORT 2827 case FORMAT_DXT1: 2828 case FORMAT_DXT3: 2829 case FORMAT_DXT5: 2830 #endif 2831 case FORMAT_ATI1: 2832 case FORMAT_ATI2: 2833 return true; 2834 default: 2835 return false; 2836 } 2837 2838 return false; 2839 } 2840 isSRGBwritable(Format format)2841 bool Surface::isSRGBwritable(Format format) 2842 { 2843 // Keep in sync with Capabilities::isSRGBwritable 2844 switch(format) 2845 { 2846 case FORMAT_NULL: 2847 case FORMAT_A8R8G8B8: 2848 case FORMAT_X8R8G8B8: 2849 case FORMAT_A8B8G8R8: 2850 case FORMAT_X8B8G8R8: 2851 case FORMAT_SRGB8_X8: 2852 case FORMAT_SRGB8_A8: 2853 case FORMAT_R5G6B5: 2854 return true; 2855 default: 2856 return false; 2857 } 2858 } 2859 isCompressed(Format format)2860 bool Surface::isCompressed(Format format) 2861 { 2862 switch(format) 2863 { 2864 #if S3TC_SUPPORT 2865 case FORMAT_DXT1: 2866 case FORMAT_DXT3: 2867 case FORMAT_DXT5: 2868 #endif 2869 case FORMAT_ATI1: 2870 case FORMAT_ATI2: 2871 case FORMAT_ETC1: 2872 case FORMAT_R11_EAC: 2873 case FORMAT_SIGNED_R11_EAC: 2874 case FORMAT_RG11_EAC: 2875 case FORMAT_SIGNED_RG11_EAC: 2876 case FORMAT_RGB8_ETC2: 2877 case FORMAT_SRGB8_ETC2: 2878 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2879 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2880 case FORMAT_RGBA8_ETC2_EAC: 2881 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2882 case FORMAT_RGBA_ASTC_4x4_KHR: 2883 case FORMAT_RGBA_ASTC_5x4_KHR: 2884 case FORMAT_RGBA_ASTC_5x5_KHR: 2885 case FORMAT_RGBA_ASTC_6x5_KHR: 2886 case FORMAT_RGBA_ASTC_6x6_KHR: 2887 case FORMAT_RGBA_ASTC_8x5_KHR: 2888 case FORMAT_RGBA_ASTC_8x6_KHR: 2889 case FORMAT_RGBA_ASTC_8x8_KHR: 2890 case FORMAT_RGBA_ASTC_10x5_KHR: 2891 case FORMAT_RGBA_ASTC_10x6_KHR: 2892 case FORMAT_RGBA_ASTC_10x8_KHR: 2893 case FORMAT_RGBA_ASTC_10x10_KHR: 2894 case FORMAT_RGBA_ASTC_12x10_KHR: 2895 case FORMAT_RGBA_ASTC_12x12_KHR: 2896 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2897 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2898 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2899 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2900 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2901 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2902 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2903 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2904 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2905 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2906 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2907 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2908 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2909 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2910 return true; 2911 default: 2912 return false; 2913 } 2914 } 2915 isNonNormalizedInteger(Format format)2916 bool Surface::isNonNormalizedInteger(Format format) 2917 { 2918 switch(format) 2919 { 2920 case FORMAT_A8B8G8R8I: 2921 case FORMAT_X8B8G8R8I: 2922 case FORMAT_G8R8I: 2923 case FORMAT_R8I: 2924 case FORMAT_A8B8G8R8UI: 2925 case FORMAT_X8B8G8R8UI: 2926 case FORMAT_G8R8UI: 2927 case FORMAT_R8UI: 2928 case FORMAT_A16B16G16R16I: 2929 case FORMAT_X16B16G16R16I: 2930 case FORMAT_G16R16I: 2931 case FORMAT_R16I: 2932 case FORMAT_A16B16G16R16UI: 2933 case FORMAT_X16B16G16R16UI: 2934 case FORMAT_G16R16UI: 2935 case FORMAT_R16UI: 2936 case FORMAT_A32B32G32R32I: 2937 case FORMAT_X32B32G32R32I: 2938 case FORMAT_G32R32I: 2939 case FORMAT_R32I: 2940 case FORMAT_A32B32G32R32UI: 2941 case FORMAT_X32B32G32R32UI: 2942 case FORMAT_G32R32UI: 2943 case FORMAT_R32UI: 2944 return true; 2945 default: 2946 return false; 2947 } 2948 } 2949 componentCount(Format format)2950 int Surface::componentCount(Format format) 2951 { 2952 switch(format) 2953 { 2954 case FORMAT_R5G6B5: return 3; 2955 case FORMAT_X8R8G8B8: return 3; 2956 case FORMAT_X8B8G8R8I: return 3; 2957 case FORMAT_X8B8G8R8: return 3; 2958 case FORMAT_A8R8G8B8: return 4; 2959 case FORMAT_SRGB8_X8: return 3; 2960 case FORMAT_SRGB8_A8: return 4; 2961 case FORMAT_A8B8G8R8I: return 4; 2962 case FORMAT_A8B8G8R8: return 4; 2963 case FORMAT_G8R8I: return 2; 2964 case FORMAT_G8R8: return 2; 2965 case FORMAT_R8I_SNORM: return 1; 2966 case FORMAT_G8R8I_SNORM: return 2; 2967 case FORMAT_X8B8G8R8I_SNORM:return 3; 2968 case FORMAT_A8B8G8R8I_SNORM:return 4; 2969 case FORMAT_R8UI: return 1; 2970 case FORMAT_G8R8UI: return 2; 2971 case FORMAT_X8B8G8R8UI: return 3; 2972 case FORMAT_A8B8G8R8UI: return 4; 2973 case FORMAT_A2B10G10R10: return 4; 2974 case FORMAT_G16R16I: return 2; 2975 case FORMAT_G16R16UI: return 2; 2976 case FORMAT_G16R16: return 2; 2977 case FORMAT_G32R32I: return 2; 2978 case FORMAT_G32R32UI: return 2; 2979 case FORMAT_X16B16G16R16I: return 3; 2980 case FORMAT_X16B16G16R16UI: return 3; 2981 case FORMAT_A16B16G16R16I: return 4; 2982 case FORMAT_A16B16G16R16UI: return 4; 2983 case FORMAT_A16B16G16R16: return 4; 2984 case FORMAT_X32B32G32R32I: return 3; 2985 case FORMAT_X32B32G32R32UI: return 3; 2986 case FORMAT_A32B32G32R32I: return 4; 2987 case FORMAT_A32B32G32R32UI: return 4; 2988 case FORMAT_V8U8: return 2; 2989 case FORMAT_Q8W8V8U8: return 4; 2990 case FORMAT_X8L8V8U8: return 3; 2991 case FORMAT_V16U16: return 2; 2992 case FORMAT_A16W16V16U16: return 4; 2993 case FORMAT_Q16W16V16U16: return 4; 2994 case FORMAT_R32F: return 1; 2995 case FORMAT_G32R32F: return 2; 2996 case FORMAT_X32B32G32R32F: return 3; 2997 case FORMAT_A32B32G32R32F: return 4; 2998 case FORMAT_D32F: return 1; 2999 case FORMAT_D32F_LOCKABLE: return 1; 3000 case FORMAT_D32FS8_TEXTURE: return 1; 3001 case FORMAT_D32FS8_SHADOW: return 1; 3002 case FORMAT_A8: return 1; 3003 case FORMAT_R8I: return 1; 3004 case FORMAT_R8: return 1; 3005 case FORMAT_R16I: return 1; 3006 case FORMAT_R16UI: return 1; 3007 case FORMAT_R32I: return 1; 3008 case FORMAT_R32UI: return 1; 3009 case FORMAT_L8: return 1; 3010 case FORMAT_L16: return 1; 3011 case FORMAT_A8L8: return 2; 3012 case FORMAT_YV12_BT601: return 3; 3013 case FORMAT_YV12_BT709: return 3; 3014 case FORMAT_YV12_JFIF: return 3; 3015 default: 3016 ASSERT(false); 3017 } 3018 3019 return 1; 3020 } 3021 allocateBuffer(int width,int height,int depth,Format format)3022 void *Surface::allocateBuffer(int width, int height, int depth, Format format) 3023 { 3024 // Render targets require 2x2 quads 3025 int width2 = (width + 1) & ~1; 3026 int height2 = (height + 1) & ~1; 3027 3028 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes, 3029 // so we have to allocate 4 extra bytes to avoid buffer overruns. 3030 return allocateZero(size(width2, height2, depth, format) + 4); 3031 } 3032 memfill4(void * buffer,int pattern,int bytes)3033 void Surface::memfill4(void *buffer, int pattern, int bytes) 3034 { 3035 while((size_t)buffer & 0x1 && bytes >= 1) 3036 { 3037 *(char*)buffer = (char)pattern; 3038 (char*&)buffer += 1; 3039 bytes -= 1; 3040 } 3041 3042 while((size_t)buffer & 0x3 && bytes >= 2) 3043 { 3044 *(short*)buffer = (short)pattern; 3045 (short*&)buffer += 1; 3046 bytes -= 2; 3047 } 3048 3049 if(CPUID::supportsSSE()) 3050 { 3051 while((size_t)buffer & 0xF && bytes >= 4) 3052 { 3053 *(int*)buffer = pattern; 3054 (int*&)buffer += 1; 3055 bytes -= 4; 3056 } 3057 3058 __m128 quad = _mm_set_ps1((float&)pattern); 3059 3060 float *pointer = (float*)buffer; 3061 int qxwords = bytes / 64; 3062 bytes -= qxwords * 64; 3063 3064 while(qxwords--) 3065 { 3066 _mm_stream_ps(pointer + 0, quad); 3067 _mm_stream_ps(pointer + 4, quad); 3068 _mm_stream_ps(pointer + 8, quad); 3069 _mm_stream_ps(pointer + 12, quad); 3070 3071 pointer += 16; 3072 } 3073 3074 buffer = pointer; 3075 } 3076 3077 while(bytes >= 4) 3078 { 3079 *(int*)buffer = (int)pattern; 3080 (int*&)buffer += 1; 3081 bytes -= 4; 3082 } 3083 3084 while(bytes >= 2) 3085 { 3086 *(short*)buffer = (short)pattern; 3087 (short*&)buffer += 1; 3088 bytes -= 2; 3089 } 3090 3091 while(bytes >= 1) 3092 { 3093 *(char*)buffer = (char)pattern; 3094 (char*&)buffer += 1; 3095 bytes -= 1; 3096 } 3097 } 3098 isEntire(const SliceRect & rect) const3099 bool Surface::isEntire(const SliceRect& rect) const 3100 { 3101 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3102 } 3103 getRect() const3104 SliceRect Surface::getRect() const 3105 { 3106 return SliceRect(0, 0, internal.width, internal.height, 0); 3107 } 3108 clearDepth(float depth,int x0,int y0,int width,int height)3109 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3110 { 3111 if(width == 0 || height == 0) return; 3112 3113 // Not overlapping 3114 if(x0 > internal.width) return; 3115 if(y0 > internal.height) return; 3116 if(x0 + width < 0) return; 3117 if(y0 + height < 0) return; 3118 3119 // Clip against dimensions 3120 if(x0 < 0) {width += x0; x0 = 0;} 3121 if(x0 + width > internal.width) width = internal.width - x0; 3122 if(y0 < 0) {height += y0; y0 = 0;} 3123 if(y0 + height > internal.height) height = internal.height - y0; 3124 3125 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3126 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3127 3128 int width2 = (internal.width + 1) & ~1; 3129 3130 int x1 = x0 + width; 3131 int y1 = y0 + height; 3132 3133 if(internal.format == FORMAT_D32F_LOCKABLE || 3134 internal.format == FORMAT_D32FS8_TEXTURE || 3135 internal.format == FORMAT_D32FS8_SHADOW) 3136 { 3137 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0; 3138 3139 for(int z = 0; z < internal.depth; z++) 3140 { 3141 for(int y = y0; y < y1; y++) 3142 { 3143 memfill4(target, (int&)depth, 4 * width); 3144 target += width2; 3145 } 3146 } 3147 3148 unlockInternal(); 3149 } 3150 else // Quad layout 3151 { 3152 if(complementaryDepthBuffer) 3153 { 3154 depth = 1 - depth; 3155 } 3156 3157 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3158 3159 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3160 int oddX1 = (x1 & ~1) * 2; 3161 int evenX0 = ((x0 + 1) & ~1) * 2; 3162 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3163 3164 for(int z = 0; z < internal.depth; z++) 3165 { 3166 for(int y = y0; y < y1; y++) 3167 { 3168 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3169 3170 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3171 { 3172 if((x0 & 1) != 0) 3173 { 3174 target[oddX0 + 0] = depth; 3175 target[oddX0 + 2] = depth; 3176 } 3177 3178 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3179 // { 3180 // target[x2 + 0] = depth; 3181 // target[x2 + 1] = depth; 3182 // target[x2 + 2] = depth; 3183 // target[x2 + 3] = depth; 3184 // } 3185 3186 // __asm 3187 // { 3188 // movss xmm0, depth 3189 // shufps xmm0, xmm0, 0x00 3190 // 3191 // mov eax, x0 3192 // add eax, 1 3193 // and eax, 0xFFFFFFFE 3194 // cmp eax, x1 3195 // jge qEnd 3196 // 3197 // mov edi, target 3198 // 3199 // qLoop: 3200 // movntps [edi+8*eax], xmm0 3201 // 3202 // add eax, 2 3203 // cmp eax, x1 3204 // jl qLoop 3205 // qEnd: 3206 // } 3207 3208 memfill4(&target[evenX0], (int&)depth, evenBytes); 3209 3210 if((x1 & 1) != 0) 3211 { 3212 target[oddX1 + 0] = depth; 3213 target[oddX1 + 2] = depth; 3214 } 3215 3216 y++; 3217 } 3218 else 3219 { 3220 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3221 { 3222 target[i] = depth; 3223 } 3224 } 3225 } 3226 3227 buffer += internal.sliceP; 3228 } 3229 3230 unlockInternal(); 3231 } 3232 } 3233 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3234 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3235 { 3236 if(mask == 0 || width == 0 || height == 0) return; 3237 3238 // Not overlapping 3239 if(x0 > internal.width) return; 3240 if(y0 > internal.height) return; 3241 if(x0 + width < 0) return; 3242 if(y0 + height < 0) return; 3243 3244 // Clip against dimensions 3245 if(x0 < 0) {width += x0; x0 = 0;} 3246 if(x0 + width > internal.width) width = internal.width - x0; 3247 if(y0 < 0) {height += y0; y0 = 0;} 3248 if(y0 + height > internal.height) height = internal.height - y0; 3249 3250 int width2 = (internal.width + 1) & ~1; 3251 3252 int x1 = x0 + width; 3253 int y1 = y0 + height; 3254 3255 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3256 int oddX1 = (x1 & ~1) * 2; 3257 int evenX0 = ((x0 + 1) & ~1) * 2; 3258 int evenBytes = oddX1 - evenX0; 3259 3260 unsigned char maskedS = s & mask; 3261 unsigned char invMask = ~mask; 3262 unsigned int fill = maskedS; 3263 fill = fill | (fill << 8) | (fill << 16) + (fill << 24); 3264 3265 char *buffer = (char*)lockStencil(0, PUBLIC); 3266 3267 // Stencil buffers are assumed to use quad layout 3268 for(int z = 0; z < stencil.depth; z++) 3269 { 3270 for(int y = y0; y < y1; y++) 3271 { 3272 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3273 3274 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3275 { 3276 if((x0 & 1) != 0) 3277 { 3278 target[oddX0 + 0] = fill; 3279 target[oddX0 + 2] = fill; 3280 } 3281 3282 memfill4(&target[evenX0], fill, evenBytes); 3283 3284 if((x1 & 1) != 0) 3285 { 3286 target[oddX1 + 0] = fill; 3287 target[oddX1 + 2] = fill; 3288 } 3289 3290 y++; 3291 } 3292 else 3293 { 3294 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3295 { 3296 target[i] = maskedS | (target[i] & invMask); 3297 } 3298 } 3299 } 3300 3301 buffer += stencil.sliceP; 3302 } 3303 3304 unlockStencil(); 3305 } 3306 fill(const Color<float> & color,int x0,int y0,int width,int height)3307 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3308 { 3309 unsigned char *row; 3310 Buffer *buffer; 3311 3312 if(internal.dirty) 3313 { 3314 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3315 buffer = &internal; 3316 } 3317 else 3318 { 3319 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3320 buffer = &external; 3321 } 3322 3323 if(buffer->bytes <= 4) 3324 { 3325 int c; 3326 buffer->write(&c, color); 3327 3328 if(buffer->bytes <= 1) c = (c << 8) | c; 3329 if(buffer->bytes <= 2) c = (c << 16) | c; 3330 3331 for(int y = 0; y < height; y++) 3332 { 3333 memfill4(row, c, width * buffer->bytes); 3334 3335 row += buffer->pitchB; 3336 } 3337 } 3338 else // Generic 3339 { 3340 for(int y = 0; y < height; y++) 3341 { 3342 unsigned char *element = row; 3343 3344 for(int x = 0; x < width; x++) 3345 { 3346 buffer->write(element, color); 3347 3348 element += buffer->bytes; 3349 } 3350 3351 row += buffer->pitchB; 3352 } 3353 } 3354 3355 if(buffer == &internal) 3356 { 3357 unlockInternal(); 3358 } 3359 else 3360 { 3361 unlockExternal(); 3362 } 3363 } 3364 copyInternal(const Surface * source,int x,int y,float srcX,float srcY,bool filter)3365 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter) 3366 { 3367 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3368 3369 sw::Color<float> color; 3370 3371 if(!filter) 3372 { 3373 color = source->internal.read((int)srcX, (int)srcY); 3374 } 3375 else // Bilinear filtering 3376 { 3377 color = source->internal.sample(srcX, srcY); 3378 } 3379 3380 internal.write(x, y, color); 3381 } 3382 copyInternal(const Surface * source,int x,int y,int z,float srcX,float srcY,float srcZ,bool filter)3383 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3384 { 3385 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3386 3387 sw::Color<float> color; 3388 3389 if(!filter) 3390 { 3391 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3392 } 3393 else // Bilinear filtering 3394 { 3395 color = source->internal.sample(srcX, srcY, srcZ); 3396 } 3397 3398 internal.write(x, y, z, color); 3399 } 3400 hasStencil() const3401 bool Surface::hasStencil() const 3402 { 3403 return isStencil(external.format); 3404 } 3405 hasDepth() const3406 bool Surface::hasDepth() const 3407 { 3408 return isDepth(external.format); 3409 } 3410 hasPalette() const3411 bool Surface::hasPalette() const 3412 { 3413 return isPalette(external.format); 3414 } 3415 isRenderTarget() const3416 bool Surface::isRenderTarget() const 3417 { 3418 return renderTarget; 3419 } 3420 hasDirtyMipmaps() const3421 bool Surface::hasDirtyMipmaps() const 3422 { 3423 return dirtyMipmaps; 3424 } 3425 cleanMipmaps()3426 void Surface::cleanMipmaps() 3427 { 3428 dirtyMipmaps = false; 3429 } 3430 getResource()3431 Resource *Surface::getResource() 3432 { 3433 return resource; 3434 } 3435 identicalFormats() const3436 bool Surface::identicalFormats() const 3437 { 3438 return external.format == internal.format && 3439 external.width == internal.width && 3440 external.height == internal.height && 3441 external.depth == internal.depth && 3442 external.pitchB == internal.pitchB && 3443 external.sliceB == internal.sliceB; 3444 } 3445 selectInternalFormat(Format format) const3446 Format Surface::selectInternalFormat(Format format) const 3447 { 3448 switch(format) 3449 { 3450 case FORMAT_NULL: 3451 return FORMAT_NULL; 3452 case FORMAT_P8: 3453 case FORMAT_A8P8: 3454 case FORMAT_A4R4G4B4: 3455 case FORMAT_A1R5G5B5: 3456 case FORMAT_A8R3G3B2: 3457 return FORMAT_A8R8G8B8; 3458 case FORMAT_A8: 3459 return FORMAT_A8; 3460 case FORMAT_R8I: 3461 return FORMAT_R8I; 3462 case FORMAT_R8UI: 3463 return FORMAT_R8UI; 3464 case FORMAT_R8I_SNORM: 3465 return FORMAT_R8I_SNORM; 3466 case FORMAT_R8: 3467 return FORMAT_R8; 3468 case FORMAT_R16I: 3469 return FORMAT_R16I; 3470 case FORMAT_R16UI: 3471 return FORMAT_R16UI; 3472 case FORMAT_R32I: 3473 return FORMAT_R32I; 3474 case FORMAT_R32UI: 3475 return FORMAT_R32UI; 3476 case FORMAT_X16B16G16R16I: 3477 case FORMAT_A16B16G16R16I: 3478 return FORMAT_A16B16G16R16I; 3479 case FORMAT_X16B16G16R16UI: 3480 case FORMAT_A16B16G16R16UI: 3481 return FORMAT_A16B16G16R16UI; 3482 case FORMAT_A2R10G10B10: 3483 case FORMAT_A2B10G10R10: 3484 case FORMAT_A16B16G16R16: 3485 return FORMAT_A16B16G16R16; 3486 case FORMAT_X32B32G32R32I: 3487 case FORMAT_A32B32G32R32I: 3488 return FORMAT_A32B32G32R32I; 3489 case FORMAT_X32B32G32R32UI: 3490 case FORMAT_A32B32G32R32UI: 3491 return FORMAT_A32B32G32R32UI; 3492 case FORMAT_G8R8I: 3493 return FORMAT_G8R8I; 3494 case FORMAT_G8R8UI: 3495 return FORMAT_G8R8UI; 3496 case FORMAT_G8R8I_SNORM: 3497 return FORMAT_G8R8I_SNORM; 3498 case FORMAT_G8R8: 3499 return FORMAT_G8R8; 3500 case FORMAT_G16R16I: 3501 return FORMAT_G16R16I; 3502 case FORMAT_G16R16UI: 3503 return FORMAT_G16R16UI; 3504 case FORMAT_G16R16: 3505 return FORMAT_G16R16; 3506 case FORMAT_G32R32I: 3507 return FORMAT_G32R32I; 3508 case FORMAT_G32R32UI: 3509 return FORMAT_G32R32UI; 3510 case FORMAT_A8R8G8B8: 3511 if(lockable || !quadLayoutEnabled) 3512 { 3513 return FORMAT_A8R8G8B8; 3514 } 3515 else 3516 { 3517 return FORMAT_A8G8R8B8Q; 3518 } 3519 case FORMAT_A8B8G8R8I: 3520 return FORMAT_A8B8G8R8I; 3521 case FORMAT_A8B8G8R8UI: 3522 return FORMAT_A8B8G8R8UI; 3523 case FORMAT_A8B8G8R8I_SNORM: 3524 return FORMAT_A8B8G8R8I_SNORM; 3525 case FORMAT_R5G5B5A1: 3526 case FORMAT_R4G4B4A4: 3527 case FORMAT_A8B8G8R8: 3528 return FORMAT_A8B8G8R8; 3529 case FORMAT_R5G6B5: 3530 return FORMAT_R5G6B5; 3531 case FORMAT_R3G3B2: 3532 case FORMAT_R8G8B8: 3533 case FORMAT_X4R4G4B4: 3534 case FORMAT_X1R5G5B5: 3535 case FORMAT_X8R8G8B8: 3536 if(lockable || !quadLayoutEnabled) 3537 { 3538 return FORMAT_X8R8G8B8; 3539 } 3540 else 3541 { 3542 return FORMAT_X8G8R8B8Q; 3543 } 3544 case FORMAT_X8B8G8R8I: 3545 return FORMAT_X8B8G8R8I; 3546 case FORMAT_X8B8G8R8UI: 3547 return FORMAT_X8B8G8R8UI; 3548 case FORMAT_X8B8G8R8I_SNORM: 3549 return FORMAT_X8B8G8R8I_SNORM; 3550 case FORMAT_B8G8R8: 3551 case FORMAT_X8B8G8R8: 3552 return FORMAT_X8B8G8R8; 3553 case FORMAT_SRGB8_X8: 3554 return FORMAT_SRGB8_X8; 3555 case FORMAT_SRGB8_A8: 3556 return FORMAT_SRGB8_A8; 3557 // Compressed formats 3558 #if S3TC_SUPPORT 3559 case FORMAT_DXT1: 3560 case FORMAT_DXT3: 3561 case FORMAT_DXT5: 3562 #endif 3563 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3564 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3565 case FORMAT_RGBA8_ETC2_EAC: 3566 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3567 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3568 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3569 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3570 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3571 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3572 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3573 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3574 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3575 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3576 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3577 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3578 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3579 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3580 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3581 return FORMAT_A8R8G8B8; 3582 case FORMAT_RGBA_ASTC_4x4_KHR: 3583 case FORMAT_RGBA_ASTC_5x4_KHR: 3584 case FORMAT_RGBA_ASTC_5x5_KHR: 3585 case FORMAT_RGBA_ASTC_6x5_KHR: 3586 case FORMAT_RGBA_ASTC_6x6_KHR: 3587 case FORMAT_RGBA_ASTC_8x5_KHR: 3588 case FORMAT_RGBA_ASTC_8x6_KHR: 3589 case FORMAT_RGBA_ASTC_8x8_KHR: 3590 case FORMAT_RGBA_ASTC_10x5_KHR: 3591 case FORMAT_RGBA_ASTC_10x6_KHR: 3592 case FORMAT_RGBA_ASTC_10x8_KHR: 3593 case FORMAT_RGBA_ASTC_10x10_KHR: 3594 case FORMAT_RGBA_ASTC_12x10_KHR: 3595 case FORMAT_RGBA_ASTC_12x12_KHR: 3596 // ASTC supports HDR, so a floating point format is required to represent it properly 3597 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3598 case FORMAT_ATI1: 3599 case FORMAT_R11_EAC: 3600 return FORMAT_R8; 3601 case FORMAT_SIGNED_R11_EAC: 3602 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3603 case FORMAT_ATI2: 3604 case FORMAT_RG11_EAC: 3605 return FORMAT_G8R8; 3606 case FORMAT_SIGNED_RG11_EAC: 3607 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3608 case FORMAT_ETC1: 3609 case FORMAT_RGB8_ETC2: 3610 case FORMAT_SRGB8_ETC2: 3611 return FORMAT_X8R8G8B8; 3612 // Bumpmap formats 3613 case FORMAT_V8U8: return FORMAT_V8U8; 3614 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3615 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3616 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3617 case FORMAT_V16U16: return FORMAT_V16U16; 3618 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3619 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3620 // Floating-point formats 3621 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3622 case FORMAT_R16F: return FORMAT_R32F; 3623 case FORMAT_G16R16F: return FORMAT_G32R32F; 3624 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3625 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3626 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3627 case FORMAT_R32F: return FORMAT_R32F; 3628 case FORMAT_G32R32F: return FORMAT_G32R32F; 3629 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3630 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3631 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3632 // Luminance formats 3633 case FORMAT_L8: return FORMAT_L8; 3634 case FORMAT_A4L4: return FORMAT_A8L8; 3635 case FORMAT_L16: return FORMAT_L16; 3636 case FORMAT_A8L8: return FORMAT_A8L8; 3637 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3638 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3639 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3640 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3641 // Depth/stencil formats 3642 case FORMAT_D16: 3643 case FORMAT_D32: 3644 case FORMAT_D24X8: 3645 case FORMAT_D24S8: 3646 case FORMAT_D24FS8: 3647 if(hasParent) // Texture 3648 { 3649 return FORMAT_D32FS8_SHADOW; 3650 } 3651 else if(complementaryDepthBuffer) 3652 { 3653 return FORMAT_D32F_COMPLEMENTARY; 3654 } 3655 else 3656 { 3657 return FORMAT_D32F; 3658 } 3659 case FORMAT_D32F: return FORMAT_D32F; 3660 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3661 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3662 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3663 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3664 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3665 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3666 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3667 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3668 default: 3669 ASSERT(false); 3670 } 3671 3672 return FORMAT_NULL; 3673 } 3674 setTexturePalette(unsigned int * palette)3675 void Surface::setTexturePalette(unsigned int *palette) 3676 { 3677 Surface::palette = palette; 3678 Surface::paletteID++; 3679 } 3680 resolve()3681 void Surface::resolve() 3682 { 3683 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 3684 { 3685 return; 3686 } 3687 3688 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3689 3690 int quality = internal.depth; 3691 int width = internal.width; 3692 int height = internal.height; 3693 int pitch = internal.pitchB; 3694 int slice = internal.sliceB; 3695 3696 unsigned char *source0 = (unsigned char*)source; 3697 unsigned char *source1 = source0 + slice; 3698 unsigned char *source2 = source1 + slice; 3699 unsigned char *source3 = source2 + slice; 3700 unsigned char *source4 = source3 + slice; 3701 unsigned char *source5 = source4 + slice; 3702 unsigned char *source6 = source5 + slice; 3703 unsigned char *source7 = source6 + slice; 3704 unsigned char *source8 = source7 + slice; 3705 unsigned char *source9 = source8 + slice; 3706 unsigned char *sourceA = source9 + slice; 3707 unsigned char *sourceB = sourceA + slice; 3708 unsigned char *sourceC = sourceB + slice; 3709 unsigned char *sourceD = sourceC + slice; 3710 unsigned char *sourceE = sourceD + slice; 3711 unsigned char *sourceF = sourceE + slice; 3712 3713 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 3714 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 3715 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 3716 { 3717 if(CPUID::supportsSSE2() && (width % 4) == 0) 3718 { 3719 if(internal.depth == 2) 3720 { 3721 for(int y = 0; y < height; y++) 3722 { 3723 for(int x = 0; x < width; x += 4) 3724 { 3725 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3726 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3727 3728 c0 = _mm_avg_epu8(c0, c1); 3729 3730 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3731 } 3732 3733 source0 += pitch; 3734 source1 += pitch; 3735 } 3736 } 3737 else if(internal.depth == 4) 3738 { 3739 for(int y = 0; y < height; y++) 3740 { 3741 for(int x = 0; x < width; x += 4) 3742 { 3743 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3744 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3745 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3746 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3747 3748 c0 = _mm_avg_epu8(c0, c1); 3749 c2 = _mm_avg_epu8(c2, c3); 3750 c0 = _mm_avg_epu8(c0, c2); 3751 3752 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3753 } 3754 3755 source0 += pitch; 3756 source1 += pitch; 3757 source2 += pitch; 3758 source3 += pitch; 3759 } 3760 } 3761 else if(internal.depth == 8) 3762 { 3763 for(int y = 0; y < height; y++) 3764 { 3765 for(int x = 0; x < width; x += 4) 3766 { 3767 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3768 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3769 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3770 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3771 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3772 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3773 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3774 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3775 3776 c0 = _mm_avg_epu8(c0, c1); 3777 c2 = _mm_avg_epu8(c2, c3); 3778 c4 = _mm_avg_epu8(c4, c5); 3779 c6 = _mm_avg_epu8(c6, c7); 3780 c0 = _mm_avg_epu8(c0, c2); 3781 c4 = _mm_avg_epu8(c4, c6); 3782 c0 = _mm_avg_epu8(c0, c4); 3783 3784 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3785 } 3786 3787 source0 += pitch; 3788 source1 += pitch; 3789 source2 += pitch; 3790 source3 += pitch; 3791 source4 += pitch; 3792 source5 += pitch; 3793 source6 += pitch; 3794 source7 += pitch; 3795 } 3796 } 3797 else if(internal.depth == 16) 3798 { 3799 for(int y = 0; y < height; y++) 3800 { 3801 for(int x = 0; x < width; x += 4) 3802 { 3803 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3804 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3805 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3806 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3807 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3808 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3809 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3810 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3811 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3812 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3813 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3814 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3815 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3816 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3817 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3818 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3819 3820 c0 = _mm_avg_epu8(c0, c1); 3821 c2 = _mm_avg_epu8(c2, c3); 3822 c4 = _mm_avg_epu8(c4, c5); 3823 c6 = _mm_avg_epu8(c6, c7); 3824 c8 = _mm_avg_epu8(c8, c9); 3825 cA = _mm_avg_epu8(cA, cB); 3826 cC = _mm_avg_epu8(cC, cD); 3827 cE = _mm_avg_epu8(cE, cF); 3828 c0 = _mm_avg_epu8(c0, c2); 3829 c4 = _mm_avg_epu8(c4, c6); 3830 c8 = _mm_avg_epu8(c8, cA); 3831 cC = _mm_avg_epu8(cC, cE); 3832 c0 = _mm_avg_epu8(c0, c4); 3833 c8 = _mm_avg_epu8(c8, cC); 3834 c0 = _mm_avg_epu8(c0, c8); 3835 3836 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3837 } 3838 3839 source0 += pitch; 3840 source1 += pitch; 3841 source2 += pitch; 3842 source3 += pitch; 3843 source4 += pitch; 3844 source5 += pitch; 3845 source6 += pitch; 3846 source7 += pitch; 3847 source8 += pitch; 3848 source9 += pitch; 3849 sourceA += pitch; 3850 sourceB += pitch; 3851 sourceC += pitch; 3852 sourceD += pitch; 3853 sourceE += pitch; 3854 sourceF += pitch; 3855 } 3856 } 3857 else ASSERT(false); 3858 } 3859 else 3860 { 3861 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3862 3863 if(internal.depth == 2) 3864 { 3865 for(int y = 0; y < height; y++) 3866 { 3867 for(int x = 0; x < width; x++) 3868 { 3869 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3870 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3871 3872 c0 = AVERAGE(c0, c1); 3873 3874 *(unsigned int*)(source0 + 4 * x) = c0; 3875 } 3876 3877 source0 += pitch; 3878 source1 += pitch; 3879 } 3880 } 3881 else if(internal.depth == 4) 3882 { 3883 for(int y = 0; y < height; y++) 3884 { 3885 for(int x = 0; x < width; x++) 3886 { 3887 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3888 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3889 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3890 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3891 3892 c0 = AVERAGE(c0, c1); 3893 c2 = AVERAGE(c2, c3); 3894 c0 = AVERAGE(c0, c2); 3895 3896 *(unsigned int*)(source0 + 4 * x) = c0; 3897 } 3898 3899 source0 += pitch; 3900 source1 += pitch; 3901 source2 += pitch; 3902 source3 += pitch; 3903 } 3904 } 3905 else if(internal.depth == 8) 3906 { 3907 for(int y = 0; y < height; y++) 3908 { 3909 for(int x = 0; x < width; x++) 3910 { 3911 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3912 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3913 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3914 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3915 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3916 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3917 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3918 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3919 3920 c0 = AVERAGE(c0, c1); 3921 c2 = AVERAGE(c2, c3); 3922 c4 = AVERAGE(c4, c5); 3923 c6 = AVERAGE(c6, c7); 3924 c0 = AVERAGE(c0, c2); 3925 c4 = AVERAGE(c4, c6); 3926 c0 = AVERAGE(c0, c4); 3927 3928 *(unsigned int*)(source0 + 4 * x) = c0; 3929 } 3930 3931 source0 += pitch; 3932 source1 += pitch; 3933 source2 += pitch; 3934 source3 += pitch; 3935 source4 += pitch; 3936 source5 += pitch; 3937 source6 += pitch; 3938 source7 += pitch; 3939 } 3940 } 3941 else if(internal.depth == 16) 3942 { 3943 for(int y = 0; y < height; y++) 3944 { 3945 for(int x = 0; x < width; x++) 3946 { 3947 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3948 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3949 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3950 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3951 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3952 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3953 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3954 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3955 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 3956 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 3957 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 3958 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 3959 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 3960 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 3961 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 3962 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 3963 3964 c0 = AVERAGE(c0, c1); 3965 c2 = AVERAGE(c2, c3); 3966 c4 = AVERAGE(c4, c5); 3967 c6 = AVERAGE(c6, c7); 3968 c8 = AVERAGE(c8, c9); 3969 cA = AVERAGE(cA, cB); 3970 cC = AVERAGE(cC, cD); 3971 cE = AVERAGE(cE, cF); 3972 c0 = AVERAGE(c0, c2); 3973 c4 = AVERAGE(c4, c6); 3974 c8 = AVERAGE(c8, cA); 3975 cC = AVERAGE(cC, cE); 3976 c0 = AVERAGE(c0, c4); 3977 c8 = AVERAGE(c8, cC); 3978 c0 = AVERAGE(c0, c8); 3979 3980 *(unsigned int*)(source0 + 4 * x) = c0; 3981 } 3982 3983 source0 += pitch; 3984 source1 += pitch; 3985 source2 += pitch; 3986 source3 += pitch; 3987 source4 += pitch; 3988 source5 += pitch; 3989 source6 += pitch; 3990 source7 += pitch; 3991 source8 += pitch; 3992 source9 += pitch; 3993 sourceA += pitch; 3994 sourceB += pitch; 3995 sourceC += pitch; 3996 sourceD += pitch; 3997 sourceE += pitch; 3998 sourceF += pitch; 3999 } 4000 } 4001 else ASSERT(false); 4002 4003 #undef AVERAGE 4004 } 4005 } 4006 else if(internal.format == FORMAT_G16R16) 4007 { 4008 if(CPUID::supportsSSE2() && (width % 4) == 0) 4009 { 4010 if(internal.depth == 2) 4011 { 4012 for(int y = 0; y < height; y++) 4013 { 4014 for(int x = 0; x < width; x += 4) 4015 { 4016 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4017 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4018 4019 c0 = _mm_avg_epu16(c0, c1); 4020 4021 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4022 } 4023 4024 source0 += pitch; 4025 source1 += pitch; 4026 } 4027 } 4028 else if(internal.depth == 4) 4029 { 4030 for(int y = 0; y < height; y++) 4031 { 4032 for(int x = 0; x < width; x += 4) 4033 { 4034 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4035 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4036 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4037 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4038 4039 c0 = _mm_avg_epu16(c0, c1); 4040 c2 = _mm_avg_epu16(c2, c3); 4041 c0 = _mm_avg_epu16(c0, c2); 4042 4043 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4044 } 4045 4046 source0 += pitch; 4047 source1 += pitch; 4048 source2 += pitch; 4049 source3 += pitch; 4050 } 4051 } 4052 else if(internal.depth == 8) 4053 { 4054 for(int y = 0; y < height; y++) 4055 { 4056 for(int x = 0; x < width; x += 4) 4057 { 4058 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4059 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4060 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4061 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4062 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4063 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4064 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4065 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4066 4067 c0 = _mm_avg_epu16(c0, c1); 4068 c2 = _mm_avg_epu16(c2, c3); 4069 c4 = _mm_avg_epu16(c4, c5); 4070 c6 = _mm_avg_epu16(c6, c7); 4071 c0 = _mm_avg_epu16(c0, c2); 4072 c4 = _mm_avg_epu16(c4, c6); 4073 c0 = _mm_avg_epu16(c0, c4); 4074 4075 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4076 } 4077 4078 source0 += pitch; 4079 source1 += pitch; 4080 source2 += pitch; 4081 source3 += pitch; 4082 source4 += pitch; 4083 source5 += pitch; 4084 source6 += pitch; 4085 source7 += pitch; 4086 } 4087 } 4088 else if(internal.depth == 16) 4089 { 4090 for(int y = 0; y < height; y++) 4091 { 4092 for(int x = 0; x < width; x += 4) 4093 { 4094 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4095 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4096 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4097 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4098 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4099 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4100 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4101 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4102 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4103 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4104 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4105 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4106 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4107 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4108 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4109 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4110 4111 c0 = _mm_avg_epu16(c0, c1); 4112 c2 = _mm_avg_epu16(c2, c3); 4113 c4 = _mm_avg_epu16(c4, c5); 4114 c6 = _mm_avg_epu16(c6, c7); 4115 c8 = _mm_avg_epu16(c8, c9); 4116 cA = _mm_avg_epu16(cA, cB); 4117 cC = _mm_avg_epu16(cC, cD); 4118 cE = _mm_avg_epu16(cE, cF); 4119 c0 = _mm_avg_epu16(c0, c2); 4120 c4 = _mm_avg_epu16(c4, c6); 4121 c8 = _mm_avg_epu16(c8, cA); 4122 cC = _mm_avg_epu16(cC, cE); 4123 c0 = _mm_avg_epu16(c0, c4); 4124 c8 = _mm_avg_epu16(c8, cC); 4125 c0 = _mm_avg_epu16(c0, c8); 4126 4127 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4128 } 4129 4130 source0 += pitch; 4131 source1 += pitch; 4132 source2 += pitch; 4133 source3 += pitch; 4134 source4 += pitch; 4135 source5 += pitch; 4136 source6 += pitch; 4137 source7 += pitch; 4138 source8 += pitch; 4139 source9 += pitch; 4140 sourceA += pitch; 4141 sourceB += pitch; 4142 sourceC += pitch; 4143 sourceD += pitch; 4144 sourceE += pitch; 4145 sourceF += pitch; 4146 } 4147 } 4148 else ASSERT(false); 4149 } 4150 else 4151 { 4152 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4153 4154 if(internal.depth == 2) 4155 { 4156 for(int y = 0; y < height; y++) 4157 { 4158 for(int x = 0; x < width; x++) 4159 { 4160 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4161 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4162 4163 c0 = AVERAGE(c0, c1); 4164 4165 *(unsigned int*)(source0 + 4 * x) = c0; 4166 } 4167 4168 source0 += pitch; 4169 source1 += pitch; 4170 } 4171 } 4172 else if(internal.depth == 4) 4173 { 4174 for(int y = 0; y < height; y++) 4175 { 4176 for(int x = 0; x < width; x++) 4177 { 4178 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4179 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4180 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4181 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4182 4183 c0 = AVERAGE(c0, c1); 4184 c2 = AVERAGE(c2, c3); 4185 c0 = AVERAGE(c0, c2); 4186 4187 *(unsigned int*)(source0 + 4 * x) = c0; 4188 } 4189 4190 source0 += pitch; 4191 source1 += pitch; 4192 source2 += pitch; 4193 source3 += pitch; 4194 } 4195 } 4196 else if(internal.depth == 8) 4197 { 4198 for(int y = 0; y < height; y++) 4199 { 4200 for(int x = 0; x < width; x++) 4201 { 4202 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4203 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4204 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4205 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4206 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4207 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4208 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4209 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4210 4211 c0 = AVERAGE(c0, c1); 4212 c2 = AVERAGE(c2, c3); 4213 c4 = AVERAGE(c4, c5); 4214 c6 = AVERAGE(c6, c7); 4215 c0 = AVERAGE(c0, c2); 4216 c4 = AVERAGE(c4, c6); 4217 c0 = AVERAGE(c0, c4); 4218 4219 *(unsigned int*)(source0 + 4 * x) = c0; 4220 } 4221 4222 source0 += pitch; 4223 source1 += pitch; 4224 source2 += pitch; 4225 source3 += pitch; 4226 source4 += pitch; 4227 source5 += pitch; 4228 source6 += pitch; 4229 source7 += pitch; 4230 } 4231 } 4232 else if(internal.depth == 16) 4233 { 4234 for(int y = 0; y < height; y++) 4235 { 4236 for(int x = 0; x < width; x++) 4237 { 4238 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4239 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4240 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4241 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4242 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4243 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4244 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4245 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4246 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4247 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4248 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4249 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4250 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4251 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4252 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4253 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4254 4255 c0 = AVERAGE(c0, c1); 4256 c2 = AVERAGE(c2, c3); 4257 c4 = AVERAGE(c4, c5); 4258 c6 = AVERAGE(c6, c7); 4259 c8 = AVERAGE(c8, c9); 4260 cA = AVERAGE(cA, cB); 4261 cC = AVERAGE(cC, cD); 4262 cE = AVERAGE(cE, cF); 4263 c0 = AVERAGE(c0, c2); 4264 c4 = AVERAGE(c4, c6); 4265 c8 = AVERAGE(c8, cA); 4266 cC = AVERAGE(cC, cE); 4267 c0 = AVERAGE(c0, c4); 4268 c8 = AVERAGE(c8, cC); 4269 c0 = AVERAGE(c0, c8); 4270 4271 *(unsigned int*)(source0 + 4 * x) = c0; 4272 } 4273 4274 source0 += pitch; 4275 source1 += pitch; 4276 source2 += pitch; 4277 source3 += pitch; 4278 source4 += pitch; 4279 source5 += pitch; 4280 source6 += pitch; 4281 source7 += pitch; 4282 source8 += pitch; 4283 source9 += pitch; 4284 sourceA += pitch; 4285 sourceB += pitch; 4286 sourceC += pitch; 4287 sourceD += pitch; 4288 sourceE += pitch; 4289 sourceF += pitch; 4290 } 4291 } 4292 else ASSERT(false); 4293 4294 #undef AVERAGE 4295 } 4296 } 4297 else if(internal.format == FORMAT_A16B16G16R16) 4298 { 4299 if(CPUID::supportsSSE2() && (width % 2) == 0) 4300 { 4301 if(internal.depth == 2) 4302 { 4303 for(int y = 0; y < height; y++) 4304 { 4305 for(int x = 0; x < width; x += 2) 4306 { 4307 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4308 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4309 4310 c0 = _mm_avg_epu16(c0, c1); 4311 4312 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4313 } 4314 4315 source0 += pitch; 4316 source1 += pitch; 4317 } 4318 } 4319 else if(internal.depth == 4) 4320 { 4321 for(int y = 0; y < height; y++) 4322 { 4323 for(int x = 0; x < width; x += 2) 4324 { 4325 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4326 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4327 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4328 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4329 4330 c0 = _mm_avg_epu16(c0, c1); 4331 c2 = _mm_avg_epu16(c2, c3); 4332 c0 = _mm_avg_epu16(c0, c2); 4333 4334 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4335 } 4336 4337 source0 += pitch; 4338 source1 += pitch; 4339 source2 += pitch; 4340 source3 += pitch; 4341 } 4342 } 4343 else if(internal.depth == 8) 4344 { 4345 for(int y = 0; y < height; y++) 4346 { 4347 for(int x = 0; x < width; x += 2) 4348 { 4349 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4350 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4351 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4352 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4353 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4354 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4355 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4356 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4357 4358 c0 = _mm_avg_epu16(c0, c1); 4359 c2 = _mm_avg_epu16(c2, c3); 4360 c4 = _mm_avg_epu16(c4, c5); 4361 c6 = _mm_avg_epu16(c6, c7); 4362 c0 = _mm_avg_epu16(c0, c2); 4363 c4 = _mm_avg_epu16(c4, c6); 4364 c0 = _mm_avg_epu16(c0, c4); 4365 4366 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4367 } 4368 4369 source0 += pitch; 4370 source1 += pitch; 4371 source2 += pitch; 4372 source3 += pitch; 4373 source4 += pitch; 4374 source5 += pitch; 4375 source6 += pitch; 4376 source7 += pitch; 4377 } 4378 } 4379 else if(internal.depth == 16) 4380 { 4381 for(int y = 0; y < height; y++) 4382 { 4383 for(int x = 0; x < width; x += 2) 4384 { 4385 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4386 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4387 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4388 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4389 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4390 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4391 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4392 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4393 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4394 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4395 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4396 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4397 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4398 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4399 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4400 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4401 4402 c0 = _mm_avg_epu16(c0, c1); 4403 c2 = _mm_avg_epu16(c2, c3); 4404 c4 = _mm_avg_epu16(c4, c5); 4405 c6 = _mm_avg_epu16(c6, c7); 4406 c8 = _mm_avg_epu16(c8, c9); 4407 cA = _mm_avg_epu16(cA, cB); 4408 cC = _mm_avg_epu16(cC, cD); 4409 cE = _mm_avg_epu16(cE, cF); 4410 c0 = _mm_avg_epu16(c0, c2); 4411 c4 = _mm_avg_epu16(c4, c6); 4412 c8 = _mm_avg_epu16(c8, cA); 4413 cC = _mm_avg_epu16(cC, cE); 4414 c0 = _mm_avg_epu16(c0, c4); 4415 c8 = _mm_avg_epu16(c8, cC); 4416 c0 = _mm_avg_epu16(c0, c8); 4417 4418 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4419 } 4420 4421 source0 += pitch; 4422 source1 += pitch; 4423 source2 += pitch; 4424 source3 += pitch; 4425 source4 += pitch; 4426 source5 += pitch; 4427 source6 += pitch; 4428 source7 += pitch; 4429 source8 += pitch; 4430 source9 += pitch; 4431 sourceA += pitch; 4432 sourceB += pitch; 4433 sourceC += pitch; 4434 sourceD += pitch; 4435 sourceE += pitch; 4436 sourceF += pitch; 4437 } 4438 } 4439 else ASSERT(false); 4440 } 4441 else 4442 { 4443 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4444 4445 if(internal.depth == 2) 4446 { 4447 for(int y = 0; y < height; y++) 4448 { 4449 for(int x = 0; x < 2 * width; x++) 4450 { 4451 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4452 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4453 4454 c0 = AVERAGE(c0, c1); 4455 4456 *(unsigned int*)(source0 + 4 * x) = c0; 4457 } 4458 4459 source0 += pitch; 4460 source1 += pitch; 4461 } 4462 } 4463 else if(internal.depth == 4) 4464 { 4465 for(int y = 0; y < height; y++) 4466 { 4467 for(int x = 0; x < 2 * width; x++) 4468 { 4469 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4470 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4471 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4472 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4473 4474 c0 = AVERAGE(c0, c1); 4475 c2 = AVERAGE(c2, c3); 4476 c0 = AVERAGE(c0, c2); 4477 4478 *(unsigned int*)(source0 + 4 * x) = c0; 4479 } 4480 4481 source0 += pitch; 4482 source1 += pitch; 4483 source2 += pitch; 4484 source3 += pitch; 4485 } 4486 } 4487 else if(internal.depth == 8) 4488 { 4489 for(int y = 0; y < height; y++) 4490 { 4491 for(int x = 0; x < 2 * width; x++) 4492 { 4493 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4494 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4495 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4496 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4497 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4498 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4499 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4500 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4501 4502 c0 = AVERAGE(c0, c1); 4503 c2 = AVERAGE(c2, c3); 4504 c4 = AVERAGE(c4, c5); 4505 c6 = AVERAGE(c6, c7); 4506 c0 = AVERAGE(c0, c2); 4507 c4 = AVERAGE(c4, c6); 4508 c0 = AVERAGE(c0, c4); 4509 4510 *(unsigned int*)(source0 + 4 * x) = c0; 4511 } 4512 4513 source0 += pitch; 4514 source1 += pitch; 4515 source2 += pitch; 4516 source3 += pitch; 4517 source4 += pitch; 4518 source5 += pitch; 4519 source6 += pitch; 4520 source7 += pitch; 4521 } 4522 } 4523 else if(internal.depth == 16) 4524 { 4525 for(int y = 0; y < height; y++) 4526 { 4527 for(int x = 0; x < 2 * width; x++) 4528 { 4529 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4530 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4531 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4532 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4533 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4534 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4535 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4536 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4537 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4538 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4539 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4540 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4541 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4542 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4543 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4544 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4545 4546 c0 = AVERAGE(c0, c1); 4547 c2 = AVERAGE(c2, c3); 4548 c4 = AVERAGE(c4, c5); 4549 c6 = AVERAGE(c6, c7); 4550 c8 = AVERAGE(c8, c9); 4551 cA = AVERAGE(cA, cB); 4552 cC = AVERAGE(cC, cD); 4553 cE = AVERAGE(cE, cF); 4554 c0 = AVERAGE(c0, c2); 4555 c4 = AVERAGE(c4, c6); 4556 c8 = AVERAGE(c8, cA); 4557 cC = AVERAGE(cC, cE); 4558 c0 = AVERAGE(c0, c4); 4559 c8 = AVERAGE(c8, cC); 4560 c0 = AVERAGE(c0, c8); 4561 4562 *(unsigned int*)(source0 + 4 * x) = c0; 4563 } 4564 4565 source0 += pitch; 4566 source1 += pitch; 4567 source2 += pitch; 4568 source3 += pitch; 4569 source4 += pitch; 4570 source5 += pitch; 4571 source6 += pitch; 4572 source7 += pitch; 4573 source8 += pitch; 4574 source9 += pitch; 4575 sourceA += pitch; 4576 sourceB += pitch; 4577 sourceC += pitch; 4578 sourceD += pitch; 4579 sourceE += pitch; 4580 sourceF += pitch; 4581 } 4582 } 4583 else ASSERT(false); 4584 4585 #undef AVERAGE 4586 } 4587 } 4588 else if(internal.format == FORMAT_R32F) 4589 { 4590 if(CPUID::supportsSSE() && (width % 4) == 0) 4591 { 4592 if(internal.depth == 2) 4593 { 4594 for(int y = 0; y < height; y++) 4595 { 4596 for(int x = 0; x < width; x += 4) 4597 { 4598 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4599 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4600 4601 c0 = _mm_add_ps(c0, c1); 4602 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4603 4604 _mm_store_ps((float*)(source0 + 4 * x), c0); 4605 } 4606 4607 source0 += pitch; 4608 source1 += pitch; 4609 } 4610 } 4611 else if(internal.depth == 4) 4612 { 4613 for(int y = 0; y < height; y++) 4614 { 4615 for(int x = 0; x < width; x += 4) 4616 { 4617 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4618 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4619 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4620 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4621 4622 c0 = _mm_add_ps(c0, c1); 4623 c2 = _mm_add_ps(c2, c3); 4624 c0 = _mm_add_ps(c0, c2); 4625 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4626 4627 _mm_store_ps((float*)(source0 + 4 * x), c0); 4628 } 4629 4630 source0 += pitch; 4631 source1 += pitch; 4632 source2 += pitch; 4633 source3 += pitch; 4634 } 4635 } 4636 else if(internal.depth == 8) 4637 { 4638 for(int y = 0; y < height; y++) 4639 { 4640 for(int x = 0; x < width; x += 4) 4641 { 4642 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4643 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4644 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4645 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4646 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4647 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4648 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4649 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4650 4651 c0 = _mm_add_ps(c0, c1); 4652 c2 = _mm_add_ps(c2, c3); 4653 c4 = _mm_add_ps(c4, c5); 4654 c6 = _mm_add_ps(c6, c7); 4655 c0 = _mm_add_ps(c0, c2); 4656 c4 = _mm_add_ps(c4, c6); 4657 c0 = _mm_add_ps(c0, c4); 4658 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4659 4660 _mm_store_ps((float*)(source0 + 4 * x), c0); 4661 } 4662 4663 source0 += pitch; 4664 source1 += pitch; 4665 source2 += pitch; 4666 source3 += pitch; 4667 source4 += pitch; 4668 source5 += pitch; 4669 source6 += pitch; 4670 source7 += pitch; 4671 } 4672 } 4673 else if(internal.depth == 16) 4674 { 4675 for(int y = 0; y < height; y++) 4676 { 4677 for(int x = 0; x < width; x += 4) 4678 { 4679 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4680 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4681 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4682 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4683 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4684 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4685 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4686 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4687 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4688 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4689 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 4690 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4691 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4692 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4693 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4694 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4695 4696 c0 = _mm_add_ps(c0, c1); 4697 c2 = _mm_add_ps(c2, c3); 4698 c4 = _mm_add_ps(c4, c5); 4699 c6 = _mm_add_ps(c6, c7); 4700 c8 = _mm_add_ps(c8, c9); 4701 cA = _mm_add_ps(cA, cB); 4702 cC = _mm_add_ps(cC, cD); 4703 cE = _mm_add_ps(cE, cF); 4704 c0 = _mm_add_ps(c0, c2); 4705 c4 = _mm_add_ps(c4, c6); 4706 c8 = _mm_add_ps(c8, cA); 4707 cC = _mm_add_ps(cC, cE); 4708 c0 = _mm_add_ps(c0, c4); 4709 c8 = _mm_add_ps(c8, cC); 4710 c0 = _mm_add_ps(c0, c8); 4711 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4712 4713 _mm_store_ps((float*)(source0 + 4 * x), c0); 4714 } 4715 4716 source0 += pitch; 4717 source1 += pitch; 4718 source2 += pitch; 4719 source3 += pitch; 4720 source4 += pitch; 4721 source5 += pitch; 4722 source6 += pitch; 4723 source7 += pitch; 4724 source8 += pitch; 4725 source9 += pitch; 4726 sourceA += pitch; 4727 sourceB += pitch; 4728 sourceC += pitch; 4729 sourceD += pitch; 4730 sourceE += pitch; 4731 sourceF += pitch; 4732 } 4733 } 4734 else ASSERT(false); 4735 } 4736 else 4737 { 4738 if(internal.depth == 2) 4739 { 4740 for(int y = 0; y < height; y++) 4741 { 4742 for(int x = 0; x < width; x++) 4743 { 4744 float c0 = *(float*)(source0 + 4 * x); 4745 float c1 = *(float*)(source1 + 4 * x); 4746 4747 c0 = c0 + c1; 4748 c0 *= 1.0f / 2.0f; 4749 4750 *(float*)(source0 + 4 * x) = c0; 4751 } 4752 4753 source0 += pitch; 4754 source1 += pitch; 4755 } 4756 } 4757 else if(internal.depth == 4) 4758 { 4759 for(int y = 0; y < height; y++) 4760 { 4761 for(int x = 0; x < width; x++) 4762 { 4763 float c0 = *(float*)(source0 + 4 * x); 4764 float c1 = *(float*)(source1 + 4 * x); 4765 float c2 = *(float*)(source2 + 4 * x); 4766 float c3 = *(float*)(source3 + 4 * x); 4767 4768 c0 = c0 + c1; 4769 c2 = c2 + c3; 4770 c0 = c0 + c2; 4771 c0 *= 1.0f / 4.0f; 4772 4773 *(float*)(source0 + 4 * x) = c0; 4774 } 4775 4776 source0 += pitch; 4777 source1 += pitch; 4778 source2 += pitch; 4779 source3 += pitch; 4780 } 4781 } 4782 else if(internal.depth == 8) 4783 { 4784 for(int y = 0; y < height; y++) 4785 { 4786 for(int x = 0; x < width; x++) 4787 { 4788 float c0 = *(float*)(source0 + 4 * x); 4789 float c1 = *(float*)(source1 + 4 * x); 4790 float c2 = *(float*)(source2 + 4 * x); 4791 float c3 = *(float*)(source3 + 4 * x); 4792 float c4 = *(float*)(source4 + 4 * x); 4793 float c5 = *(float*)(source5 + 4 * x); 4794 float c6 = *(float*)(source6 + 4 * x); 4795 float c7 = *(float*)(source7 + 4 * x); 4796 4797 c0 = c0 + c1; 4798 c2 = c2 + c3; 4799 c4 = c4 + c5; 4800 c6 = c6 + c7; 4801 c0 = c0 + c2; 4802 c4 = c4 + c6; 4803 c0 = c0 + c4; 4804 c0 *= 1.0f / 8.0f; 4805 4806 *(float*)(source0 + 4 * x) = c0; 4807 } 4808 4809 source0 += pitch; 4810 source1 += pitch; 4811 source2 += pitch; 4812 source3 += pitch; 4813 source4 += pitch; 4814 source5 += pitch; 4815 source6 += pitch; 4816 source7 += pitch; 4817 } 4818 } 4819 else if(internal.depth == 16) 4820 { 4821 for(int y = 0; y < height; y++) 4822 { 4823 for(int x = 0; x < width; x++) 4824 { 4825 float c0 = *(float*)(source0 + 4 * x); 4826 float c1 = *(float*)(source1 + 4 * x); 4827 float c2 = *(float*)(source2 + 4 * x); 4828 float c3 = *(float*)(source3 + 4 * x); 4829 float c4 = *(float*)(source4 + 4 * x); 4830 float c5 = *(float*)(source5 + 4 * x); 4831 float c6 = *(float*)(source6 + 4 * x); 4832 float c7 = *(float*)(source7 + 4 * x); 4833 float c8 = *(float*)(source8 + 4 * x); 4834 float c9 = *(float*)(source9 + 4 * x); 4835 float cA = *(float*)(sourceA + 4 * x); 4836 float cB = *(float*)(sourceB + 4 * x); 4837 float cC = *(float*)(sourceC + 4 * x); 4838 float cD = *(float*)(sourceD + 4 * x); 4839 float cE = *(float*)(sourceE + 4 * x); 4840 float cF = *(float*)(sourceF + 4 * x); 4841 4842 c0 = c0 + c1; 4843 c2 = c2 + c3; 4844 c4 = c4 + c5; 4845 c6 = c6 + c7; 4846 c8 = c8 + c9; 4847 cA = cA + cB; 4848 cC = cC + cD; 4849 cE = cE + cF; 4850 c0 = c0 + c2; 4851 c4 = c4 + c6; 4852 c8 = c8 + cA; 4853 cC = cC + cE; 4854 c0 = c0 + c4; 4855 c8 = c8 + cC; 4856 c0 = c0 + c8; 4857 c0 *= 1.0f / 16.0f; 4858 4859 *(float*)(source0 + 4 * x) = c0; 4860 } 4861 4862 source0 += pitch; 4863 source1 += pitch; 4864 source2 += pitch; 4865 source3 += pitch; 4866 source4 += pitch; 4867 source5 += pitch; 4868 source6 += pitch; 4869 source7 += pitch; 4870 source8 += pitch; 4871 source9 += pitch; 4872 sourceA += pitch; 4873 sourceB += pitch; 4874 sourceC += pitch; 4875 sourceD += pitch; 4876 sourceE += pitch; 4877 sourceF += pitch; 4878 } 4879 } 4880 else ASSERT(false); 4881 } 4882 } 4883 else if(internal.format == FORMAT_G32R32F) 4884 { 4885 if(CPUID::supportsSSE() && (width % 2) == 0) 4886 { 4887 if(internal.depth == 2) 4888 { 4889 for(int y = 0; y < height; y++) 4890 { 4891 for(int x = 0; x < width; x += 2) 4892 { 4893 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4894 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4895 4896 c0 = _mm_add_ps(c0, c1); 4897 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4898 4899 _mm_store_ps((float*)(source0 + 8 * x), c0); 4900 } 4901 4902 source0 += pitch; 4903 source1 += pitch; 4904 } 4905 } 4906 else if(internal.depth == 4) 4907 { 4908 for(int y = 0; y < height; y++) 4909 { 4910 for(int x = 0; x < width; x += 2) 4911 { 4912 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4913 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4914 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4915 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4916 4917 c0 = _mm_add_ps(c0, c1); 4918 c2 = _mm_add_ps(c2, c3); 4919 c0 = _mm_add_ps(c0, c2); 4920 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4921 4922 _mm_store_ps((float*)(source0 + 8 * x), c0); 4923 } 4924 4925 source0 += pitch; 4926 source1 += pitch; 4927 source2 += pitch; 4928 source3 += pitch; 4929 } 4930 } 4931 else if(internal.depth == 8) 4932 { 4933 for(int y = 0; y < height; y++) 4934 { 4935 for(int x = 0; x < width; x += 2) 4936 { 4937 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4938 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4939 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4940 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4941 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4942 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4943 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4944 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4945 4946 c0 = _mm_add_ps(c0, c1); 4947 c2 = _mm_add_ps(c2, c3); 4948 c4 = _mm_add_ps(c4, c5); 4949 c6 = _mm_add_ps(c6, c7); 4950 c0 = _mm_add_ps(c0, c2); 4951 c4 = _mm_add_ps(c4, c6); 4952 c0 = _mm_add_ps(c0, c4); 4953 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4954 4955 _mm_store_ps((float*)(source0 + 8 * x), c0); 4956 } 4957 4958 source0 += pitch; 4959 source1 += pitch; 4960 source2 += pitch; 4961 source3 += pitch; 4962 source4 += pitch; 4963 source5 += pitch; 4964 source6 += pitch; 4965 source7 += pitch; 4966 } 4967 } 4968 else if(internal.depth == 16) 4969 { 4970 for(int y = 0; y < height; y++) 4971 { 4972 for(int x = 0; x < width; x += 2) 4973 { 4974 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4975 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4976 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4977 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4978 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4979 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4980 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4981 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4982 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 4983 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 4984 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 4985 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 4986 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 4987 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 4988 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 4989 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 4990 4991 c0 = _mm_add_ps(c0, c1); 4992 c2 = _mm_add_ps(c2, c3); 4993 c4 = _mm_add_ps(c4, c5); 4994 c6 = _mm_add_ps(c6, c7); 4995 c8 = _mm_add_ps(c8, c9); 4996 cA = _mm_add_ps(cA, cB); 4997 cC = _mm_add_ps(cC, cD); 4998 cE = _mm_add_ps(cE, cF); 4999 c0 = _mm_add_ps(c0, c2); 5000 c4 = _mm_add_ps(c4, c6); 5001 c8 = _mm_add_ps(c8, cA); 5002 cC = _mm_add_ps(cC, cE); 5003 c0 = _mm_add_ps(c0, c4); 5004 c8 = _mm_add_ps(c8, cC); 5005 c0 = _mm_add_ps(c0, c8); 5006 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5007 5008 _mm_store_ps((float*)(source0 + 8 * x), c0); 5009 } 5010 5011 source0 += pitch; 5012 source1 += pitch; 5013 source2 += pitch; 5014 source3 += pitch; 5015 source4 += pitch; 5016 source5 += pitch; 5017 source6 += pitch; 5018 source7 += pitch; 5019 source8 += pitch; 5020 source9 += pitch; 5021 sourceA += pitch; 5022 sourceB += pitch; 5023 sourceC += pitch; 5024 sourceD += pitch; 5025 sourceE += pitch; 5026 sourceF += pitch; 5027 } 5028 } 5029 else ASSERT(false); 5030 } 5031 else 5032 { 5033 if(internal.depth == 2) 5034 { 5035 for(int y = 0; y < height; y++) 5036 { 5037 for(int x = 0; x < 2 * width; x++) 5038 { 5039 float c0 = *(float*)(source0 + 4 * x); 5040 float c1 = *(float*)(source1 + 4 * x); 5041 5042 c0 = c0 + c1; 5043 c0 *= 1.0f / 2.0f; 5044 5045 *(float*)(source0 + 4 * x) = c0; 5046 } 5047 5048 source0 += pitch; 5049 source1 += pitch; 5050 } 5051 } 5052 else if(internal.depth == 4) 5053 { 5054 for(int y = 0; y < height; y++) 5055 { 5056 for(int x = 0; x < 2 * width; x++) 5057 { 5058 float c0 = *(float*)(source0 + 4 * x); 5059 float c1 = *(float*)(source1 + 4 * x); 5060 float c2 = *(float*)(source2 + 4 * x); 5061 float c3 = *(float*)(source3 + 4 * x); 5062 5063 c0 = c0 + c1; 5064 c2 = c2 + c3; 5065 c0 = c0 + c2; 5066 c0 *= 1.0f / 4.0f; 5067 5068 *(float*)(source0 + 4 * x) = c0; 5069 } 5070 5071 source0 += pitch; 5072 source1 += pitch; 5073 source2 += pitch; 5074 source3 += pitch; 5075 } 5076 } 5077 else if(internal.depth == 8) 5078 { 5079 for(int y = 0; y < height; y++) 5080 { 5081 for(int x = 0; x < 2 * width; x++) 5082 { 5083 float c0 = *(float*)(source0 + 4 * x); 5084 float c1 = *(float*)(source1 + 4 * x); 5085 float c2 = *(float*)(source2 + 4 * x); 5086 float c3 = *(float*)(source3 + 4 * x); 5087 float c4 = *(float*)(source4 + 4 * x); 5088 float c5 = *(float*)(source5 + 4 * x); 5089 float c6 = *(float*)(source6 + 4 * x); 5090 float c7 = *(float*)(source7 + 4 * x); 5091 5092 c0 = c0 + c1; 5093 c2 = c2 + c3; 5094 c4 = c4 + c5; 5095 c6 = c6 + c7; 5096 c0 = c0 + c2; 5097 c4 = c4 + c6; 5098 c0 = c0 + c4; 5099 c0 *= 1.0f / 8.0f; 5100 5101 *(float*)(source0 + 4 * x) = c0; 5102 } 5103 5104 source0 += pitch; 5105 source1 += pitch; 5106 source2 += pitch; 5107 source3 += pitch; 5108 source4 += pitch; 5109 source5 += pitch; 5110 source6 += pitch; 5111 source7 += pitch; 5112 } 5113 } 5114 else if(internal.depth == 16) 5115 { 5116 for(int y = 0; y < height; y++) 5117 { 5118 for(int x = 0; x < 2 * width; x++) 5119 { 5120 float c0 = *(float*)(source0 + 4 * x); 5121 float c1 = *(float*)(source1 + 4 * x); 5122 float c2 = *(float*)(source2 + 4 * x); 5123 float c3 = *(float*)(source3 + 4 * x); 5124 float c4 = *(float*)(source4 + 4 * x); 5125 float c5 = *(float*)(source5 + 4 * x); 5126 float c6 = *(float*)(source6 + 4 * x); 5127 float c7 = *(float*)(source7 + 4 * x); 5128 float c8 = *(float*)(source8 + 4 * x); 5129 float c9 = *(float*)(source9 + 4 * x); 5130 float cA = *(float*)(sourceA + 4 * x); 5131 float cB = *(float*)(sourceB + 4 * x); 5132 float cC = *(float*)(sourceC + 4 * x); 5133 float cD = *(float*)(sourceD + 4 * x); 5134 float cE = *(float*)(sourceE + 4 * x); 5135 float cF = *(float*)(sourceF + 4 * x); 5136 5137 c0 = c0 + c1; 5138 c2 = c2 + c3; 5139 c4 = c4 + c5; 5140 c6 = c6 + c7; 5141 c8 = c8 + c9; 5142 cA = cA + cB; 5143 cC = cC + cD; 5144 cE = cE + cF; 5145 c0 = c0 + c2; 5146 c4 = c4 + c6; 5147 c8 = c8 + cA; 5148 cC = cC + cE; 5149 c0 = c0 + c4; 5150 c8 = c8 + cC; 5151 c0 = c0 + c8; 5152 c0 *= 1.0f / 16.0f; 5153 5154 *(float*)(source0 + 4 * x) = c0; 5155 } 5156 5157 source0 += pitch; 5158 source1 += pitch; 5159 source2 += pitch; 5160 source3 += pitch; 5161 source4 += pitch; 5162 source5 += pitch; 5163 source6 += pitch; 5164 source7 += pitch; 5165 source8 += pitch; 5166 source9 += pitch; 5167 sourceA += pitch; 5168 sourceB += pitch; 5169 sourceC += pitch; 5170 sourceD += pitch; 5171 sourceE += pitch; 5172 sourceF += pitch; 5173 } 5174 } 5175 else ASSERT(false); 5176 } 5177 } 5178 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F) 5179 { 5180 if(CPUID::supportsSSE()) 5181 { 5182 if(internal.depth == 2) 5183 { 5184 for(int y = 0; y < height; y++) 5185 { 5186 for(int x = 0; x < width; x++) 5187 { 5188 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5189 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5190 5191 c0 = _mm_add_ps(c0, c1); 5192 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5193 5194 _mm_store_ps((float*)(source0 + 16 * x), c0); 5195 } 5196 5197 source0 += pitch; 5198 source1 += pitch; 5199 } 5200 } 5201 else if(internal.depth == 4) 5202 { 5203 for(int y = 0; y < height; y++) 5204 { 5205 for(int x = 0; x < width; x++) 5206 { 5207 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5208 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5209 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5210 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5211 5212 c0 = _mm_add_ps(c0, c1); 5213 c2 = _mm_add_ps(c2, c3); 5214 c0 = _mm_add_ps(c0, c2); 5215 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5216 5217 _mm_store_ps((float*)(source0 + 16 * x), c0); 5218 } 5219 5220 source0 += pitch; 5221 source1 += pitch; 5222 source2 += pitch; 5223 source3 += pitch; 5224 } 5225 } 5226 else if(internal.depth == 8) 5227 { 5228 for(int y = 0; y < height; y++) 5229 { 5230 for(int x = 0; x < width; x++) 5231 { 5232 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5233 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5234 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5235 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5236 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5237 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5238 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5239 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5240 5241 c0 = _mm_add_ps(c0, c1); 5242 c2 = _mm_add_ps(c2, c3); 5243 c4 = _mm_add_ps(c4, c5); 5244 c6 = _mm_add_ps(c6, c7); 5245 c0 = _mm_add_ps(c0, c2); 5246 c4 = _mm_add_ps(c4, c6); 5247 c0 = _mm_add_ps(c0, c4); 5248 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5249 5250 _mm_store_ps((float*)(source0 + 16 * x), c0); 5251 } 5252 5253 source0 += pitch; 5254 source1 += pitch; 5255 source2 += pitch; 5256 source3 += pitch; 5257 source4 += pitch; 5258 source5 += pitch; 5259 source6 += pitch; 5260 source7 += pitch; 5261 } 5262 } 5263 else if(internal.depth == 16) 5264 { 5265 for(int y = 0; y < height; y++) 5266 { 5267 for(int x = 0; x < width; x++) 5268 { 5269 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5270 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5271 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5272 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5273 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5274 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5275 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5276 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5277 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5278 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5279 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5280 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x)); 5281 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5282 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5283 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5284 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5285 5286 c0 = _mm_add_ps(c0, c1); 5287 c2 = _mm_add_ps(c2, c3); 5288 c4 = _mm_add_ps(c4, c5); 5289 c6 = _mm_add_ps(c6, c7); 5290 c8 = _mm_add_ps(c8, c9); 5291 cA = _mm_add_ps(cA, cB); 5292 cC = _mm_add_ps(cC, cD); 5293 cE = _mm_add_ps(cE, cF); 5294 c0 = _mm_add_ps(c0, c2); 5295 c4 = _mm_add_ps(c4, c6); 5296 c8 = _mm_add_ps(c8, cA); 5297 cC = _mm_add_ps(cC, cE); 5298 c0 = _mm_add_ps(c0, c4); 5299 c8 = _mm_add_ps(c8, cC); 5300 c0 = _mm_add_ps(c0, c8); 5301 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5302 5303 _mm_store_ps((float*)(source0 + 16 * x), c0); 5304 } 5305 5306 source0 += pitch; 5307 source1 += pitch; 5308 source2 += pitch; 5309 source3 += pitch; 5310 source4 += pitch; 5311 source5 += pitch; 5312 source6 += pitch; 5313 source7 += pitch; 5314 source8 += pitch; 5315 source9 += pitch; 5316 sourceA += pitch; 5317 sourceB += pitch; 5318 sourceC += pitch; 5319 sourceD += pitch; 5320 sourceE += pitch; 5321 sourceF += pitch; 5322 } 5323 } 5324 else ASSERT(false); 5325 } 5326 else 5327 { 5328 if(internal.depth == 2) 5329 { 5330 for(int y = 0; y < height; y++) 5331 { 5332 for(int x = 0; x < 4 * width; x++) 5333 { 5334 float c0 = *(float*)(source0 + 4 * x); 5335 float c1 = *(float*)(source1 + 4 * x); 5336 5337 c0 = c0 + c1; 5338 c0 *= 1.0f / 2.0f; 5339 5340 *(float*)(source0 + 4 * x) = c0; 5341 } 5342 5343 source0 += pitch; 5344 source1 += pitch; 5345 } 5346 } 5347 else if(internal.depth == 4) 5348 { 5349 for(int y = 0; y < height; y++) 5350 { 5351 for(int x = 0; x < 4 * width; x++) 5352 { 5353 float c0 = *(float*)(source0 + 4 * x); 5354 float c1 = *(float*)(source1 + 4 * x); 5355 float c2 = *(float*)(source2 + 4 * x); 5356 float c3 = *(float*)(source3 + 4 * x); 5357 5358 c0 = c0 + c1; 5359 c2 = c2 + c3; 5360 c0 = c0 + c2; 5361 c0 *= 1.0f / 4.0f; 5362 5363 *(float*)(source0 + 4 * x) = c0; 5364 } 5365 5366 source0 += pitch; 5367 source1 += pitch; 5368 source2 += pitch; 5369 source3 += pitch; 5370 } 5371 } 5372 else if(internal.depth == 8) 5373 { 5374 for(int y = 0; y < height; y++) 5375 { 5376 for(int x = 0; x < 4 * width; x++) 5377 { 5378 float c0 = *(float*)(source0 + 4 * x); 5379 float c1 = *(float*)(source1 + 4 * x); 5380 float c2 = *(float*)(source2 + 4 * x); 5381 float c3 = *(float*)(source3 + 4 * x); 5382 float c4 = *(float*)(source4 + 4 * x); 5383 float c5 = *(float*)(source5 + 4 * x); 5384 float c6 = *(float*)(source6 + 4 * x); 5385 float c7 = *(float*)(source7 + 4 * x); 5386 5387 c0 = c0 + c1; 5388 c2 = c2 + c3; 5389 c4 = c4 + c5; 5390 c6 = c6 + c7; 5391 c0 = c0 + c2; 5392 c4 = c4 + c6; 5393 c0 = c0 + c4; 5394 c0 *= 1.0f / 8.0f; 5395 5396 *(float*)(source0 + 4 * x) = c0; 5397 } 5398 5399 source0 += pitch; 5400 source1 += pitch; 5401 source2 += pitch; 5402 source3 += pitch; 5403 source4 += pitch; 5404 source5 += pitch; 5405 source6 += pitch; 5406 source7 += pitch; 5407 } 5408 } 5409 else if(internal.depth == 16) 5410 { 5411 for(int y = 0; y < height; y++) 5412 { 5413 for(int x = 0; x < 4 * width; x++) 5414 { 5415 float c0 = *(float*)(source0 + 4 * x); 5416 float c1 = *(float*)(source1 + 4 * x); 5417 float c2 = *(float*)(source2 + 4 * x); 5418 float c3 = *(float*)(source3 + 4 * x); 5419 float c4 = *(float*)(source4 + 4 * x); 5420 float c5 = *(float*)(source5 + 4 * x); 5421 float c6 = *(float*)(source6 + 4 * x); 5422 float c7 = *(float*)(source7 + 4 * x); 5423 float c8 = *(float*)(source8 + 4 * x); 5424 float c9 = *(float*)(source9 + 4 * x); 5425 float cA = *(float*)(sourceA + 4 * x); 5426 float cB = *(float*)(sourceB + 4 * x); 5427 float cC = *(float*)(sourceC + 4 * x); 5428 float cD = *(float*)(sourceD + 4 * x); 5429 float cE = *(float*)(sourceE + 4 * x); 5430 float cF = *(float*)(sourceF + 4 * x); 5431 5432 c0 = c0 + c1; 5433 c2 = c2 + c3; 5434 c4 = c4 + c5; 5435 c6 = c6 + c7; 5436 c8 = c8 + c9; 5437 cA = cA + cB; 5438 cC = cC + cD; 5439 cE = cE + cF; 5440 c0 = c0 + c2; 5441 c4 = c4 + c6; 5442 c8 = c8 + cA; 5443 cC = cC + cE; 5444 c0 = c0 + c4; 5445 c8 = c8 + cC; 5446 c0 = c0 + c8; 5447 c0 *= 1.0f / 16.0f; 5448 5449 *(float*)(source0 + 4 * x) = c0; 5450 } 5451 5452 source0 += pitch; 5453 source1 += pitch; 5454 source2 += pitch; 5455 source3 += pitch; 5456 source4 += pitch; 5457 source5 += pitch; 5458 source6 += pitch; 5459 source7 += pitch; 5460 source8 += pitch; 5461 source9 += pitch; 5462 sourceA += pitch; 5463 sourceB += pitch; 5464 sourceC += pitch; 5465 sourceD += pitch; 5466 sourceE += pitch; 5467 sourceF += pitch; 5468 } 5469 } 5470 else ASSERT(false); 5471 } 5472 } 5473 else if(internal.format == FORMAT_R5G6B5) 5474 { 5475 if(CPUID::supportsSSE2() && (width % 8) == 0) 5476 { 5477 if(internal.depth == 2) 5478 { 5479 for(int y = 0; y < height; y++) 5480 { 5481 for(int x = 0; x < width; x += 8) 5482 { 5483 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5484 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5485 5486 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5487 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5488 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5489 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5490 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5491 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5492 5493 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5494 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5495 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5496 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5497 c0 = _mm_or_si128(c0, c1); 5498 5499 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5500 } 5501 5502 source0 += pitch; 5503 source1 += pitch; 5504 } 5505 } 5506 else if(internal.depth == 4) 5507 { 5508 for(int y = 0; y < height; y++) 5509 { 5510 for(int x = 0; x < width; x += 8) 5511 { 5512 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5513 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5514 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5515 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5516 5517 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5518 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5519 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5520 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5521 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5522 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5523 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5524 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5525 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5526 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5527 5528 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5529 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5530 c0 = _mm_avg_epu8(c0, c2); 5531 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5532 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5533 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5534 c1 = _mm_avg_epu16(c1, c3); 5535 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5536 c0 = _mm_or_si128(c0, c1); 5537 5538 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5539 } 5540 5541 source0 += pitch; 5542 source1 += pitch; 5543 source2 += pitch; 5544 source3 += pitch; 5545 } 5546 } 5547 else if(internal.depth == 8) 5548 { 5549 for(int y = 0; y < height; y++) 5550 { 5551 for(int x = 0; x < width; x += 8) 5552 { 5553 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5554 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5555 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5556 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5557 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5558 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5559 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5560 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5561 5562 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5563 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5564 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5565 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5566 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5567 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5568 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5569 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5570 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5571 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5572 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5573 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5574 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5575 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5576 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5577 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5578 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5579 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5580 5581 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5582 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5583 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5584 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5585 c0 = _mm_avg_epu8(c0, c2); 5586 c4 = _mm_avg_epu8(c4, c6); 5587 c0 = _mm_avg_epu8(c0, c4); 5588 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5589 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5590 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5591 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5592 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5593 c1 = _mm_avg_epu16(c1, c3); 5594 c5 = _mm_avg_epu16(c5, c7); 5595 c1 = _mm_avg_epu16(c1, c5); 5596 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5597 c0 = _mm_or_si128(c0, c1); 5598 5599 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5600 } 5601 5602 source0 += pitch; 5603 source1 += pitch; 5604 source2 += pitch; 5605 source3 += pitch; 5606 source4 += pitch; 5607 source5 += pitch; 5608 source6 += pitch; 5609 source7 += pitch; 5610 } 5611 } 5612 else if(internal.depth == 16) 5613 { 5614 for(int y = 0; y < height; y++) 5615 { 5616 for(int x = 0; x < width; x += 8) 5617 { 5618 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5619 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5620 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5621 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5622 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5623 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5624 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5625 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5626 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5627 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5628 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5629 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5630 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5631 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x)); 5632 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x)); 5633 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x)); 5634 5635 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5636 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5637 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5638 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5639 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5640 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5641 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5642 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5643 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5644 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5645 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5646 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5647 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5648 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5649 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5650 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5651 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5652 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5653 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b)); 5654 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_)); 5655 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b)); 5656 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_)); 5657 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b)); 5658 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_)); 5659 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b)); 5660 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_)); 5661 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b)); 5662 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_)); 5663 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b)); 5664 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_)); 5665 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b)); 5666 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_)); 5667 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b)); 5668 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_)); 5669 5670 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5671 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5672 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5673 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5674 c8 = _mm_avg_epu8(c8_r_b, c9_r_b); 5675 cA = _mm_avg_epu8(cA_r_b, cB_r_b); 5676 cC = _mm_avg_epu8(cC_r_b, cD_r_b); 5677 cE = _mm_avg_epu8(cE_r_b, cF_r_b); 5678 c0 = _mm_avg_epu8(c0, c2); 5679 c4 = _mm_avg_epu8(c4, c6); 5680 c8 = _mm_avg_epu8(c8, cA); 5681 cC = _mm_avg_epu8(cC, cE); 5682 c0 = _mm_avg_epu8(c0, c4); 5683 c8 = _mm_avg_epu8(c8, cC); 5684 c0 = _mm_avg_epu8(c0, c8); 5685 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5686 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5687 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5688 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5689 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5690 c9 = _mm_avg_epu16(c8__g_, c9__g_); 5691 cB = _mm_avg_epu16(cA__g_, cB__g_); 5692 cD = _mm_avg_epu16(cC__g_, cD__g_); 5693 cF = _mm_avg_epu16(cE__g_, cF__g_); 5694 c1 = _mm_avg_epu8(c1, c3); 5695 c5 = _mm_avg_epu8(c5, c7); 5696 c9 = _mm_avg_epu8(c9, cB); 5697 cD = _mm_avg_epu8(cD, cF); 5698 c1 = _mm_avg_epu8(c1, c5); 5699 c9 = _mm_avg_epu8(c9, cD); 5700 c1 = _mm_avg_epu8(c1, c9); 5701 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5702 c0 = _mm_or_si128(c0, c1); 5703 5704 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5705 } 5706 5707 source0 += pitch; 5708 source1 += pitch; 5709 source2 += pitch; 5710 source3 += pitch; 5711 source4 += pitch; 5712 source5 += pitch; 5713 source6 += pitch; 5714 source7 += pitch; 5715 source8 += pitch; 5716 source9 += pitch; 5717 sourceA += pitch; 5718 sourceB += pitch; 5719 sourceC += pitch; 5720 sourceD += pitch; 5721 sourceE += pitch; 5722 sourceF += pitch; 5723 } 5724 } 5725 else ASSERT(false); 5726 } 5727 else 5728 { 5729 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821)) 5730 5731 if(internal.depth == 2) 5732 { 5733 for(int y = 0; y < height; y++) 5734 { 5735 for(int x = 0; x < width; x++) 5736 { 5737 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5738 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5739 5740 c0 = AVERAGE(c0, c1); 5741 5742 *(unsigned short*)(source0 + 2 * x) = c0; 5743 } 5744 5745 source0 += pitch; 5746 source1 += pitch; 5747 } 5748 } 5749 else if(internal.depth == 4) 5750 { 5751 for(int y = 0; y < height; y++) 5752 { 5753 for(int x = 0; x < width; x++) 5754 { 5755 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5756 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5757 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5758 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5759 5760 c0 = AVERAGE(c0, c1); 5761 c2 = AVERAGE(c2, c3); 5762 c0 = AVERAGE(c0, c2); 5763 5764 *(unsigned short*)(source0 + 2 * x) = c0; 5765 } 5766 5767 source0 += pitch; 5768 source1 += pitch; 5769 source2 += pitch; 5770 source3 += pitch; 5771 } 5772 } 5773 else if(internal.depth == 8) 5774 { 5775 for(int y = 0; y < height; y++) 5776 { 5777 for(int x = 0; x < width; x++) 5778 { 5779 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5780 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5781 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5782 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5783 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5784 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5785 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5786 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5787 5788 c0 = AVERAGE(c0, c1); 5789 c2 = AVERAGE(c2, c3); 5790 c4 = AVERAGE(c4, c5); 5791 c6 = AVERAGE(c6, c7); 5792 c0 = AVERAGE(c0, c2); 5793 c4 = AVERAGE(c4, c6); 5794 c0 = AVERAGE(c0, c4); 5795 5796 *(unsigned short*)(source0 + 2 * x) = c0; 5797 } 5798 5799 source0 += pitch; 5800 source1 += pitch; 5801 source2 += pitch; 5802 source3 += pitch; 5803 source4 += pitch; 5804 source5 += pitch; 5805 source6 += pitch; 5806 source7 += pitch; 5807 } 5808 } 5809 else if(internal.depth == 16) 5810 { 5811 for(int y = 0; y < height; y++) 5812 { 5813 for(int x = 0; x < width; x++) 5814 { 5815 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5816 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5817 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5818 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5819 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5820 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5821 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5822 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5823 unsigned short c8 = *(unsigned short*)(source8 + 2 * x); 5824 unsigned short c9 = *(unsigned short*)(source9 + 2 * x); 5825 unsigned short cA = *(unsigned short*)(sourceA + 2 * x); 5826 unsigned short cB = *(unsigned short*)(sourceB + 2 * x); 5827 unsigned short cC = *(unsigned short*)(sourceC + 2 * x); 5828 unsigned short cD = *(unsigned short*)(sourceD + 2 * x); 5829 unsigned short cE = *(unsigned short*)(sourceE + 2 * x); 5830 unsigned short cF = *(unsigned short*)(sourceF + 2 * x); 5831 5832 c0 = AVERAGE(c0, c1); 5833 c2 = AVERAGE(c2, c3); 5834 c4 = AVERAGE(c4, c5); 5835 c6 = AVERAGE(c6, c7); 5836 c8 = AVERAGE(c8, c9); 5837 cA = AVERAGE(cA, cB); 5838 cC = AVERAGE(cC, cD); 5839 cE = AVERAGE(cE, cF); 5840 c0 = AVERAGE(c0, c2); 5841 c4 = AVERAGE(c4, c6); 5842 c8 = AVERAGE(c8, cA); 5843 cC = AVERAGE(cC, cE); 5844 c0 = AVERAGE(c0, c4); 5845 c8 = AVERAGE(c8, cC); 5846 c0 = AVERAGE(c0, c8); 5847 5848 *(unsigned short*)(source0 + 2 * x) = c0; 5849 } 5850 5851 source0 += pitch; 5852 source1 += pitch; 5853 source2 += pitch; 5854 source3 += pitch; 5855 source4 += pitch; 5856 source5 += pitch; 5857 source6 += pitch; 5858 source7 += pitch; 5859 source8 += pitch; 5860 source9 += pitch; 5861 sourceA += pitch; 5862 sourceB += pitch; 5863 sourceC += pitch; 5864 sourceD += pitch; 5865 sourceE += pitch; 5866 sourceF += pitch; 5867 } 5868 } 5869 else ASSERT(false); 5870 5871 #undef AVERAGE 5872 } 5873 } 5874 else 5875 { 5876 // UNIMPLEMENTED(); 5877 } 5878 } 5879 } 5880