1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for loading/storing uncompressed and compressed images.
20  */
21 
22 #include <array>
23 #include <cassert>
24 #include <cstdio>
25 #include <cstdlib>
26 #include <cstring>
27 #include <fstream>
28 
29 #include "astcenccli_internal.h"
30 
31 #include "stb_image.h"
32 #include "stb_image_write.h"
33 #include "tinyexr.h"
34 
35 /* ============================================================================
  Image load and store through the stb_image and tinyexr libraries
37 ============================================================================ */
38 
39 /**
40  * @brief Load a .exr image using TinyExr to provide the loader.
41  *
42  * @param      filename          The name of the file to load.
43  * @param      y_flip            Should the image be vertically flipped?
44  * @param[out] is_hdr            Is this an HDR image load? Always @c true for this function.
45  * @param[out] component_count   The number of components in the data.
46  *
47  * @return The loaded image data in a canonical 4 channel format.
48  */
load_image_with_tinyexr(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)49 static astcenc_image* load_image_with_tinyexr(
50 	const char* filename,
51 	bool y_flip,
52 	bool& is_hdr,
53 	unsigned int& component_count
54 ) {
55 	int dim_x, dim_y;
56 	float* image;
57 	const char* err;
58 
59 	int load_res = LoadEXR(&image, &dim_x, &dim_y, filename, &err);
60 	if (load_res != TINYEXR_SUCCESS)
61 	{
62 		printf("ERROR: Failed to load image %s (%s)\n", filename, err);
63 		free(reinterpret_cast<void*>(const_cast<char*>(err)));
64 		return nullptr;
65 	}
66 
67 	astcenc_image* res_img = astc_img_from_floatx4_array(image, dim_x, dim_y, y_flip);
68 	free(image);
69 
70 	is_hdr = true;
71 	component_count = 4;
72 	return res_img;
73 }
74 
75 /**
76  * @brief Load an image using STBImage to provide the loader.
77  *
78  * @param      filename          The name of the file to load.
79  * @param      y_flip            Should the image be vertically flipped?
80  * @param[out] is_hdr            Is this an HDR image load?
81  * @param[out] component_count   The number of components in the data.
82  *
83  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
84  */
load_image_with_stb(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)85 static astcenc_image* load_image_with_stb(
86 	const char* filename,
87 	bool y_flip,
88 	bool& is_hdr,
89 	unsigned int& component_count
90 ) {
91 	int dim_x, dim_y;
92 
93 	if (stbi_is_hdr(filename))
94 	{
95 		float* data = stbi_loadf(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
96 		if (data)
97 		{
98 			astcenc_image* img = astc_img_from_floatx4_array(data, dim_x, dim_y, y_flip);
99 			stbi_image_free(data);
100 			is_hdr = true;
101 			component_count = 4;
102 			return img;
103 		}
104 	}
105 	else
106 	{
107 		uint8_t* data = stbi_load(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
108 		if (data)
109 		{
110 			astcenc_image* img = astc_img_from_unorm8x4_array(data, dim_x, dim_y, y_flip);
111 			stbi_image_free(data);
112 			is_hdr = false;
113 			component_count = 4;
114 			return img;
115 		}
116 	}
117 
118 	printf("ERROR: Failed to load image %s (%s)\n", filename, stbi_failure_reason());
119 	return nullptr;
120 }
121 
122 /**
123  * @brief Save an EXR image using TinyExr to provide the store routine.
124  *
125  * @param img        The source data for the image.
126  * @param filename   The name of the file to save.
127  * @param y_flip     Should the image be vertically flipped?
128  *
129  * @return @c true if the image saved OK, @c false on error.
130  */
store_exr_image_with_tinyexr(const astcenc_image * img,const char * filename,int y_flip)131 static bool store_exr_image_with_tinyexr(
132 	const astcenc_image* img,
133 	const char* filename,
134 	int y_flip
135 ) {
136 	float *buf = floatx4_array_from_astc_img(img, y_flip);
137 	int res = SaveEXR(buf, img->dim_x, img->dim_y, 4, 1, filename, nullptr);
138 	delete[] buf;
139 	return res >= 0;
140 }
141 
142 /**
143  * @brief Save a PNG image using STBImageWrite to provide the store routine.
144  *
145  * @param img        The source data for the image.
146  * @param filename   The name of the file to save.
147  * @param y_flip     Should the image be vertically flipped?
148  *
149  * @return @c true if the image saved OK, @c false on error.
150  */
store_png_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)151 static bool store_png_image_with_stb(
152 	const astcenc_image* img,
153 	const char* filename,
154 	int y_flip
155 ) {
156 	assert(img->data_type == ASTCENC_TYPE_U8);
157 	uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[0]);
158 
159 	stbi_flip_vertically_on_write(y_flip);
160 	int res = stbi_write_png(filename, img->dim_x, img->dim_y, 4, buf, img->dim_x * 4);
161 	return res != 0;
162 }
163 
164 /**
165  * @brief Save a TGA image using STBImageWrite to provide the store routine.
166  *
167  * @param img        The source data for the image.
168  * @param filename   The name of the file to save.
169  * @param y_flip     Should the image be vertically flipped?
170  *
171  * @return @c true if the image saved OK, @c false on error.
172  */
store_tga_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)173 static bool store_tga_image_with_stb(
174 	const astcenc_image* img,
175 	const char* filename,
176 	int y_flip
177 ) {
178 	assert(img->data_type == ASTCENC_TYPE_U8);
179 	uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[0]);
180 
181 	stbi_flip_vertically_on_write(y_flip);
182 	int res = stbi_write_tga(filename, img->dim_x, img->dim_y, 4, buf);
183 	return res != 0;
184 }
185 
186 /**
187  * @brief Save a BMP image using STBImageWrite to provide the store routine.
188  *
189  * @param img        The source data for the image.
190  * @param filename   The name of the file to save.
191  * @param y_flip     Should the image be vertically flipped?
192  *
193  * @return @c true if the image saved OK, @c false on error.
194  */
store_bmp_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)195 static bool store_bmp_image_with_stb(
196 	const astcenc_image* img,
197 	const char* filename,
198 	int y_flip
199 ) {
200 	assert(img->data_type == ASTCENC_TYPE_U8);
201 	uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[0]);
202 
203 	stbi_flip_vertically_on_write(y_flip);
204 	int res = stbi_write_bmp(filename, img->dim_x, img->dim_y, 4, buf);
205 	return res != 0;
206 }
207 
208 /**
209  * @brief Save a HDR image using STBImageWrite to provide the store routine.
210  *
211  * @param img        The source data for the image.
212  * @param filename   The name of the file to save.
213  * @param y_flip     Should the image be vertically flipped?
214  *
215  * @return @c true if the image saved OK, @c false on error.
216  */
store_hdr_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)217 static bool store_hdr_image_with_stb(
218 	const astcenc_image* img,
219 	const char* filename,
220 	int y_flip
221 ) {
222 	float* buf = floatx4_array_from_astc_img(img, y_flip);
223 	int res = stbi_write_hdr(filename, img->dim_x, img->dim_y, 4, buf);
224 	delete[] buf;
225 	return res != 0;
226 }
227 
228 /* ============================================================================
229 Native Load and store of KTX and DDS file formats.
230 
231 Unlike "regular" 2D image formats, which are mostly supported through stb_image
232 and tinyexr, these formats are supported directly; this involves a relatively
233 large number of pixel formats.
234 
235 The following restrictions apply to loading of these file formats:
236 
237     * Only uncompressed data supported
238     * Only first mipmap in mipmap pyramid supported
239     * KTX: Cube-map arrays are not supported
240 ============================================================================ */
/**
 * @brief The scanline conversions supported when importing uncompressed data.
 *
 * Each name reads as SOURCE_TO_DESTINATION. The destination is always 4 component RGBA,
 * stored either as 8-bit values (RGBA8) or as FP16 values (RGBA16F).
 */
enum scanline_transfer
{
	// 8-bit sources, copied to 8-bit RGBA
	R8_TO_RGBA8,
	RG8_TO_RGBA8,
	RGB8_TO_RGBA8,
	RGBA8_TO_RGBA8,
	BGR8_TO_RGBA8,
	BGRA8_TO_RGBA8,
	L8_TO_RGBA8,
	LA8_TO_RGBA8,

	// 8-bit sources with four components where the fourth is ignored
	RGBX8_TO_RGBA8,
	BGRX8_TO_RGBA8,

	// 16-bit unsigned integer sources, converted to FP16 RGBA
	R16_TO_RGBA16F,
	RG16_TO_RGBA16F,
	RGB16_TO_RGBA16F,
	RGBA16_TO_RGBA16F,
	BGR16_TO_RGBA16F,
	BGRA16_TO_RGBA16F,
	L16_TO_RGBA16F,
	LA16_TO_RGBA16F,

	// FP16 sources, copied to FP16 RGBA
	R16F_TO_RGBA16F,
	RG16F_TO_RGBA16F,
	RGB16F_TO_RGBA16F,
	RGBA16F_TO_RGBA16F,
	BGR16F_TO_RGBA16F,
	BGRA16F_TO_RGBA16F,
	L16F_TO_RGBA16F,
	LA16F_TO_RGBA16F,

	// FP32 sources, converted to FP16 RGBA
	R32F_TO_RGBA16F,
	RG32F_TO_RGBA16F,
	RGB32F_TO_RGBA16F,
	RGBA32F_TO_RGBA16F,
	BGR32F_TO_RGBA16F,
	BGRA32F_TO_RGBA16F,
	L32F_TO_RGBA16F,
	LA32F_TO_RGBA16F
};
282 
283 /**
284  * @brief Copy a scanline from a source file and expand to a canonical format.
285  *
286  * Outputs are always 4 component RGBA, stored as U8 (LDR) or FP16 (HDR).
287  *
288  * @param[out] dst           The start of the line to store to.
289  * @param      src           The start of the line to load.
290  * @param      pixel_count   The number of pixels in the scanline.
291  * @param      method        The conversion function.
292  */
copy_scanline(void * dst,const void * src,int pixel_count,scanline_transfer method)293 static void copy_scanline(
294 	void* dst,
295 	const void* src,
296 	int pixel_count,
297 	scanline_transfer method
298 ) {
299 
300 #define id(x) (x)
301 #define u16_sf16(x) float_to_float16(x * (1.0f/65535.0f))
302 #define f32_sf16(x) float_to_float16(x)
303 
304 #define COPY_R(dsttype, srctype, convfunc, oneval) \
305 	do { \
306 		const srctype* s = reinterpret_cast<const srctype*>(src); \
307 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
308 		for (int i = 0; i < pixel_count; i++) \
309 		{ \
310 			d[4 * i    ] = convfunc(s[i]); \
311 			d[4 * i + 1] = 0;              \
312 			d[4 * i + 2] = 0;              \
313 			d[4 * i + 3] = oneval;         \
314 		} \
315 	} while (0); \
316 	break
317 
318 #define COPY_RG(dsttype, srctype, convfunc, oneval) \
319 	do { \
320 		const srctype* s = reinterpret_cast<const srctype*>(src); \
321 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
322 		for (int i = 0; i < pixel_count; i++) \
323 		{ \
324 			d[4 * i    ] = convfunc(s[2 * i    ]); \
325 			d[4 * i + 1] = convfunc(s[2 * i + 1]); \
326 			d[4 * i + 2] = 0;                      \
327 			d[4 * i + 3] = oneval;                 \
328 		} \
329 	} while (0); \
330 	break
331 
332 #define COPY_RGB(dsttype, srctype, convfunc, oneval) \
333 	do { \
334 		const srctype* s = reinterpret_cast<const srctype*>(src); \
335 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
336 		for (int i = 0; i < pixel_count; i++) \
337 		{ \
338 			d[4 * i    ] = convfunc(s[3 * i    ]); \
339 			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
340 			d[4 * i + 2] = convfunc(s[3 * i + 2]); \
341 			d[4 * i + 3] = oneval;                 \
342 		} \
343 	} while (0); \
344 	break
345 
346 #define COPY_BGR(dsttype, srctype, convfunc, oneval) \
347 	do { \
348 		const srctype* s = reinterpret_cast<const srctype*>(src); \
349 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
350 		for (int i = 0; i < pixel_count; i++)\
351 		{ \
352 			d[4 * i    ] = convfunc(s[3 * i + 2]); \
353 			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
354 			d[4 * i + 2] = convfunc(s[3 * i    ]); \
355 			d[4 * i + 3] = oneval;                 \
356 		} \
357 	} while (0); \
358 	break
359 
360 #define COPY_RGBX(dsttype, srctype, convfunc, oneval) \
361 	do { \
362 		const srctype* s = reinterpret_cast<const srctype*>(src); \
363 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
364 		for (int i = 0; i < pixel_count; i++)\
365 		{ \
366 			d[4 * i    ] = convfunc(s[4 * i    ]); \
367 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
368 			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
369 			d[4 * i + 3] = oneval;                 \
370 		} \
371 	} while (0); \
372 	break
373 
374 #define COPY_BGRX(dsttype, srctype, convfunc, oneval) \
375 	do { \
376 		const srctype* s = reinterpret_cast<const srctype*>(src); \
377 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
378 		for (int i = 0; i < pixel_count; i++)\
379 		{ \
380 			d[4 * i    ] = convfunc(s[4 * i + 2]); \
381 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
382 			d[4 * i + 2] = convfunc(s[4 * i    ]); \
383 			d[4 * i + 3] = oneval;                 \
384 		} \
385 	} while (0); \
386 	break
387 
388 #define COPY_RGBA(dsttype, srctype, convfunc, oneval) \
389 	do { \
390 		const srctype* s = reinterpret_cast<const srctype*>(src); \
391 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
392 		for (int i = 0; i < pixel_count; i++) \
393 		{ \
394 			d[4 * i    ] = convfunc(s[4 * i    ]); \
395 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
396 			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
397 			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
398 		} \
399 	} while (0); \
400 	break
401 
402 #define COPY_BGRA(dsttype, srctype, convfunc, oneval) \
403 	do { \
404 		const srctype* s = reinterpret_cast<const srctype*>(src); \
405 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
406 		for (int i = 0; i < pixel_count; i++) \
407 		{ \
408 			d[4 * i    ] = convfunc(s[4 * i + 2]); \
409 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
410 			d[4 * i + 2] = convfunc(s[4 * i    ]); \
411 			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
412 		} \
413 	} while (0); \
414 	break
415 
416 #define COPY_L(dsttype, srctype, convfunc, oneval) \
417 	do { \
418 		const srctype* s = reinterpret_cast<const srctype*>(src); \
419 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
420 		for (int i = 0; i < pixel_count; i++) \
421 		{ \
422 			d[4 * i    ] = convfunc(s[i]); \
423 			d[4 * i + 1] = convfunc(s[i]); \
424 			d[4 * i + 2] = convfunc(s[i]); \
425 			d[4 * i + 3] = oneval;         \
426 		} \
427 	} while (0); \
428 	break
429 
430 #define COPY_LA(dsttype, srctype, convfunc, oneval) \
431 	do { \
432 		const srctype* s = reinterpret_cast<const srctype*>(src); \
433 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
434 		for (int i = 0; i < pixel_count; i++) \
435 		{ \
436 			d[4 * i    ] = convfunc(s[2 * i    ]); \
437 			d[4 * i + 1] = convfunc(s[2 * i    ]); \
438 			d[4 * i + 2] = convfunc(s[2 * i    ]); \
439 			d[4 * i + 3] = convfunc(s[2 * i + 1]); \
440 		} \
441 	} while (0); \
442 	break
443 
444 	switch (method)
445 	{
446 	case R8_TO_RGBA8:
447 		COPY_R(uint8_t, uint8_t, id, 0xFF);
448 	case RG8_TO_RGBA8:
449 		COPY_RG(uint8_t, uint8_t, id, 0xFF);
450 	case RGB8_TO_RGBA8:
451 		COPY_RGB(uint8_t, uint8_t, id, 0xFF);
452 	case RGBA8_TO_RGBA8:
453 		COPY_RGBA(uint8_t, uint8_t, id, 0xFF);
454 	case BGR8_TO_RGBA8:
455 		COPY_BGR(uint8_t, uint8_t, id, 0xFF);
456 	case BGRA8_TO_RGBA8:
457 		COPY_BGRA(uint8_t, uint8_t, id, 0xFF);
458 	case RGBX8_TO_RGBA8:
459 		COPY_RGBX(uint8_t, uint8_t, id, 0xFF);
460 	case BGRX8_TO_RGBA8:
461 		COPY_BGRX(uint8_t, uint8_t, id, 0xFF);
462 	case L8_TO_RGBA8:
463 		COPY_L(uint8_t, uint8_t, id, 0xFF);
464 	case LA8_TO_RGBA8:
465 		COPY_LA(uint8_t, uint8_t, id, 0xFF);
466 
467 	case R16F_TO_RGBA16F:
468 		COPY_R(uint16_t, uint16_t, id, 0x3C00);
469 	case RG16F_TO_RGBA16F:
470 		COPY_RG(uint16_t, uint16_t, id, 0x3C00);
471 	case RGB16F_TO_RGBA16F:
472 		COPY_RGB(uint16_t, uint16_t, id, 0x3C00);
473 	case RGBA16F_TO_RGBA16F:
474 		COPY_RGBA(uint16_t, uint16_t, id, 0x3C00);
475 	case BGR16F_TO_RGBA16F:
476 		COPY_BGR(uint16_t, uint16_t, id, 0x3C00);
477 	case BGRA16F_TO_RGBA16F:
478 		COPY_BGRA(uint16_t, uint16_t, id, 0x3C00);
479 	case L16F_TO_RGBA16F:
480 		COPY_L(uint16_t, uint16_t, id, 0x3C00);
481 	case LA16F_TO_RGBA16F:
482 		COPY_LA(uint16_t, uint16_t, id, 0x3C00);
483 
484 	case R16_TO_RGBA16F:
485 		COPY_R(uint16_t, uint16_t, u16_sf16, 0x3C00);
486 	case RG16_TO_RGBA16F:
487 		COPY_RG(uint16_t, uint16_t, u16_sf16, 0x3C00);
488 	case RGB16_TO_RGBA16F:
489 		COPY_RGB(uint16_t, uint16_t, u16_sf16, 0x3C00);
490 	case RGBA16_TO_RGBA16F:
491 		COPY_RGBA(uint16_t, uint16_t, u16_sf16, 0x3C00);
492 	case BGR16_TO_RGBA16F:
493 		COPY_BGR(uint16_t, uint16_t, u16_sf16, 0x3C00);
494 	case BGRA16_TO_RGBA16F:
495 		COPY_BGRA(uint16_t, uint16_t, u16_sf16, 0x3C00);
496 	case L16_TO_RGBA16F:
497 		COPY_L(uint16_t, uint16_t, u16_sf16, 0x3C00);
498 	case LA16_TO_RGBA16F:
499 		COPY_LA(uint16_t, uint16_t, u16_sf16, 0x3C00);
500 
501 	case R32F_TO_RGBA16F:
502 		COPY_R(uint16_t, float, f32_sf16, 0x3C00);
503 	case RG32F_TO_RGBA16F:
504 		COPY_RG(uint16_t, float, f32_sf16, 0x3C00);
505 	case RGB32F_TO_RGBA16F:
506 		COPY_RGB(uint16_t, float, f32_sf16, 0x3C00);
507 	case RGBA32F_TO_RGBA16F:
508 		COPY_RGBA(uint16_t, float, f32_sf16, 0x3C00);
509 	case BGR32F_TO_RGBA16F:
510 		COPY_BGR(uint16_t, float, f32_sf16, 0x3C00);
511 	case BGRA32F_TO_RGBA16F:
512 		COPY_BGRA(uint16_t, float, f32_sf16, 0x3C00);
513 	case L32F_TO_RGBA16F:
514 		COPY_L(uint16_t, float, f32_sf16, 0x3C00);
515 	case LA32F_TO_RGBA16F:
516 		COPY_LA(uint16_t, float, f32_sf16, 0x3C00);
517 	}
518 }
519 
/**
 * @brief Reverse the byte order of each two byte value in a buffer, in place.
 *
 * Only whole two byte values are processed; a trailing odd byte is left unchanged.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness2(
	void* dataptr,
	int byte_count
) {
	uint8_t* cursor = reinterpret_cast<uint8_t*>(dataptr);

	for (int remaining = byte_count / 2; remaining > 0; remaining--)
	{
		const uint8_t first = cursor[0];
		cursor[0] = cursor[1];
		cursor[1] = first;
		cursor += 2;
	}
}
540 
/**
 * @brief Reverse the byte order of each four byte value in a buffer, in place.
 *
 * Only whole four byte values are processed; up to three trailing bytes are left unchanged.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness4(
	void* dataptr,
	int byte_count
) {
	uint8_t* word = reinterpret_cast<uint8_t*>(dataptr);

	for (int remaining = byte_count / 4; remaining > 0; remaining--)
	{
		// Exchange the outer pair and the inner pair of bytes
		const uint8_t outer = word[0];
		const uint8_t inner = word[1];
		word[0] = word[3];
		word[1] = word[2];
		word[2] = inner;
		word[3] = outer;
		word += 4;
	}
}
565 
/**
 * @brief Reverse the byte order of a 32-bit value.
 *
 * @param v   The data to convert.
 *
 * @return The value with its four bytes in the opposite order.
 */
static uint32_t u32_byterev(uint32_t v)
{
	// Pull out the four bytes, least significant first
	const uint32_t byte0 = v & 0xFF;
	const uint32_t byte1 = (v >> 8) & 0xFF;
	const uint32_t byte2 = (v >> 16) & 0xFF;
	const uint32_t byte3 = v >> 24;

	// Reassemble with the least significant byte now on top
	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
577 
578 /*
579  Notes about KTX:
580 
581  After the header and the key/value data area, the actual image data follows.
 Each image starts with a 4-byte "imageSize" value indicating how many bytes of image data follow.
583  (For cube-maps, this value appears only after first image; the remaining 5 images are all of equal size.)
584  If the size of an image is not a multiple of 4, then it is padded to the next multiple of 4.
585  Note that this padding is NOT included in the "imageSize" field.
 In a cubemap, the padding appears after each face; note that in a 2D/3D texture, padding does
 NOT appear between the lines/planes of the texture!
588 
589  In a KTX file, there may be multiple images; they are organized as follows:
590 
591  For each mipmap_level in numberOfMipmapLevels
592  	UInt32 imageSize;
593  	For each array_element in numberOfArrayElements
594  	* for each face in numberOfFaces
595  		* for each z_slice in pixelDepth
596  			* for each row or row_of_blocks in pixelHeight
597  				* for each pixel or block_of_pixels in pixelWidth
598  					Byte data[format-specific-number-of-bytes]
599  				* end
600  			* end
601  		*end
602  		Byte cubePadding[0-3]
603  	*end
604  	Byte mipPadding[3 - ((imageSize+ 3) % 4)]
605  *end
606 
607  In the ASTC codec, we will, for the time being only harvest the first image,
608  and we will support only a limited set of formats:
609 
610  gl_type: UNSIGNED_BYTE UNSIGNED_SHORT HALF_FLOAT FLOAT UNSIGNED_INT_8_8_8_8 UNSIGNED_INT_8_8_8_8_REV
 gl_format: RED, RG, RGB, RGBA, BGR, BGRA
612  gl_internal_format: used for upload to OpenGL; we can ignore it on uncompressed-load, but
613  	need to provide a reasonable value on store: RGB8 RGBA8 RGB16F RGBA16F
614  gl_base_internal_format: same as gl_format unless texture is compressed (well, BGR is turned into RGB)
615  	RED, RG, RGB, RGBA
616 */
617 
// Khronos enums, defined locally so that no OpenGL headers are needed to build this file.

// Unsized pixel formats
#define GL_RED                                      0x1903
#define GL_RG                                       0x8227
#define GL_RGB                                      0x1907
#define GL_RGBA                                     0x1908
#define GL_BGR                                      0x80E0
#define GL_BGRA                                     0x80E1
#define GL_LUMINANCE                                0x1909
#define GL_LUMINANCE_ALPHA                          0x190A

// Sized 8-bit internal formats
#define GL_R8                                       0x8229
#define GL_RG8                                      0x822B
#define GL_RGB8                                     0x8051
#define GL_RGBA8                                    0x8058

// Sized FP16 internal formats
#define GL_R16F                                     0x822D
#define GL_RG16F                                    0x822F
#define GL_RGB16F                                   0x881B
#define GL_RGBA16F                                  0x881A

// Component data types
#define GL_UNSIGNED_BYTE                            0x1401
#define GL_UNSIGNED_SHORT                           0x1403
#define GL_HALF_FLOAT                               0x140B
#define GL_FLOAT                                    0x1406

// ASTC 2D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_4x4                 0x93B0
#define GL_COMPRESSED_RGBA_ASTC_5x4                 0x93B1
#define GL_COMPRESSED_RGBA_ASTC_5x5                 0x93B2
#define GL_COMPRESSED_RGBA_ASTC_6x5                 0x93B3
#define GL_COMPRESSED_RGBA_ASTC_6x6                 0x93B4
#define GL_COMPRESSED_RGBA_ASTC_8x5                 0x93B5
#define GL_COMPRESSED_RGBA_ASTC_8x6                 0x93B6
#define GL_COMPRESSED_RGBA_ASTC_8x8                 0x93B7
#define GL_COMPRESSED_RGBA_ASTC_10x5                0x93B8
#define GL_COMPRESSED_RGBA_ASTC_10x6                0x93B9
#define GL_COMPRESSED_RGBA_ASTC_10x8                0x93BA
#define GL_COMPRESSED_RGBA_ASTC_10x10               0x93BB
#define GL_COMPRESSED_RGBA_ASTC_12x10               0x93BC
#define GL_COMPRESSED_RGBA_ASTC_12x12               0x93BD

// ASTC 2D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4         0x93D0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4         0x93D1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5         0x93D2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5         0x93D3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6         0x93D4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5         0x93D5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6         0x93D6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8         0x93D7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5        0x93D8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6        0x93D9
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8        0x93DA
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10       0x93DB
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10       0x93DC
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12       0x93DD

// ASTC 3D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_3x3x3_OES           0x93C0
#define GL_COMPRESSED_RGBA_ASTC_4x3x3_OES           0x93C1
#define GL_COMPRESSED_RGBA_ASTC_4x4x3_OES           0x93C2
#define GL_COMPRESSED_RGBA_ASTC_4x4x4_OES           0x93C3
#define GL_COMPRESSED_RGBA_ASTC_5x4x4_OES           0x93C4
#define GL_COMPRESSED_RGBA_ASTC_5x5x4_OES           0x93C5
#define GL_COMPRESSED_RGBA_ASTC_5x5x5_OES           0x93C6
#define GL_COMPRESSED_RGBA_ASTC_6x5x5_OES           0x93C7
#define GL_COMPRESSED_RGBA_ASTC_6x6x5_OES           0x93C8
#define GL_COMPRESSED_RGBA_ASTC_6x6x6_OES           0x93C9

// ASTC 3D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES   0x93E0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES   0x93E1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES   0x93E2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES   0x93E3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES   0x93E4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES   0x93E5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES   0x93E6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES   0x93E7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES   0x93E8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES   0x93E9
694 
/**
 * @brief One row of the ASTC format table: a block size and color space, and the GL enum
 * that identifies that combination.
 */
struct format_entry
{
	/** @brief Block size in the X dimension. */
	unsigned int x;
	/** @brief Block size in the Y dimension. */
	unsigned int y;
	/** @brief Block size in the Z dimension; 1 for the 2D formats. */
	unsigned int z;
	/** @brief @c true for the sRGB variant of the format, @c false for the linear variant. */
	bool is_srgb;
	/** @brief The GL compressed format enum value. */
	unsigned int format;
};
703 
/**
 * @brief Table of the ASTC formats known to this file.
 *
 * Holds 14 2D and 10 3D block sizes, each in a linear and an sRGB variant. Searched
 * linearly by both @c get_format() overloads.
 */
static const std::array<format_entry, 48> ASTC_FORMATS =
{{
	// 2D Linear RGB
	{ 4,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_4x4},
	{ 5,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_5x4},
	{ 5,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_5x5},
	{ 6,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_6x5},
	{ 6,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_6x6},
	{ 8,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_8x5},
	{ 8,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_8x6},
	{ 8,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_8x8},
	{10,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_10x5},
	{10,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_10x6},
	{10,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_10x8},
	{10, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_10x10},
	{12, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_12x10},
	{12, 12,  1, false, GL_COMPRESSED_RGBA_ASTC_12x12},
	// 2D SRGB
	{ 4,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4},
	{ 5,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4},
	{ 5,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5},
	{ 6,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5},
	{ 6,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6},
	{ 8,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5},
	{ 8,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6},
	{ 8,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8},
	{10,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5},
	{10,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6},
	{10,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8},
	{10, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10},
	{12, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10},
	{12, 12,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12},
	// 3D Linear RGB
	{ 3,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_3x3x3_OES},
	{ 4,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_4x3x3_OES},
	{ 4,  4,  3, false, GL_COMPRESSED_RGBA_ASTC_4x4x3_OES},
	{ 4,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_4x4x4_OES},
	{ 5,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_5x4x4_OES},
	{ 5,  5,  4, false, GL_COMPRESSED_RGBA_ASTC_5x5x4_OES},
	{ 5,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_5x5x5_OES},
	{ 6,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_6x5x5_OES},
	{ 6,  6,  5, false, GL_COMPRESSED_RGBA_ASTC_6x6x5_OES},
	{ 6,  6,  6, false, GL_COMPRESSED_RGBA_ASTC_6x6x6_OES},
	// 3D SRGB
	{ 3,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES},
	{ 4,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES},
	{ 4,  4,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES},
	{ 4,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES},
	{ 5,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES},
	{ 5,  5,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES},
	{ 5,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES},
	{ 6,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES},
	{ 6,  6,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES},
	{ 6,  6,  6,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES}
}};
759 
get_format(unsigned int format)760 static const format_entry* get_format(
761 	unsigned int format
762 ) {
763 	for (auto& it : ASTC_FORMATS)
764 	{
765 		if (it.format == format)
766 		{
767 			return &it;
768 		}
769 	}
770 	return nullptr;
771 }
772 
get_format(unsigned int x,unsigned int y,unsigned int z,bool is_srgb)773 static unsigned int get_format(
774 	unsigned int x,
775 	unsigned int y,
776 	unsigned int z,
777 	bool is_srgb
778 ) {
779 	for (auto& it : ASTC_FORMATS)
780 	{
781 		if ((it.x == x) && (it.y == y) && (it.z == z)  && (it.is_srgb == is_srgb))
782 		{
783 			return it.format;
784 		}
785 	}
786 	return 0;
787 }
788 
/**
 * @brief The fixed-size header at the start of a KTX file.
 *
 * The loader reads this structure directly from the file with a single read, so the
 * member order and sizes must match the on-disk layout.
 */
struct ktx_header
{
	uint8_t magic[12];                  // file identifier; compared against ktx_magic
	uint32_t endianness;				// should be 0x04030201; if it is instead 0x01020304, then the endianness of everything must be switched.
	uint32_t gl_type;					// 0 for compressed textures, otherwise value from table 3.2 (page 162) of OpenGL 4.0 spec
	uint32_t gl_type_size;				// size of data elements to do endianness swap on (1=endian-neutral data)
	uint32_t gl_format;					// 0 for compressed textures, otherwise value from table 3.3 (page 163) of OpenGL spec
	uint32_t gl_internal_format;		// sized-internal-format, corresponding to table 3.12 to 3.14 (pages 182-185) of OpenGL spec
	uint32_t gl_base_internal_format;	// unsized-internal-format: corresponding to table 3.11 (page 179) of OpenGL spec
	uint32_t pixel_width;				// texture dimensions; not rounded up to block size for compressed.
	uint32_t pixel_height;				// must be 0 for 1D textures.
	uint32_t pixel_depth;				// must be 0 for 1D, 2D and cubemap textures.
	uint32_t number_of_array_elements;	// 0 if not a texture array
	uint32_t number_of_faces;			// 6 for cubemaps, 1 for non-cubemaps
	uint32_t number_of_mipmap_levels;	// 0 or 1 for non-mipmapped textures; 0 indicates that auto-mipmap-gen should be done at load time.
	uint32_t bytes_of_key_value_data;	// size in bytes of the key-and-value area immediately following the header.
};
806 
// Magic 12-byte sequence that must appear at the beginning of every KTX file. The loader
// compares the first 12 bytes of the header against this and rejects the file on mismatch.
static uint8_t ktx_magic[12] {
	0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A
};
811 
ktx_header_switch_endianness(ktx_header * kt)812 static void ktx_header_switch_endianness(ktx_header * kt)
813 {
814 	#define REV(x) kt->x = u32_byterev(kt->x)
815 	REV(endianness);
816 	REV(gl_type);
817 	REV(gl_type_size);
818 	REV(gl_format);
819 	REV(gl_internal_format);
820 	REV(gl_base_internal_format);
821 	REV(pixel_width);
822 	REV(pixel_height);
823 	REV(pixel_depth);
824 	REV(number_of_array_elements);
825 	REV(number_of_faces);
826 	REV(number_of_mipmap_levels);
827 	REV(bytes_of_key_value_data);
828 	#undef REV
829 }
830 
831 /**
832  * @brief Load an uncompressed KTX image using the local custom loader.
833  *
834  * @param      filename          The name of the file to load.
835  * @param      y_flip            Should the image be vertically flipped?
836  * @param[out] is_hdr            Is this an HDR image load?
837  * @param[out] component_count   The number of components in the data.
838  *
839  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
840  */
load_ktx_uncompressed_image(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)841 static astcenc_image* load_ktx_uncompressed_image(
842 	const char* filename,
843 	bool y_flip,
844 	bool& is_hdr,
845 	unsigned int& component_count
846 ) {
847 	FILE *f = fopen(filename, "rb");
848 	if (!f)
849 	{
850 		printf("Failed to open file %s\n", filename);
851 		return nullptr;
852 	}
853 
854 	ktx_header hdr;
855 	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
856 
857 	if (header_bytes_read != sizeof(hdr))
858 	{
859 		printf("Failed to read header of KTX file %s\n", filename);
860 		fclose(f);
861 		return nullptr;
862 	}
863 
864 	if (memcmp(hdr.magic, ktx_magic, 12) != 0 || (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
865 	{
866 		printf("File %s does not have a valid KTX header\n", filename);
867 		fclose(f);
868 		return nullptr;
869 	}
870 
871 	int switch_endianness = 0;
872 	if (hdr.endianness == 0x01020304)
873 	{
874 		ktx_header_switch_endianness(&hdr);
875 		switch_endianness = 1;
876 	}
877 
878 	if (hdr.gl_type == 0 || hdr.gl_format == 0)
879 	{
880 		printf("File %s appears to be compressed, not supported as input\n", filename);
881 		fclose(f);
882 		return nullptr;
883 	}
884 
885 	// the formats we support are:
886 
887 	// Cartesian product of gl_type=(UNSIGNED_BYTE, UNSIGNED_SHORT, HALF_FLOAT, FLOAT) x gl_format=(RED, RG, RGB, RGBA, BGR, BGRA)
888 
889 	int components;
890 	switch (hdr.gl_format)
891 	{
892 	case GL_RED:
893 		components = 1;
894 		break;
895 	case GL_RG:
896 		components = 2;
897 		break;
898 	case GL_RGB:
899 		components = 3;
900 		break;
901 	case GL_RGBA:
902 		components = 4;
903 		break;
904 	case GL_BGR:
905 		components = 3;
906 		break;
907 	case GL_BGRA:
908 		components = 4;
909 		break;
910 	case GL_LUMINANCE:
911 		components = 1;
912 		break;
913 	case GL_LUMINANCE_ALPHA:
914 		components = 2;
915 		break;
916 	default:
917 		printf("KTX file %s has unsupported GL type\n", filename);
918 		fclose(f);
919 		return nullptr;
920 	}
921 
922 	// Although these are set up later, use default initializer to remove warnings
923 	int bitness = 8;              // Internal precision after conversion
924 	int bytes_per_component = 1;  // Bytes per component in the KTX file
925 	scanline_transfer copy_method = R8_TO_RGBA8;
926 
927 	switch (hdr.gl_type)
928 	{
929 	case GL_UNSIGNED_BYTE:
930 		{
931 			bitness = 8;
932 			bytes_per_component = 1;
933 			switch (hdr.gl_format)
934 			{
935 			case GL_RED:
936 				copy_method = R8_TO_RGBA8;
937 				break;
938 			case GL_RG:
939 				copy_method = RG8_TO_RGBA8;
940 				break;
941 			case GL_RGB:
942 				copy_method = RGB8_TO_RGBA8;
943 				break;
944 			case GL_RGBA:
945 				copy_method = RGBA8_TO_RGBA8;
946 				break;
947 			case GL_BGR:
948 				copy_method = BGR8_TO_RGBA8;
949 				break;
950 			case GL_BGRA:
951 				copy_method = BGRA8_TO_RGBA8;
952 				break;
953 			case GL_LUMINANCE:
954 				copy_method = L8_TO_RGBA8;
955 				break;
956 			case GL_LUMINANCE_ALPHA:
957 				copy_method = LA8_TO_RGBA8;
958 				break;
959 			}
960 			break;
961 		}
962 	case GL_UNSIGNED_SHORT:
963 		{
964 			bitness = 16;
965 			bytes_per_component = 2;
966 			switch (hdr.gl_format)
967 			{
968 			case GL_RED:
969 				copy_method = R16_TO_RGBA16F;
970 				break;
971 			case GL_RG:
972 				copy_method = RG16_TO_RGBA16F;
973 				break;
974 			case GL_RGB:
975 				copy_method = RGB16_TO_RGBA16F;
976 				break;
977 			case GL_RGBA:
978 				copy_method = RGBA16_TO_RGBA16F;
979 				break;
980 			case GL_BGR:
981 				copy_method = BGR16_TO_RGBA16F;
982 				break;
983 			case GL_BGRA:
984 				copy_method = BGRA16_TO_RGBA16F;
985 				break;
986 			case GL_LUMINANCE:
987 				copy_method = L16_TO_RGBA16F;
988 				break;
989 			case GL_LUMINANCE_ALPHA:
990 				copy_method = LA16_TO_RGBA16F;
991 				break;
992 			}
993 			break;
994 		}
995 	case GL_HALF_FLOAT:
996 		{
997 			bitness = 16;
998 			bytes_per_component = 2;
999 			switch (hdr.gl_format)
1000 			{
1001 			case GL_RED:
1002 				copy_method = R16F_TO_RGBA16F;
1003 				break;
1004 			case GL_RG:
1005 				copy_method = RG16F_TO_RGBA16F;
1006 				break;
1007 			case GL_RGB:
1008 				copy_method = RGB16F_TO_RGBA16F;
1009 				break;
1010 			case GL_RGBA:
1011 				copy_method = RGBA16F_TO_RGBA16F;
1012 				break;
1013 			case GL_BGR:
1014 				copy_method = BGR16F_TO_RGBA16F;
1015 				break;
1016 			case GL_BGRA:
1017 				copy_method = BGRA16F_TO_RGBA16F;
1018 				break;
1019 			case GL_LUMINANCE:
1020 				copy_method = L16F_TO_RGBA16F;
1021 				break;
1022 			case GL_LUMINANCE_ALPHA:
1023 				copy_method = LA16F_TO_RGBA16F;
1024 				break;
1025 			}
1026 			break;
1027 		}
1028 	case GL_FLOAT:
1029 		{
1030 			bitness = 16;
1031 			bytes_per_component = 4;
1032 			switch (hdr.gl_format)
1033 			{
1034 			case GL_RED:
1035 				copy_method = R32F_TO_RGBA16F;
1036 				break;
1037 			case GL_RG:
1038 				copy_method = RG32F_TO_RGBA16F;
1039 				break;
1040 			case GL_RGB:
1041 				copy_method = RGB32F_TO_RGBA16F;
1042 				break;
1043 			case GL_RGBA:
1044 				copy_method = RGBA32F_TO_RGBA16F;
1045 				break;
1046 			case GL_BGR:
1047 				copy_method = BGR32F_TO_RGBA16F;
1048 				break;
1049 			case GL_BGRA:
1050 				copy_method = BGRA32F_TO_RGBA16F;
1051 				break;
1052 			case GL_LUMINANCE:
1053 				copy_method = L32F_TO_RGBA16F;
1054 				break;
1055 			case GL_LUMINANCE_ALPHA:
1056 				copy_method = LA32F_TO_RGBA16F;
1057 				break;
1058 			}
1059 			break;
1060 		}
1061 	default:
1062 		printf("KTX file %s has unsupported GL format\n", filename);
1063 		fclose(f);
1064 		return nullptr;
1065 	}
1066 
1067 	if (hdr.number_of_mipmap_levels > 1)
1068 	{
1069 		printf("WARNING: KTX file %s has %d mipmap levels; only the first one will be encoded.\n", filename, hdr.number_of_mipmap_levels);
1070 	}
1071 
1072 	if (hdr.number_of_array_elements > 1)
1073 	{
1074 		printf("WARNING: KTX file %s contains a texture array with %d layers; only the first one will be encoded.\n", filename, hdr.number_of_array_elements);
1075 	}
1076 
1077 	if (hdr.number_of_faces > 1)
1078 	{
1079 		printf("WARNING: KTX file %s contains a cubemap with 6 faces; only the first one will be encoded.\n", filename);
1080 	}
1081 
1082 
1083 	unsigned int dim_x = hdr.pixel_width;
1084 	unsigned int dim_y = astc::max(hdr.pixel_height, 1u);
1085 	unsigned int dim_z = astc::max(hdr.pixel_depth, 1u);
1086 
1087 	// ignore the key/value data
1088 	fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1089 
1090 	uint32_t specified_bytes_of_surface = 0;
1091 	size_t sb_read = fread(&specified_bytes_of_surface, 1, 4, f);
1092 	if (sb_read != 4)
1093 	{
1094 		printf("Failed to read header of KTX file %s\n", filename);
1095 		fclose(f);
1096 		return nullptr;
1097 	}
1098 
1099 	if (switch_endianness)
1100 	{
1101 		specified_bytes_of_surface = u32_byterev(specified_bytes_of_surface);
1102 	}
1103 
1104 	// read the surface
1105 	uint32_t xstride = bytes_per_component * components * dim_x;
1106 	uint32_t ystride = xstride * dim_y;
1107 	uint32_t computed_bytes_of_surface = dim_z * ystride;
1108 	if (computed_bytes_of_surface != specified_bytes_of_surface)
1109 	{
1110 		fclose(f);
1111 		printf("%s: KTX file inconsistency: computed surface size is %d bytes, but specified size is %d bytes\n", filename, computed_bytes_of_surface, specified_bytes_of_surface);
1112 		return nullptr;
1113 	}
1114 
1115 	uint8_t *buf = new uint8_t[specified_bytes_of_surface];
1116 	size_t bytes_read = fread(buf, 1, specified_bytes_of_surface, f);
1117 	fclose(f);
1118 	if (bytes_read != specified_bytes_of_surface)
1119 	{
1120 		delete[] buf;
1121 		printf("Failed to read file %s\n", filename);
1122 		return nullptr;
1123 	}
1124 
1125 	// perform an endianness swap on the surface if needed.
1126 	if (switch_endianness)
1127 	{
1128 		if (hdr.gl_type_size == 2)
1129 		{
1130 			switch_endianness2(buf, specified_bytes_of_surface);
1131 		}
1132 
1133 		if (hdr.gl_type_size == 4)
1134 		{
1135 			switch_endianness4(buf, specified_bytes_of_surface);
1136 		}
1137 	}
1138 
1139 	// Transfer data from the surface to our own image data structure
1140 	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
1141 
1142 	for (unsigned int z = 0; z < dim_z; z++)
1143 	{
1144 		for (unsigned int y = 0; y < dim_y; y++)
1145 		{
1146 			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
1147 			unsigned int ydst = ymod;
1148 			void *dst;
1149 
1150 			if (astc_img->data_type == ASTCENC_TYPE_U8)
1151 			{
1152 				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
1153 				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
1154 			}
1155 			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
1156 			{
1157 				assert(astc_img->data_type == ASTCENC_TYPE_F16);
1158 				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
1159 				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
1160 			}
1161 
1162 			uint8_t *src = buf + (z * ystride) + (y * xstride);
1163 			copy_scanline(dst, src, dim_x, copy_method);
1164 		}
1165 	}
1166 
1167 	delete[] buf;
1168 	is_hdr = bitness >= 16;
1169 	component_count = components;
1170 	return astc_img;
1171 }
1172 
1173 /**
1174  * @brief Load a KTX compressed image using the local custom loader.
1175  *
1176  * @param      filename          The name of the file to load.
1177  * @param[out] is_srgb           @c true if this is an sRGB image, @c false otherwise.
1178  * @param[out] img               The output image to populate.
1179  *
1180  * @return @c true on error, @c false otherwise.
1181  */
load_ktx_compressed_image(const char * filename,bool & is_srgb,astc_compressed_image & img)1182 bool load_ktx_compressed_image(
1183 	const char* filename,
1184 	bool& is_srgb,
1185 	astc_compressed_image& img
1186 ) {
1187 	FILE *f = fopen(filename, "rb");
1188 	if (!f)
1189 	{
1190 		printf("Failed to open file %s\n", filename);
1191 		return true;
1192 	}
1193 
1194 	ktx_header hdr;
1195 	size_t actual = fread(&hdr, 1, sizeof(hdr), f);
1196 	if (actual != sizeof(hdr))
1197 	{
1198 		printf("Failed to read header from %s\n", filename);
1199 		fclose(f);
1200 		return true;
1201 	}
1202 
1203 	if (memcmp(hdr.magic, ktx_magic, 12) != 0 ||
1204 	    (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
1205 	{
1206 		printf("File %s does not have a valid KTX header\n", filename);
1207 		fclose(f);
1208 		return true;
1209 	}
1210 
1211 	bool switch_endianness = false;
1212 	if (hdr.endianness == 0x01020304)
1213 	{
1214 		switch_endianness = true;
1215 		ktx_header_switch_endianness(&hdr);
1216 	}
1217 
1218 	if (hdr.gl_type != 0 || hdr.gl_format != 0 || hdr.gl_type_size != 1 ||
1219 	    hdr.gl_base_internal_format != GL_RGBA)
1220 	{
1221 		printf("File %s is not a compressed ASTC file\n", filename);
1222 		fclose(f);
1223 		return true;
1224 	}
1225 
1226 	const format_entry* fmt = get_format(hdr.gl_internal_format);
1227 	if (!fmt)
1228 	{
1229 		printf("File %s is not a compressed ASTC file\n", filename);
1230 		fclose(f);
1231 		return true;
1232 	}
1233 
1234 	// Skip over any key-value pairs
1235 	int seekerr;
1236 	seekerr = fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1237 	if (seekerr)
1238 	{
1239 		printf("Failed to skip key-value pairs in %s\n", filename);
1240 		fclose(f);
1241 		return true;
1242 	}
1243 
1244 	// Read the length of the data and endianess convert
1245 	unsigned int data_len;
1246 	actual = fread(&data_len, 1, sizeof(data_len), f);
1247 	if (actual != sizeof(data_len))
1248 	{
1249 		printf("Failed to read mip 0 size from %s\n", filename);
1250 		fclose(f);
1251 		return true;
1252 	}
1253 
1254 	if (switch_endianness)
1255 	{
1256 		data_len = u32_byterev(data_len);
1257 	}
1258 
1259 	// Read the data
1260 	unsigned char* data = new unsigned char[data_len];
1261 	actual = fread(data, 1, data_len, f);
1262 	if (actual != data_len)
1263 	{
1264 		printf("Failed to read mip 0 data from %s\n", filename);
1265 		fclose(f);
1266 		delete[] data;
1267 		return true;
1268 	}
1269 
1270 	img.block_x = fmt->x;
1271 	img.block_y = fmt->y;
1272 	img.block_z = fmt->z == 0 ? 1 : fmt->z;
1273 
1274 	img.dim_x = hdr.pixel_width;
1275 	img.dim_y = hdr.pixel_height;
1276 	img.dim_z = hdr.pixel_depth == 0 ? 1 : hdr.pixel_depth;
1277 
1278 	img.data_len = data_len;
1279 	img.data = data;
1280 
1281 	is_srgb = fmt->is_srgb;
1282 
1283 	fclose(f);
1284 	return false;
1285 }
1286 
1287 /**
1288  * @brief Store a KTX compressed image using a local store routine.
1289  *
1290  * @param img        The image data to store.
1291  * @param filename   The name of the file to save.
1292  * @param is_srgb    @c true if this is an sRGB image, @c false if linear.
1293  *
1294  * @return @c true on error, @c false otherwise.
1295  */
store_ktx_compressed_image(const astc_compressed_image & img,const char * filename,bool is_srgb)1296 bool store_ktx_compressed_image(
1297 	const astc_compressed_image& img,
1298 	const char* filename,
1299 	bool is_srgb
1300 ) {
1301 	unsigned int fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb);
1302 
1303 	ktx_header hdr;
1304 	memcpy(hdr.magic, ktx_magic, 12);
1305 	hdr.endianness = 0x04030201;
1306 	hdr.gl_type = 0;
1307 	hdr.gl_type_size = 1;
1308 	hdr.gl_format = 0;
1309 	hdr.gl_internal_format = fmt;
1310 	hdr.gl_base_internal_format = GL_RGBA;
1311 	hdr.pixel_width = img.dim_x;
1312 	hdr.pixel_height = img.dim_y;
1313 	hdr.pixel_depth = (img.dim_z == 1) ? 0 : img.dim_z;
1314 	hdr.number_of_array_elements = 0;
1315 	hdr.number_of_faces = 1;
1316 	hdr.number_of_mipmap_levels = 1;
1317 	hdr.bytes_of_key_value_data = 0;
1318 
1319 	size_t expected = sizeof(ktx_header) + 4 + img.data_len;
1320 	size_t actual = 0;
1321 
1322 	FILE *wf = fopen(filename, "wb");
1323 	if (!wf)
1324 	{
1325 		return true;
1326 	}
1327 
1328 	actual += fwrite(&hdr, 1, sizeof(ktx_header), wf);
1329 	actual += fwrite(&img.data_len, 1, 4, wf);
1330 	actual += fwrite(img.data, 1, img.data_len, wf);
1331 	fclose(wf);
1332 
1333 	if (actual != expected)
1334 	{
1335 		return true;
1336 	}
1337 
1338 	return false;
1339 }
1340 
1341 /**
1342  * @brief Save a KTX uncompressed image using a local store routine.
1343  *
1344  * @param img        The source data for the image.
1345  * @param filename   The name of the file to save.
1346  * @param y_flip     Should the image be vertically flipped?
1347  *
1348  * @return @c true if the image saved OK, @c false on error.
1349  */
store_ktx_uncompressed_image(const astcenc_image * img,const char * filename,int y_flip)1350 static bool store_ktx_uncompressed_image(
1351 	const astcenc_image* img,
1352 	const char* filename,
1353 	int y_flip
1354 ) {
1355 	unsigned int dim_x = img->dim_x;
1356 	unsigned int dim_y = img->dim_y;
1357 	unsigned int dim_z = img->dim_z;
1358 
1359 	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
1360 	int image_components = determine_image_components(img);
1361 
1362 	ktx_header hdr;
1363 
1364 	static const int gl_format_of_components[4] {
1365 		GL_RED, GL_RG, GL_RGB, GL_RGBA
1366 	};
1367 
1368 	static const int gl_sized_format_of_components_ldr[4] {
1369 		GL_R8, GL_RG8, GL_RGB8, GL_RGBA8
1370 	};
1371 
1372 	static const int gl_sized_format_of_components_hdr[4] {
1373 		GL_R16F, GL_RG16F, GL_RGB16F, GL_RGBA16F
1374 	};
1375 
1376 	memcpy(hdr.magic, ktx_magic, 12);
1377 	hdr.endianness = 0x04030201;
1378 	hdr.gl_type = (bitness == 16) ? GL_HALF_FLOAT : GL_UNSIGNED_BYTE;
1379 	hdr.gl_type_size = bitness / 8;
1380 	hdr.gl_format = gl_format_of_components[image_components - 1];
1381 	if (bitness == 16)
1382 	{
1383 		hdr.gl_internal_format = gl_sized_format_of_components_hdr[image_components - 1];
1384 	}
1385 	else
1386 	{
1387 		hdr.gl_internal_format = gl_sized_format_of_components_ldr[image_components - 1];
1388 	}
1389 	hdr.gl_base_internal_format = hdr.gl_format;
1390 	hdr.pixel_width = dim_x;
1391 	hdr.pixel_height = dim_y;
1392 	hdr.pixel_depth = (dim_z == 1) ? 0 : dim_z;
1393 	hdr.number_of_array_elements = 0;
1394 	hdr.number_of_faces = 1;
1395 	hdr.number_of_mipmap_levels = 1;
1396 	hdr.bytes_of_key_value_data = 0;
1397 
1398 	// Collect image data to write
1399 	uint8_t ***row_pointers8 = nullptr;
1400 	uint16_t ***row_pointers16 = nullptr;
1401 	if (bitness == 8)
1402 	{
1403 		row_pointers8 = new uint8_t **[dim_z];
1404 		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
1405 		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components + 3];
1406 
1407 		for (unsigned int z = 1; z < dim_z; z++)
1408 		{
1409 			row_pointers8[z] = row_pointers8[0] + dim_y * z;
1410 			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_x * image_components * z;
1411 		}
1412 
1413 		for (unsigned int z = 0; z < dim_z; z++)
1414 		{
1415 			for (unsigned int y = 1; y < dim_y; y++)
1416 			{
1417 				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
1418 			}
1419 		}
1420 
1421 		for (unsigned int z = 0; z < dim_z; z++)
1422 		{
1423 			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
1424 			for (unsigned int y = 0; y < dim_y; y++)
1425 			{
1426 				int ym = y_flip ? dim_y - y - 1 : y;
1427 				switch (image_components)
1428 				{
1429 				case 1:		// single-component, treated as Luminance
1430 					for (unsigned int x = 0; x < dim_x; x++)
1431 					{
1432 						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
1433 					}
1434 					break;
1435 				case 2:		// two-component, treated as Luminance-Alpha
1436 					for (unsigned int x = 0; x < dim_x; x++)
1437 					{
1438 						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1439 						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1440 					}
1441 					break;
1442 				case 3:		// three-component, treated a
1443 					for (unsigned int x = 0; x < dim_x; x++)
1444 					{
1445 						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1446 						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1447 						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1448 					}
1449 					break;
1450 				case 4:		// four-component, treated as RGBA
1451 					for (unsigned int x = 0; x < dim_x; x++)
1452 					{
1453 						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1454 						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1455 						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1456 						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1457 					}
1458 					break;
1459 				}
1460 			}
1461 		}
1462 	}
1463 	else						// if bitness == 16
1464 	{
1465 		row_pointers16 = new uint16_t **[dim_z];
1466 		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
1467 		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components + 1];
1468 
1469 		for (unsigned int z = 1; z < dim_z; z++)
1470 		{
1471 			row_pointers16[z] = row_pointers16[0] + dim_y * z;
1472 			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
1473 		}
1474 
1475 		for (unsigned int z = 0; z < dim_z; z++)
1476 		{
1477 			for (unsigned int y = 1; y < dim_y; y++)
1478 			{
1479 				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
1480 			}
1481 		}
1482 
1483 		for (unsigned int z = 0; z < dim_z; z++)
1484 		{
1485 			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
1486 			for (unsigned int y = 0; y < dim_y; y++)
1487 			{
1488 				int ym = y_flip ? dim_y - y - 1 : y;
1489 				switch (image_components)
1490 				{
1491 				case 1:		// single-component, treated as Luminance
1492 					for (unsigned int x = 0; x < dim_x; x++)
1493 					{
1494 						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
1495 					}
1496 					break;
1497 				case 2:		// two-component, treated as Luminance-Alpha
1498 					for (unsigned int x = 0; x < dim_x; x++)
1499 					{
1500 						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1501 						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1502 					}
1503 					break;
1504 				case 3:		// three-component, treated as RGB
1505 					for (unsigned int x = 0; x < dim_x; x++)
1506 					{
1507 						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1508 						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1509 						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1510 					}
1511 					break;
1512 				case 4:		// four-component, treated as RGBA
1513 					for (unsigned int x = 0; x < dim_x; x++)
1514 					{
1515 						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1516 						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1517 						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1518 						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1519 					}
1520 					break;
1521 				}
1522 			}
1523 		}
1524 	}
1525 
1526 	bool retval { true };
1527 	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
1528 	uint32_t image_write_bytes = (image_bytes + 3) & ~3;
1529 
1530 	FILE *wf = fopen(filename, "wb");
1531 	if (wf)
1532 	{
1533 		void* dataptr = (bitness == 16) ?
1534 			reinterpret_cast<void*>(row_pointers16[0][0]) :
1535 			reinterpret_cast<void*>(row_pointers8[0][0]);
1536 
1537 		size_t expected_bytes_written = sizeof(ktx_header) + image_write_bytes + 4;
1538 		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(ktx_header), wf);
1539 		size_t bytecount_bytes_written = fwrite(&image_bytes, 1, 4, wf);
1540 		size_t data_bytes_written = fwrite(dataptr, 1, image_write_bytes, wf);
1541 		fclose(wf);
1542 		if (hdr_bytes_written + bytecount_bytes_written + data_bytes_written != expected_bytes_written)
1543 		{
1544 			retval = false;
1545 		}
1546 	}
1547 	else
1548 	{
1549 		retval = false;
1550 	}
1551 
1552 	if (row_pointers8)
1553 	{
1554 		delete[] row_pointers8[0][0];
1555 		delete[] row_pointers8[0];
1556 		delete[] row_pointers8;
1557 	}
1558 
1559 	if (row_pointers16)
1560 	{
1561 		delete[] row_pointers16[0][0];
1562 		delete[] row_pointers16[0];
1563 		delete[] row_pointers16;
1564 	}
1565 
1566 	return retval;
1567 }
1568 
1569 /*
1570 	Loader for DDS files.
1571 
1572 	Note that after the header, data are densely packed with no padding;
1573 	in the case of multiple surfaces, they appear one after another in
1574 	the file, again with no padding.
1575 
1576 	This code is NOT endian-neutral.
1577 */
// Pixel format description embedded in the DDS header.
struct dds_pixelformat
{
	uint32_t size;				// structure size, set to 32.
	/*
	   flags bits are a combination of the following:
		0x1     : texture contains alpha data
		0x2     : ---- (older files: texture contains alpha data, for Alpha-only texture)
		0x4     : the fourcc field is valid, indicating a compressed or DX10 texture format
		0x40    : texture contains uncompressed RGB data
		0x200   : ---- (YUV in older files)
		0x20000 : texture contains Luminance data (can be combined with 0x1 for Lum-Alpha)
	*/
	uint32_t flags;
	uint32_t fourcc;			// "DX10" to indicate a DX10 format, "DXTn" for the DXT formats
	uint32_t rgbbitcount;		// number of bits per texel; up to 32 for non-DX10 formats.
	uint32_t rbitmask;			// bitmap indicating position of red/luminance color component
	uint32_t gbitmask;			// bitmap indicating position of green color component
	uint32_t bbitmask;			// bitmap indicating position of blue color component
	uint32_t abitmask;			// bitmap indicating position of alpha color component
};

// The structure is transferred to and from files as raw bytes, so its in-memory
// size must match the size documented for the "size" field above.
static_assert(sizeof(dds_pixelformat) == 32, "dds_pixelformat must be exactly 32 bytes");
1593 
1594 struct dds_header
1595 {
1596 	uint32_t size;				// header size; must be exactly 124.
1597 	/*
1598 	   flag field is an OR or the following bits, that indicate fields containing valid data:
1599 		1: caps/caps2/caps3/caps4 (set in all DDS files, ignore on read)
1600 		2: height (set in all DDS files, ignore on read)
1601 		4: width (set in all DDS files, ignore on read)
1602 		8: pitch (for uncompressed texture)
1603 		0x1000: the pixel format field (set in all DDS files, ignore on read)
1604 		0x20000: mipmap count (for mipmapped textures with >1 level)
1605 		0x80000: pitch (for compressed texture)
1606 		0x800000: depth (for 3d textures)
1607 	*/
1608 	uint32_t flags;
1609 	uint32_t height;
1610 	uint32_t width;
1611 	uint32_t pitch_or_linear_size;	// scanline pitch for uncompressed; total size in bytes for compressed
1612 	uint32_t depth;
1613 	uint32_t mipmapcount;
1614 	// unused, set to 0
1615 	uint32_t reserved1[11];
1616 	dds_pixelformat ddspf;
1617 	/*
1618 	   caps field is an OR of the following values:
1619 		8 : should be set for a file that contains more than 1 surface (ignore on read)
1620 		0x400000 : should be set for a mipmapped texture
1621 		0x1000 : should be set if the surface is a texture at all (all DDS files, ignore on read)
1622 	*/
1623 	uint32_t caps;
1624 	/*
1625 	   caps2 field is an OR of the following values:
1626 		0x200 : texture is cubemap
1627 		0x400 : +X face of cubemap is present
1628 		0x800 : -X face of cubemap is present
1629 		0x1000 : +Y face of cubemap is present
1630 		0x2000 : -Y face of cubemap is present
1631 		0x4000 : +Z face of cubemap is present
1632 		0x8000 : -Z face of cubemap is present
1633 		0x200000 : texture is a 3d texture.
1634 	*/
1635 	uint32_t caps2;
1636 	// unused, set to 0
1637 	uint32_t caps3;
1638 	// unused, set to 0
1639 	uint32_t caps4;
1640 	// unused, set to 0
1641 	uint32_t reserved2;
1642 };
1643 
// Extension header that follows dds_header when the pixel format fourcc is "DX10".
struct dds_header_dx10
{
	uint32_t dxgi_format;
	uint32_t resource_dimension;	// 2=1d-texture, 3=2d-texture or cubemap, 4=3d-texture
	uint32_t misc_flag;			// 4 if cubemap, else 0
	uint32_t array_size;		// size of array in case of a texture array; set to 1 for a non-array
	uint32_t reserved;			// set to 0.
};

// The extension header is read from the file as raw bytes, so it must occupy
// exactly five packed 32-bit fields.
static_assert(sizeof(dds_header_dx10) == 20, "dds_header_dx10 must be exactly 20 bytes");

// "DDS " as a little-endian 32-bit value; the first four bytes of every DDS file
#define DDS_MAGIC 0x20534444
// "DX10" as a little-endian 32-bit value; fourcc marking a DX10 extension header
#define DX10_MAGIC 0x30315844
1655 
1656 /**
1657  * @brief Load an uncompressed DDS image using the local custom loader.
1658  *
1659  * @param      filename          The name of the file to load.
1660  * @param      y_flip            Should the image be vertically flipped?
1661  * @param[out] is_hdr            Is this an HDR image load?
1662  * @param[out] component_count   The number of components in the data.
1663  *
1664  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
1665  */
load_dds_uncompressed_image(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)1666 static astcenc_image* load_dds_uncompressed_image(
1667 	const char* filename,
1668 	bool y_flip,
1669 	bool& is_hdr,
1670 	unsigned int& component_count
1671 ) {
1672 	FILE *f = fopen(filename, "rb");
1673 	if (!f)
1674 	{
1675 		printf("Failed to open file %s\n", filename);
1676 		return nullptr;
1677 	}
1678 
1679 	uint8_t magic[4];
1680 
1681 	dds_header hdr;
1682 	size_t magic_bytes_read = fread(magic, 1, 4, f);
1683 	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
1684 	if (magic_bytes_read != 4 || header_bytes_read != sizeof(hdr))
1685 	{
1686 		printf("Failed to read header of DDS file %s\n", filename);
1687 		fclose(f);
1688 		return nullptr;
1689 	}
1690 
1691 	uint32_t magicx = magic[0] | (magic[1] << 8) | (magic[2] << 16) | (magic[3] << 24);
1692 
1693 	if (magicx != DDS_MAGIC || hdr.size != 124)
1694 	{
1695 		printf("File %s does not have a valid DDS header\n", filename);
1696 		fclose(f);
1697 		return nullptr;
1698 	}
1699 
1700 	int use_dx10_header = 0;
1701 	if (hdr.ddspf.flags & 4)
1702 	{
1703 		if (hdr.ddspf.fourcc == DX10_MAGIC)
1704 		{
1705 			use_dx10_header = 1;
1706 		}
1707 		else
1708 		{
1709 			printf("DDS file %s is compressed, not supported\n", filename);
1710 			fclose(f);
1711 			return nullptr;
1712 		}
1713 	}
1714 
1715 	dds_header_dx10 dx10_header;
1716 	if (use_dx10_header)
1717 	{
1718 		size_t dx10_header_bytes_read = fread(&dx10_header, 1, sizeof(dx10_header), f);
1719 		if (dx10_header_bytes_read != sizeof(dx10_header))
1720 		{
1721 			printf("Failed to read header of DDS file %s\n", filename);
1722 			fclose(f);
1723 			return nullptr;
1724 		}
1725 	}
1726 
1727 	unsigned int dim_x = hdr.width;
1728 	unsigned int dim_y = hdr.height;
1729 	unsigned int dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1;
1730 
1731 	// The bitcount that we will use internally in the codec
1732 	int bitness = 0;
1733 
1734 	// The bytes per component in the DDS file itself
1735 	int bytes_per_component = 0;
1736 	int components = 0;
1737 	scanline_transfer copy_method = R8_TO_RGBA8;
1738 
1739 	// figure out the format actually used in the DDS file.
1740 	if (use_dx10_header)
1741 	{
1742 		// DX10 header present; use the DXGI format.
1743 		#define DXGI_FORMAT_R32G32B32A32_FLOAT   2
1744 		#define DXGI_FORMAT_R32G32B32_FLOAT      6
1745 		#define DXGI_FORMAT_R16G16B16A16_FLOAT  10
1746 		#define DXGI_FORMAT_R16G16B16A16_UNORM  11
1747 		#define DXGI_FORMAT_R32G32_FLOAT        16
1748 		#define DXGI_FORMAT_R8G8B8A8_UNORM      28
1749 		#define DXGI_FORMAT_R16G16_FLOAT    34
1750 		#define DXGI_FORMAT_R16G16_UNORM    35
1751 		#define DXGI_FORMAT_R32_FLOAT       41
1752 		#define DXGI_FORMAT_R8G8_UNORM      49
1753 		#define DXGI_FORMAT_R16_FLOAT       54
1754 		#define DXGI_FORMAT_R16_UNORM       56
1755 		#define DXGI_FORMAT_R8_UNORM        61
1756 		#define DXGI_FORMAT_B8G8R8A8_UNORM  86
1757 		#define DXGI_FORMAT_B8G8R8X8_UNORM  87
1758 
1759 		struct dxgi_params
1760 		{
1761 			int bitness;
1762 			int bytes_per_component;
1763 			int components;
1764 			scanline_transfer copy_method;
1765 			uint32_t dxgi_format_number;
1766 		};
1767 
1768 		static const dxgi_params format_params[] {
1769 			{16, 4, 4, RGBA32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32A32_FLOAT},
1770 			{16, 4, 3, RGB32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32_FLOAT},
1771 			{16, 2, 4, RGBA16F_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_FLOAT},
1772 			{16, 2, 4, RGBA16_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_UNORM},
1773 			{16, 4, 2, RG32F_TO_RGBA16F, DXGI_FORMAT_R32G32_FLOAT},
1774 			{8, 1, 4, RGBA8_TO_RGBA8, DXGI_FORMAT_R8G8B8A8_UNORM},
1775 			{16, 2, 2, RG16F_TO_RGBA16F, DXGI_FORMAT_R16G16_FLOAT},
1776 			{16, 2, 2, RG16_TO_RGBA16F, DXGI_FORMAT_R16G16_UNORM},
1777 			{16, 4, 1, R32F_TO_RGBA16F, DXGI_FORMAT_R32_FLOAT},
1778 			{8, 1, 2, RG8_TO_RGBA8, DXGI_FORMAT_R8G8_UNORM},
1779 			{16, 2, 1, R16F_TO_RGBA16F, DXGI_FORMAT_R16_FLOAT},
1780 			{16, 2, 1, R16_TO_RGBA16F, DXGI_FORMAT_R16_UNORM},
1781 			{8, 1, 1, R8_TO_RGBA8, DXGI_FORMAT_R8_UNORM},
1782 			{8, 1, 4, BGRA8_TO_RGBA8, DXGI_FORMAT_B8G8R8A8_UNORM},
1783 			{8, 1, 4, BGRX8_TO_RGBA8, DXGI_FORMAT_B8G8R8X8_UNORM},
1784 		};
1785 
1786 		int dxgi_modes_supported = sizeof(format_params) / sizeof(format_params[0]);
1787 		int did_select_format = 0;
1788 		for (int i = 0; i < dxgi_modes_supported; i++)
1789 		{
1790 			if (dx10_header.dxgi_format == format_params[i].dxgi_format_number)
1791 			{
1792 				bitness = format_params[i].bitness;
1793 				bytes_per_component = format_params[i].bytes_per_component;
1794 				components = format_params[i].components;
1795 				copy_method = format_params[i].copy_method;
1796 				did_select_format = 1;
1797 				break;
1798 			}
1799 		}
1800 
1801 		if (!did_select_format)
1802 		{
1803 			printf("DDS file %s: DXGI format not supported by codec\n", filename);
1804 			fclose(f);
1805 			return nullptr;
1806 		}
1807 	}
1808 	else
1809 	{
1810 		// No DX10 header present. Then try to match the bitcount and bitmask against
1811 		// a set of prepared patterns.
1812 		uint32_t flags = hdr.ddspf.flags;
1813 		uint32_t bitcount = hdr.ddspf.rgbbitcount;
1814 		uint32_t rmask = hdr.ddspf.rbitmask;
1815 		uint32_t gmask = hdr.ddspf.gbitmask;
1816 		uint32_t bmask = hdr.ddspf.bbitmask;
1817 		uint32_t amask = hdr.ddspf.abitmask;
1818 
1819 		// RGBA-unorm8
1820 		if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000 && amask == 0xFF000000)
1821 		{
1822 			bytes_per_component = 1;
1823 			components = 4;
1824 			copy_method = RGBA8_TO_RGBA8;
1825 		}
1826 		// BGRA-unorm8
1827 		else if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0xFF000000)
1828 		{
1829 			bytes_per_component = 1;
1830 			components = 4;
1831 			copy_method = BGRA8_TO_RGBA8;
1832 		}
1833 		// RGBX-unorm8
1834 		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1835 		{
1836 			bytes_per_component = 1;
1837 			components = 4;
1838 			copy_method = RGBX8_TO_RGBA8;
1839 		}
1840 		// BGRX-unorm8
1841 		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1842 		{
1843 			bytes_per_component = 1;
1844 			components = 4;
1845 			copy_method = BGRX8_TO_RGBA8;
1846 		}
1847 		// RGB-unorm8
1848 		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1849 		{
1850 			bytes_per_component = 1;
1851 			components = 3;
1852 			copy_method = RGB8_TO_RGBA8;
1853 		}
1854 		// BGR-unorm8
1855 		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1856 		{
1857 			bytes_per_component = 1;
1858 			components = 3;
1859 			copy_method = BGR8_TO_RGBA8;
1860 		}
1861 		// RG-unorm16
1862 		else if ((flags & 0x40) && bitcount == 16 && rmask == 0xFFFF && gmask == 0xFFFF0000)
1863 		{
1864 			bytes_per_component = 2;
1865 			components = 2;
1866 			copy_method = RG16_TO_RGBA16F;
1867 		}
1868 		// A8L8
1869 		else if ((flags & 0x20001) == 0x20001 && bitcount == 16 && rmask == 0xFF && amask == 0xFF00)
1870 		{
1871 			bytes_per_component = 1;
1872 			components = 2;
1873 			copy_method = LA8_TO_RGBA8;
1874 		}
1875 		// L8
1876 		else if ((flags & 0x20000) && bitcount == 8 && rmask == 0xFF)
1877 		{
1878 			bytes_per_component = 1;
1879 			components = 1;
1880 			copy_method = L8_TO_RGBA8;
1881 		}
1882 		// L16
1883 		else if ((flags & 0x20000) && bitcount == 16 && rmask == 0xFFFF)
1884 		{
1885 			bytes_per_component = 2;
1886 			components = 1;
1887 			copy_method = L16_TO_RGBA16F;
1888 		}
1889 		else
1890 		{
1891 			printf("DDS file %s: Non-DXGI format not supported by codec\n", filename);
1892 			fclose(f);
1893 			return nullptr;
1894 		}
1895 
1896 		bitness = bytes_per_component * 8;
1897 	}
1898 
1899 	// then, load the actual file.
1900 	uint32_t xstride = bytes_per_component * components * dim_x;
1901 	uint32_t ystride = xstride * dim_y;
1902 	uint32_t bytes_of_surface = ystride * dim_z;
1903 
1904 	uint8_t *buf = new uint8_t[bytes_of_surface];
1905 	size_t bytes_read = fread(buf, 1, bytes_of_surface, f);
1906 	fclose(f);
1907 	if (bytes_read != bytes_of_surface)
1908 	{
1909 		delete[] buf;
1910 		printf("Failed to read file %s\n", filename);
1911 		return nullptr;
1912 	}
1913 
1914 	// then transfer data from the surface to our own image-data-structure.
1915 	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
1916 
1917 	for (unsigned int z = 0; z < dim_z; z++)
1918 	{
1919 		for (unsigned int y = 0; y < dim_y; y++)
1920 		{
1921 			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
1922 			unsigned int ydst = ymod;
1923 			void* dst;
1924 
1925 			if (astc_img->data_type == ASTCENC_TYPE_U8)
1926 			{
1927 				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
1928 				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
1929 			}
1930 			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
1931 			{
1932 				assert(astc_img->data_type == ASTCENC_TYPE_F16);
1933 				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
1934 				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
1935 			}
1936 
1937 			uint8_t *src = buf + (z * ystride) + (y * xstride);
1938 			copy_scanline(dst, src, dim_x, copy_method);
1939 		}
1940 	}
1941 
1942 	delete[] buf;
1943 	is_hdr = bitness >= 16;
1944 	component_count = components;
1945 	return astc_img;
1946 }
1947 
1948 /**
1949  * @brief Save a DDS uncompressed image using a local store routine.
1950  *
1951  * @param img        The source data for the image.
1952  * @param filename   The name of the file to save.
1953  * @param y_flip     Should the image be vertically flipped?
1954  *
1955  * @return @c true if the image saved OK, @c false on error.
1956  */
store_dds_uncompressed_image(const astcenc_image * img,const char * filename,int y_flip)1957 static bool store_dds_uncompressed_image(
1958 	const astcenc_image* img,
1959 	const char* filename,
1960 	int y_flip
1961 ) {
1962 	unsigned int dim_x = img->dim_x;
1963 	unsigned int dim_y = img->dim_y;
1964 	unsigned int dim_z = img->dim_z;
1965 
1966 	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
1967 	int image_components = (bitness == 16) ? 4 : determine_image_components(img);
1968 
1969 	// DDS-pixel-format structures to use when storing LDR image with 1,2,3 or 4 components.
1970 	static const dds_pixelformat format_of_image_components[4] =
1971 	{
1972 		{32, 0x20000, 0, 8, 0xFF, 0, 0, 0},	// luminance
1973 		{32, 0x20001, 0, 16, 0xFF, 0, 0, 0xFF00},	// L8A8
1974 		{32, 0x40, 0, 24, 0xFF, 0xFF00, 0xFF0000, 0},	// RGB8
1975 		{32, 0x41, 0, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000}	// RGBA8
1976 	};
1977 
1978 	// DDS-pixel-format structures to use when storing HDR image.
1979 	static const dds_pixelformat dxt10_diverter =
1980 	{
1981 		32, 4, DX10_MAGIC, 0, 0, 0, 0, 0
1982 	};
1983 
1984 	// Header handling; will write:
1985 	// * DDS magic value
1986 	// * DDS header
1987 	// * DDS DX10 header, if the file is floating-point
1988 	// * pixel data
1989 
1990 	// Main header data
1991 	dds_header hdr;
1992 	hdr.size = 124;
1993 	hdr.flags = 0x100F | (dim_z > 1 ? 0x800000 : 0);
1994 	hdr.height = dim_y;
1995 	hdr.width = dim_x;
1996 	hdr.pitch_or_linear_size = image_components * (bitness / 8) * dim_x;
1997 	hdr.depth = dim_z;
1998 	hdr.mipmapcount = 1;
1999 	for (unsigned int i = 0; i < 11; i++)
2000 	{
2001 		hdr.reserved1[i] = 0;
2002 	}
2003 	hdr.caps = 0x1000;
2004 	hdr.caps2 = (dim_z > 1) ? 0x200000 : 0;
2005 	hdr.caps3 = 0;
2006 	hdr.caps4 = 0;
2007 
2008 	// Pixel-format data
2009 	if (bitness == 8)
2010 	{
2011 		hdr.ddspf = format_of_image_components[image_components - 1];
2012 	}
2013 	else
2014 	{
2015 		hdr.ddspf = dxt10_diverter;
2016 	}
2017 
2018 	// DX10 data
2019 	dds_header_dx10 dx10;
2020 	dx10.dxgi_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
2021 	dx10.resource_dimension = (dim_z > 1) ? 4 : 3;
2022 	dx10.misc_flag = 0;
2023 	dx10.array_size = 1;
2024 	dx10.reserved = 0;
2025 
2026 	// Collect image data to write
2027 	uint8_t ***row_pointers8 = nullptr;
2028 	uint16_t ***row_pointers16 = nullptr;
2029 
2030 	if (bitness == 8)
2031 	{
2032 		row_pointers8 = new uint8_t **[dim_z];
2033 		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
2034 		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components];
2035 
2036 		for (unsigned int z = 1; z < dim_z; z++)
2037 		{
2038 			row_pointers8[z] = row_pointers8[0] + dim_y * z;
2039 			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_z * image_components * z;
2040 		}
2041 
2042 		for (unsigned int z = 0; z < dim_z; z++)
2043 		{
2044 			for (unsigned int y = 1; y < dim_y; y++)
2045 			{
2046 				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
2047 			}
2048 		}
2049 
2050 		for (unsigned int z = 0; z < dim_z; z++)
2051 		{
2052 			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
2053 
2054 			for (unsigned int y = 0; y < dim_y; y++)
2055 			{
2056 				int ym = y_flip ? dim_y - y - 1 : y;
2057 				switch (image_components)
2058 				{
2059 				case 1:		// single-component, treated as Luminance
2060 					for (unsigned int x = 0; x < dim_x; x++)
2061 					{
2062 						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
2063 					}
2064 					break;
2065 				case 2:		// two-component, treated as Luminance-Alpha
2066 					for (unsigned int x = 0; x < dim_x; x++)
2067 					{
2068 						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2069 						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2070 					}
2071 					break;
2072 				case 3:		// three-component, treated as RGB
2073 					for (unsigned int x = 0; x < dim_x; x++)
2074 					{
2075 						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2076 						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2077 						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2078 					}
2079 					break;
2080 				case 4:		// four-component, treated as RGBA
2081 					for (unsigned int x = 0; x < dim_x; x++)
2082 					{
2083 						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2084 						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2085 						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2086 						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2087 					}
2088 					break;
2089 				}
2090 			}
2091 		}
2092 	}
2093 	else						// if bitness == 16
2094 	{
2095 		row_pointers16 = new uint16_t **[dim_z];
2096 		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
2097 		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components];
2098 
2099 		for (unsigned int z = 1; z < dim_z; z++)
2100 		{
2101 			row_pointers16[z] = row_pointers16[0] + dim_y * z;
2102 			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
2103 		}
2104 
2105 		for (unsigned int z = 0; z < dim_z; z++)
2106 		{
2107 			for (unsigned int y = 1; y < dim_y; y++)
2108 			{
2109 				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
2110 			}
2111 		}
2112 
2113 		for (unsigned int z = 0; z < dim_z; z++)
2114 		{
2115 			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
2116 
2117 			for (unsigned int y = 0; y < dim_y; y++)
2118 			{
2119 				int ym = y_flip ? dim_y - y - 1: y;
2120 				switch (image_components)
2121 				{
2122 				case 1:		// single-component, treated as Luminance
2123 					for (unsigned int x = 0; x < dim_x; x++)
2124 					{
2125 						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
2126 					}
2127 					break;
2128 				case 2:		// two-component, treated as Luminance-Alpha
2129 					for (unsigned int x = 0; x < dim_x; x++)
2130 					{
2131 						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2132 						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2133 					}
2134 					break;
2135 				case 3:		// three-component, treated as RGB
2136 					for (unsigned int x = 0; x < dim_x; x++)
2137 					{
2138 						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2139 						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2140 						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2141 					}
2142 					break;
2143 				case 4:		// four-component, treated as RGBA
2144 					for (unsigned int x = 0; x < dim_x; x++)
2145 					{
2146 						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2147 						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2148 						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2149 						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2150 					}
2151 					break;
2152 				}
2153 			}
2154 		}
2155 	}
2156 
2157 	bool retval { true };
2158 	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
2159 
2160 	uint32_t dds_magic = DDS_MAGIC;
2161 
2162 	FILE *wf = fopen(filename, "wb");
2163 	if (wf)
2164 	{
2165 		void *dataptr = (bitness == 16) ?
2166 			reinterpret_cast<void*>(row_pointers16[0][0]) :
2167 			reinterpret_cast<void*>(row_pointers8[0][0]);
2168 
2169 		size_t expected_bytes_written = 4 + sizeof(dds_header) + (bitness > 8 ? sizeof(dds_header_dx10) : 0) + image_bytes;
2170 
2171 		size_t magic_bytes_written = fwrite(&dds_magic, 1, 4, wf);
2172 		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(dds_header), wf);
2173 
2174 		size_t dx10_bytes_written;
2175 		if (bitness > 8)
2176 		{
2177 			dx10_bytes_written = fwrite(&dx10, 1, sizeof(dx10), wf);
2178 		}
2179 		else
2180 		{
2181 			dx10_bytes_written = 0;
2182 		}
2183 
2184 		size_t data_bytes_written = fwrite(dataptr, 1, image_bytes, wf);
2185 
2186 		fclose(wf);
2187 		if (magic_bytes_written + hdr_bytes_written + dx10_bytes_written + data_bytes_written != expected_bytes_written)
2188 		{
2189 			retval = false;
2190 		}
2191 	}
2192 	else
2193 	{
2194 		retval = false;
2195 	}
2196 
2197 	if (row_pointers8)
2198 	{
2199 		delete[] row_pointers8[0][0];
2200 		delete[] row_pointers8[0];
2201 		delete[] row_pointers8;
2202 	}
2203 
2204 	if (row_pointers16)
2205 	{
2206 		delete[] row_pointers16[0][0];
2207 		delete[] row_pointers16[0];
2208 		delete[] row_pointers16;
2209 	}
2210 
2211 	return retval;
2212 }
2213 
2214 /**
2215  * @brief Supported uncompressed image load functions, and their associated file extensions.
2216  */
2217 static const struct
2218 {
2219 	const char* ending1;
2220 	const char* ending2;
2221 	astcenc_image* (*loader_func)(const char*, bool, bool&, unsigned int&);
2222 } loader_descs[] {
2223 	// LDR formats
2224 	{".png",   ".PNG",  load_png_with_wuffs},
2225 	// HDR formats
2226 	{".exr",   ".EXR",  load_image_with_tinyexr },
2227 	// Container formats
2228 	{".ktx",   ".KTX",  load_ktx_uncompressed_image },
2229 	{".dds",   ".DDS",  load_dds_uncompressed_image },
2230 	// Generic catch all; this one must be last in the list
2231 	{ nullptr, nullptr, load_image_with_stb }
2232 };
2233 
2234 static const int loader_descr_count = sizeof(loader_descs) / sizeof(loader_descs[0]);
2235 
2236 /**
2237  * @brief Supported uncompressed image store functions, and their associated file extensions.
2238  */
2239 static const struct
2240 {
2241 	const char *ending1;
2242 	const char *ending2;
2243 	int enforced_bitness;
2244 	bool (*storer_func)(const astcenc_image *output_image, const char *filename, int y_flip);
2245 } storer_descs[] {
2246 	// LDR formats
2247 	{".bmp", ".BMP",  8, store_bmp_image_with_stb},
2248 	{".png", ".PNG",  8, store_png_image_with_stb},
2249 	{".tga", ".TGA",  8, store_tga_image_with_stb},
2250 	// HDR formats
2251 	{".exr", ".EXR", 16, store_exr_image_with_tinyexr},
2252 	{".hdr", ".HDR", 16, store_hdr_image_with_stb},
2253 	// Container formats
2254 	{".dds", ".DDS",  0, store_dds_uncompressed_image},
2255 	{".ktx", ".KTX",  0, store_ktx_uncompressed_image}
2256 };
2257 
2258 static const int storer_descr_count = sizeof(storer_descs) / sizeof(storer_descs[0]);
2259 
2260 /* See header for documentation. */
get_output_filename_enforced_bitness(const char * filename)2261 int get_output_filename_enforced_bitness(
2262 	const char* filename
2263 ) {
2264 	const char *eptr = strrchr(filename, '.');
2265 	if (!eptr)
2266 	{
2267 		return 0;
2268 	}
2269 
2270 	for (int i = 0; i < storer_descr_count; i++)
2271 	{
2272 		if (strcmp(eptr, storer_descs[i].ending1) == 0
2273 		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2274 		{
2275 			return storer_descs[i].enforced_bitness;
2276 		}
2277 	}
2278 
2279 	return -1;
2280 }
2281 
2282 /* See header for documentation. */
load_ncimage(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)2283 astcenc_image* load_ncimage(
2284 	const char* filename,
2285 	bool y_flip,
2286 	bool& is_hdr,
2287 	unsigned int& component_count
2288 ) {
2289 	// Get the file extension
2290 	const char* eptr = strrchr(filename, '.');
2291 	if (!eptr)
2292 	{
2293 		eptr = filename;
2294 	}
2295 
2296 	// Scan through descriptors until a matching loader is found
2297 	for (unsigned int i = 0; i < loader_descr_count; i++)
2298 	{
2299 		if (loader_descs[i].ending1 == nullptr
2300 			|| strcmp(eptr, loader_descs[i].ending1) == 0
2301 			|| strcmp(eptr, loader_descs[i].ending2) == 0)
2302 		{
2303 			return loader_descs[i].loader_func(filename, y_flip, is_hdr, component_count);
2304 		}
2305 	}
2306 
2307 	// Should never reach here - stb_image provides a generic handler
2308 	return nullptr;
2309 }
2310 
2311 /* See header for documentation. */
store_ncimage(const astcenc_image * output_image,const char * filename,int y_flip)2312 bool store_ncimage(
2313 	const astcenc_image* output_image,
2314 	const char* filename,
2315 	int y_flip
2316 ) {
2317 	const char* eptr = strrchr(filename, '.');
2318 	if (!eptr)
2319 	{
2320 		eptr = ".ktx"; // use KTX file format if we don't have an ending.
2321 	}
2322 
2323 	for (int i = 0; i < storer_descr_count; i++)
2324 	{
2325 		if (strcmp(eptr, storer_descs[i].ending1) == 0
2326 		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2327 		{
2328 			return storer_descs[i].storer_func(output_image, filename, y_flip);
2329 		}
2330 	}
2331 
2332 	// Should never reach here - get_output_filename_enforced_bitness should
2333 	// have acted as a preflight check
2334 	return false;
2335 }
2336 
2337 /* ============================================================================
2338 	ASTC compressed file loading
2339 ============================================================================ */
/**
 * @brief The header at the start of a .astc file.
 *
 * Every field is a byte or byte array, and multi-byte values are stored least significant byte
 * first. The field order is the order in which the header is read from and written to the file.
 */
struct astc_header
{
	uint8_t magic[4];			// ASTC_MAGIC_ID, least significant byte first
	uint8_t block_x;			// Block dimensions
	uint8_t block_y;
	uint8_t block_z;
	uint8_t dim_x[3];			// dims = dim[0] + (dim[1] << 8) + (dim[2] << 16)
	uint8_t dim_y[3];			// Sizes are given in texels;
	uint8_t dim_z[3];			// block count is inferred
};

/** @brief The value stored in the @c magic field of a valid .astc file header. */
static const uint32_t ASTC_MAGIC_ID = 0x5CA1AB13;
2352 
/**
 * @brief Assemble four bytes into one value, with the first byte least significant.
 *
 * @param a   Bits 0-7 of the result.
 * @param b   Bits 8-15 of the result.
 * @param c   Bits 16-23 of the result.
 * @param d   Bits 24-31 of the result.
 *
 * @return The assembled value.
 */
static unsigned int unpack_bytes(
	uint8_t a,
	uint8_t b,
	uint8_t c,
	uint8_t d
) {
	const uint8_t bytes[4] { a, b, c, d };

	// Fold from the most significant byte down, shifting earlier bytes up as we go
	unsigned int value = 0;
	for (int i = 3; i >= 0; i--)
	{
		value = (value << 8) | bytes[i];
	}

	return value;
}
2364 
2365 /* See header for documentation. */
load_cimage(const char * filename,astc_compressed_image & img)2366 int load_cimage(
2367 	const char* filename,
2368 	astc_compressed_image& img
2369 ) {
2370 	std::ifstream file(filename, std::ios::in | std::ios::binary);
2371 	if (!file)
2372 	{
2373 		printf("ERROR: File open failed '%s'\n", filename);
2374 		return 1;
2375 	}
2376 
2377 	astc_header hdr;
2378 	file.read(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2379 	if (!file)
2380 	{
2381 		printf("ERROR: File read failed '%s'\n", filename);
2382 		return 1;
2383 	}
2384 
2385 	unsigned int magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]);
2386 	if (magicval != ASTC_MAGIC_ID)
2387 	{
2388 		printf("ERROR: File not recognized '%s'\n", filename);
2389 		return 1;
2390 	}
2391 
2392 	// Ensure these are not zero to avoid div by zero
2393 	unsigned int block_x = astc::max(static_cast<unsigned int>(hdr.block_x), 1u);
2394 	unsigned int block_y = astc::max(static_cast<unsigned int>(hdr.block_y), 1u);
2395 	unsigned int block_z = astc::max(static_cast<unsigned int>(hdr.block_z), 1u);
2396 
2397 	unsigned int dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0);
2398 	unsigned int dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0);
2399 	unsigned int dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0);
2400 
2401 	if (dim_x == 0 || dim_y == 0 || dim_z == 0)
2402 	{
2403 		printf("ERROR: File corrupt '%s'\n", filename);
2404 		return 1;
2405 	}
2406 
2407 	unsigned int xblocks = (dim_x + block_x - 1) / block_x;
2408 	unsigned int yblocks = (dim_y + block_y - 1) / block_y;
2409 	unsigned int zblocks = (dim_z + block_z - 1) / block_z;
2410 
2411 	size_t data_size = xblocks * yblocks * zblocks * 16;
2412 	uint8_t *buffer = new uint8_t[data_size];
2413 
2414 	file.read(reinterpret_cast<char*>(buffer), data_size);
2415 	if (!file)
2416 	{
2417 		printf("ERROR: File read failed '%s'\n", filename);
2418 		return 1;
2419 	}
2420 
2421 	img.data = buffer;
2422 	img.data_len = data_size;
2423 	img.block_x = block_x;
2424 	img.block_y = block_y;
2425 	img.block_z = block_z;
2426 	img.dim_x = dim_x;
2427 	img.dim_y = dim_y;
2428 	img.dim_z = dim_z;
2429 	return 0;
2430 }
2431 
2432 /* See header for documentation. */
store_cimage(const astc_compressed_image & img,const char * filename)2433 int store_cimage(
2434 	const astc_compressed_image& img,
2435 	const char* filename
2436 ) {
2437 	astc_header hdr;
2438 	hdr.magic[0] =  ASTC_MAGIC_ID        & 0xFF;
2439 	hdr.magic[1] = (ASTC_MAGIC_ID >>  8) & 0xFF;
2440 	hdr.magic[2] = (ASTC_MAGIC_ID >> 16) & 0xFF;
2441 	hdr.magic[3] = (ASTC_MAGIC_ID >> 24) & 0xFF;
2442 
2443 	hdr.block_x = static_cast<uint8_t>(img.block_x);
2444 	hdr.block_y = static_cast<uint8_t>(img.block_y);
2445 	hdr.block_z = static_cast<uint8_t>(img.block_z);
2446 
2447 	hdr.dim_x[0] =  img.dim_x        & 0xFF;
2448 	hdr.dim_x[1] = (img.dim_x >>  8) & 0xFF;
2449 	hdr.dim_x[2] = (img.dim_x >> 16) & 0xFF;
2450 
2451 	hdr.dim_y[0] =  img.dim_y       & 0xFF;
2452 	hdr.dim_y[1] = (img.dim_y >>  8) & 0xFF;
2453 	hdr.dim_y[2] = (img.dim_y >> 16) & 0xFF;
2454 
2455 	hdr.dim_z[0] =  img.dim_z        & 0xFF;
2456 	hdr.dim_z[1] = (img.dim_z >>  8) & 0xFF;
2457 	hdr.dim_z[2] = (img.dim_z >> 16) & 0xFF;
2458 
2459 	std::ofstream file(filename, std::ios::out | std::ios::binary);
2460 	if (!file)
2461 	{
2462 		printf("ERROR: File open failed '%s'\n", filename);
2463 		return 1;
2464 	}
2465 
2466 	file.write(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2467 	file.write(reinterpret_cast<char*>(img.data), img.data_len);
2468 	return 0;
2469 }
2470