1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for codec library front-end.
20  */
21 
22 #include "astcenc.h"
23 #include "astcenccli_internal.h"
24 
25 #include <cassert>
26 #include <cstring>
27 #include <functional>
28 #include <string>
29 #include <sstream>
30 #include <vector>
31 #include <memory>
32 
33 /* ============================================================================
34 	Data structure definitions
35 ============================================================================ */
36 
37 typedef unsigned int astcenc_operation;
38 
39 struct mode_entry
40 {
41 	const char* opt;
42 	astcenc_operation operation;
43 	astcenc_profile decode_mode;
44 };
45 
46 /* ============================================================================
47 	Constants and literals
48 ============================================================================ */
49 
/** @brief Stage bit: a compressed image must be loaded. */
static const unsigned int ASTCENC_STAGE_LD_COMP    = 1u << 0;

/** @brief Stage bit: a compressed image must be stored. */
static const unsigned int ASTCENC_STAGE_ST_COMP    = 1u << 1;

/** @brief Stage bit: an uncompressed image must be loaded. */
static const unsigned int ASTCENC_STAGE_LD_NCOMP   = 1u << 2;

/** @brief Stage bit: an uncompressed image must be stored. */
static const unsigned int ASTCENC_STAGE_ST_NCOMP   = 1u << 3;

/** @brief Stage bit: an image must be compressed. */
static const unsigned int ASTCENC_STAGE_COMPRESS   = 1u << 4;

/** @brief Stage bit: an image must be decompressed. */
static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1u << 5;

/** @brief Stage bit: the result must be compared with the original input. */
static const unsigned int ASTCENC_STAGE_COMPARE    = 1u << 6;
70 
71 /** @brief Operation indicating an unknown request (should never happen). */
72 static const astcenc_operation ASTCENC_OP_UNKNOWN  = 0;
73 
74 /** @brief Operation indicating the user wants to print long-form help text and version info. */
75 static const astcenc_operation ASTCENC_OP_HELP     = 1 << 7;
76 
77 /** @brief Operation indicating the user wants to print short-form help text and version info. */
78 static const astcenc_operation ASTCENC_OP_VERSION  = 1 << 8;
79 
80 /** @brief Operation indicating the user wants to compress and store an image. */
81 static const astcenc_operation ASTCENC_OP_COMPRESS =
82                                ASTCENC_STAGE_LD_NCOMP |
83                                ASTCENC_STAGE_COMPRESS |
84                                ASTCENC_STAGE_ST_COMP;
85 
86 /** @brief Operation indicating the user wants to decompress and store an image. */
87 static const astcenc_operation ASTCENC_OP_DECOMPRESS =
88                                ASTCENC_STAGE_LD_COMP |
89                                ASTCENC_STAGE_DECOMPRESS |
90                                ASTCENC_STAGE_ST_NCOMP;
91 
92 /** @brief Operation indicating the user wants to test a compression setting on an image. */
93 static const astcenc_operation ASTCENC_OP_TEST =
94                                ASTCENC_STAGE_LD_NCOMP |
95                                ASTCENC_STAGE_COMPRESS |
96                                ASTCENC_STAGE_DECOMPRESS |
97                                ASTCENC_STAGE_COMPARE |
98                                ASTCENC_STAGE_ST_NCOMP;
99 
/**
 * @brief Image preprocessing tasks applied prior to encoding.
 */
enum astcenc_preprocess
{
	/** @brief No image preprocessing. */
	ASTCENC_PP_NONE        = 0,

	/** @brief Normal vector unit-length normalization. */
	ASTCENC_PP_NORMALIZE   = 1,

	/** @brief Color data alpha premultiplication. */
	ASTCENC_PP_PREMULTIPLY = 2
};
112 
113 /** @brief Decode table for command line operation modes. */
114 static const mode_entry modes[] {
115 	{"-cl",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR},
116 	{"-dl",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
117 	{"-tl",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR},
118 	{"-cs",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR_SRGB},
119 	{"-ds",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
120 	{"-ts",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR_SRGB},
121 	{"-ch",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR_RGB_LDR_A},
122 	{"-dh",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
123 	{"-th",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR_RGB_LDR_A},
124 	{"-cH",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR},
125 	{"-dH",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
126 	{"-tH",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR},
127 	{"-h",       ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
128 	{"-help",    ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
129 	{"-v",       ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR},
130 	{"-version", ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR}
131 };
132 
133 /**
134  * @brief Compression workload definition for worker threads.
135  */
136 struct compression_workload
137 {
138 	astcenc_context* context;
139 	astcenc_image* image;
140 	astcenc_swizzle swizzle;
141 	uint8_t* data_out;
142 	size_t data_len;
143 	astcenc_error error;
144 };
145 
146 /**
147  * @brief Decompression workload definition for worker threads.
148  */
149 struct decompression_workload
150 {
151 	astcenc_context* context;
152 	uint8_t* data;
153 	size_t data_len;
154 	astcenc_image* image_out;
155 	astcenc_swizzle swizzle;
156 	astcenc_error error;
157 };
158 
/**
 * @brief Test if a string argument is a well formed float.
 *
 * The whole string must parse as a single float; leading whitespace and
 * trailing characters are both rejected.
 *
 * @param target   The string to test.
 *
 * @return @c true if the string is a well formed float, @c false otherwise.
 */
static bool is_float(
	std::string target
) {
	std::istringstream parser(target);

	// Disable whitespace skipping so that leading whitespace fails the parse
	parser >> std::noskipws;

	float parsed_value;
	if (!(parser >> parsed_value))
	{
		return false;
	}

	// A successful parse must also have consumed the entire string
	return parser.eof();
}
174 
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to test.
 * @param suffix   The suffix to look for; an empty suffix always matches.
 *
 * @return @c true if @c str ends with @c suffix, @c false otherwise.
 */
static bool ends_with(
	const std::string& str,
	const std::string& suffix
) {
	// A suffix longer than the string can never match
	if (suffix.size() > str.size())
	{
		return false;
	}

	size_t tail_start = str.size() - suffix.size();
	return str.compare(tail_start, std::string::npos, suffix) == 0;
}
185 
186 /**
187  * @brief Runner callback function for a compression worker thread.
188  *
189  * @param thread_count   The number of threads in the worker pool.
190  * @param thread_id      The index of this thread in the worker pool.
191  * @param payload        The parameters for this thread.
192  */
compression_workload_runner(int thread_count,int thread_id,void * payload)193 static void compression_workload_runner(
194 	int thread_count,
195 	int thread_id,
196 	void* payload
197 ) {
198 	(void)thread_count;
199 
200 	compression_workload* work = static_cast<compression_workload*>(payload);
201 	astcenc_error error = astcenc_compress_image(
202 	                       work->context, work->image, &work->swizzle,
203 	                       work->data_out, work->data_len, thread_id);
204 
205 	// This is a racy update, so which error gets returned is a random, but it
206 	// will reliably report an error if an error occurs
207 	if (error != ASTCENC_SUCCESS)
208 	{
209 		work->error = error;
210 	}
211 }
212 
213 /**
214  * @brief Runner callback function for a decompression worker thread.
215  *
216  * @param thread_count   The number of threads in the worker pool.
217  * @param thread_id      The index of this thread in the worker pool.
218  * @param payload        The parameters for this thread.
219  */
decompression_workload_runner(int thread_count,int thread_id,void * payload)220 static void decompression_workload_runner(
221 	int thread_count,
222 	int thread_id,
223 	void* payload
224 ) {
225 	(void)thread_count;
226 
227 	decompression_workload* work = static_cast<decompression_workload*>(payload);
228 	astcenc_error error = astcenc_decompress_image(
229 	                       work->context, work->data, work->data_len,
230 	                       work->image_out, &work->swizzle, thread_id);
231 
232 	// This is a racy update, so which error gets returned is a random, but it
233 	// will reliably report an error if an error occurs
234 	if (error != ASTCENC_SUCCESS)
235 	{
236 		work->error = error;
237 	}
238 }
239 
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * Only a dot in the final path component is treated as the start of the file
 * extension, so a pattern such as "./dir/name" is reported as an error rather
 * than being split inside the directory part. Both '/' and '\\' are treated
 * as path separators.
 *
 * @param basename The base pattern; must contain a file extension.
 * @param index    The slice index.
 * @param error    Set to true on error (no extension found), false on success.
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
	const std::string& basename,
	unsigned int index,
	bool& error
) {
	size_t sep = basename.find_last_of('.');
	size_t dir_sep = basename.find_last_of("/\\");

	// A dot only marks an extension when it sits after the last path separator
	bool has_extension = (sep != std::string::npos) &&
	                     ((dir_sep == std::string::npos) || (sep > dir_sep));
	if (!has_extension)
	{
		error = true;
		return "";
	}

	std::string base = basename.substr(0, sep);
	std::string ext = basename.substr(sep);
	std::string name = base + "_" + std::to_string(index) + ext;
	error = false;
	return name;
}
269 
270 /**
271  * @brief Load a non-astc image file from memory.
272  *
273  * @param filename            The file to load, or a pattern for array loads.
274  * @param dim_z               The number of slices to load.
275  * @param y_flip              Should this image be Y flipped?
276  * @param[out] is_hdr         Is the loaded image HDR?
277  * @param[out] component_count The number of components in the loaded image.
278  *
279  * @return The astc image file, or nullptr on error.
280  */
load_uncomp_file(const char * filename,unsigned int dim_z,bool y_flip,bool & is_hdr,unsigned int & component_count)281 static astcenc_image* load_uncomp_file(
282 	const char* filename,
283 	unsigned int dim_z,
284 	bool y_flip,
285 	bool& is_hdr,
286 	unsigned int& component_count
287 ) {
288 	astcenc_image *image = nullptr;
289 
290 	// For a 2D image just load the image directly
291 	if (dim_z == 1)
292 	{
293 		image = load_ncimage(filename, y_flip, is_hdr, component_count);
294 	}
295 	else
296 	{
297 		bool slice_is_hdr;
298 		unsigned int slice_component_count;
299 		astcenc_image* slice = nullptr;
300 		std::vector<astcenc_image*> slices;
301 
302 		// For a 3D image load an array of slices
303 		for (unsigned int image_index = 0; image_index < dim_z; image_index++)
304 		{
305 			bool error;
306 			std::string slice_name = get_slice_filename(filename, image_index, error);
307 			if (error)
308 			{
309 				printf("ERROR: Image pattern does not contain file extension: %s\n", filename);
310 				break;
311 			}
312 
313 			slice = load_ncimage(slice_name.c_str(), y_flip,
314 			                     slice_is_hdr, slice_component_count);
315 			if (!slice)
316 			{
317 				break;
318 			}
319 
320 			slices.push_back(slice);
321 
322 			// Check it is not a 3D image
323 			if (slice->dim_z != 1)
324 			{
325 				printf("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
326 				break;
327 			}
328 
329 			// Check slices are consistent with each other
330 			if (image_index != 0)
331 			{
332 				if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
333 				{
334 					printf("ERROR: Image array[0] and [%d] are different formats\n", image_index);
335 					break;
336 				}
337 
338 				if ((slices[0]->dim_x != slice->dim_x) ||
339 				    (slices[0]->dim_y != slice->dim_y) ||
340 				    (slices[0]->dim_z != slice->dim_z))
341 				{
342 					printf("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
343 					break;
344 				}
345 			}
346 			else
347 			{
348 				is_hdr = slice_is_hdr;
349 				component_count = slice_component_count;
350 			}
351 		}
352 
353 		// If all slices loaded correctly then repack them into a single image
354 		if (slices.size() == dim_z)
355 		{
356 			unsigned int dim_x = slices[0]->dim_x;
357 			unsigned int dim_y = slices[0]->dim_y;
358 			int bitness = is_hdr ? 16 : 8;
359 			int slice_size = dim_x * dim_y;
360 
361 			image = alloc_image(bitness, dim_x, dim_y, dim_z);
362 
363 			// Combine 2D source images into one 3D image
364 			for (unsigned int z = 0; z < dim_z; z++)
365 			{
366 				if (image->data_type == ASTCENC_TYPE_U8)
367 				{
368 					uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
369 					uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
370 					size_t copy_size = slice_size * 4 * sizeof(uint8_t);
371 					memcpy(data8, data8src, copy_size);
372 				}
373 				else if (image->data_type == ASTCENC_TYPE_F16)
374 				{
375 					uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
376 					uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
377 					size_t copy_size = slice_size * 4 * sizeof(uint16_t);
378 					memcpy(data16, data16src, copy_size);
379 				}
380 				else // if (image->data_type == ASTCENC_TYPE_F32)
381 				{
382 					assert(image->data_type == ASTCENC_TYPE_F32);
383 					float* data32 = static_cast<float*>(image->data[z]);
384 					float* data32src = static_cast<float*>(slices[z]->data[0]);
385 					size_t copy_size = slice_size * 4 * sizeof(float);
386 					memcpy(data32, data32src, copy_size);
387 				}
388 			}
389 		}
390 
391 		for (auto &i : slices)
392 		{
393 			free_image(i);
394 		}
395 	}
396 
397 	return image;
398 }
399 
400 /**
401  * @brief Parse the command line.
402  *
403  * @param      argc        Command line argument count.
404  * @param[in]  argv        Command line argument vector.
405  * @param[out] operation   Codec operation mode.
406  * @param[out] profile     Codec color profile.
407  *
408  * @return 0 if everything is okay, 1 if there is some error
409  */
parse_commandline_options(int argc,char ** argv,astcenc_operation & operation,astcenc_profile & profile)410 static int parse_commandline_options(
411 	int argc,
412 	char **argv,
413 	astcenc_operation& operation,
414 	astcenc_profile& profile
415 ) {
416 	assert(argc >= 2); (void)argc;
417 
418 	profile = ASTCENC_PRF_LDR;
419 	operation = ASTCENC_OP_UNKNOWN;
420 
421 	int modes_count = sizeof(modes) / sizeof(modes[0]);
422 	for (int i = 0; i < modes_count; i++)
423 	{
424 		if (!strcmp(modes[i].opt, argv[1]))
425 		{
426 			operation = modes[i].operation;
427 			profile = modes[i].decode_mode;
428 			break;
429 		}
430 	}
431 
432 	if (operation == ASTCENC_OP_UNKNOWN)
433 	{
434 		printf("ERROR: Unrecognized operation '%s'\n", argv[1]);
435 		return 1;
436 	}
437 
438 	return 0;
439 }
440 
441 /**
442  * @brief Initialize the astcenc_config
443  *
444  * @param      argc         Command line argument count.
445  * @param[in]  argv         Command line argument vector.
446  * @param      operation    Codec operation mode.
447  * @param[out] profile      Codec color profile.
448  * @param      comp_image   Compressed image if a decompress operation.
449  * @param[out] preprocess   Image preprocess operation.
450  * @param[out] config       Codec configuration.
451  *
452  * @return 0 if everything is okay, 1 if there is some error
453  */
init_astcenc_config(int argc,char ** argv,astcenc_profile profile,astcenc_operation operation,astc_compressed_image & comp_image,astcenc_preprocess & preprocess,astcenc_config & config)454 static int init_astcenc_config(
455 	int argc,
456 	char **argv,
457 	astcenc_profile profile,
458 	astcenc_operation operation,
459 	astc_compressed_image& comp_image,
460 	astcenc_preprocess& preprocess,
461 	astcenc_config& config
462 ) {
463 	unsigned int block_x = 0;
464 	unsigned int block_y = 0;
465 	unsigned int block_z = 1;
466 
467 	// For decode the block size is set by the incoming image.
468 	if (operation == ASTCENC_OP_DECOMPRESS)
469 	{
470 		block_x = comp_image.block_x;
471 		block_y = comp_image.block_y;
472 		block_z = comp_image.block_z;
473 	}
474 
475 	float quality = 0.0f;
476 	preprocess = ASTCENC_PP_NONE;
477 
478 	// parse the command line's encoding options.
479 	int argidx = 4;
480 	if (operation & ASTCENC_STAGE_COMPRESS)
481 	{
482 		// Read and decode block size
483 		if (argc < 5)
484 		{
485 			printf("ERROR: Block size must be specified\n");
486 			return 1;
487 		}
488 
489 		int cnt2D, cnt3D;
490 		int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
491 		                        &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
492 		// Character after the last match should be a NUL
493 		if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
494 		{
495 			printf("ERROR: Block size '%s' is invalid\n", argv[4]);
496 			return 1;
497 		}
498 
499 		// Read and decode search quality
500 		if (argc < 6)
501 		{
502 			printf("ERROR: Search quality level must be specified\n");
503 			return 1;
504 		}
505 
506 		if (!strcmp(argv[5], "-fastest"))
507 		{
508 			quality = ASTCENC_PRE_FASTEST;
509 		}
510 		else if (!strcmp(argv[5], "-fast"))
511 		{
512 			quality = ASTCENC_PRE_FAST;
513 		}
514 		else if (!strcmp(argv[5], "-medium"))
515 		{
516 			quality = ASTCENC_PRE_MEDIUM;
517 		}
518 		else if (!strcmp(argv[5], "-thorough"))
519 		{
520 			quality = ASTCENC_PRE_THOROUGH;
521 		}
522 		else if (!strcmp(argv[5], "-verythorough"))
523 		{
524 			quality = ASTCENC_PRE_VERYTHOROUGH;
525 		}
526 		else if (!strcmp(argv[5], "-exhaustive"))
527 		{
528 			quality = ASTCENC_PRE_EXHAUSTIVE;
529 		}
530 		else if (is_float(argv[5]))
531 		{
532 			quality = static_cast<float>(atof(argv[5]));
533 		}
534 		else
535 		{
536 			printf("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
537 			return 1;
538 		}
539 
540 		argidx = 6;
541 	}
542 
543 	unsigned int flags = 0;
544 
545 	// Gather the flags that we need
546 	while (argidx < argc)
547 	{
548 		if (!strcmp(argv[argidx], "-a"))
549 		{
550 			// Skip over the data value for now
551 			argidx++;
552 			flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
553 		}
554 		else if (!strcmp(argv[argidx], "-mask"))
555 		{
556 			flags |= ASTCENC_FLG_MAP_MASK;
557 		}
558 		else if (!strcmp(argv[argidx], "-normal"))
559 		{
560 			flags |= ASTCENC_FLG_MAP_NORMAL;
561 		}
562 		else if (!strcmp(argv[argidx], "-rgbm"))
563 		{
564 			// Skip over the data value for now
565 			argidx++;
566 			flags |= ASTCENC_FLG_MAP_RGBM;
567 		}
568 		else if (!strcmp(argv[argidx], "-perceptual"))
569 		{
570 			flags |= ASTCENC_FLG_USE_PERCEPTUAL;
571 		}
572 		else if (!strcmp(argv[argidx], "-pp-normalize"))
573 		{
574 			if (preprocess != ASTCENC_PP_NONE)
575 			{
576 				printf("ERROR: Only a single image preprocess can be used\n");
577 				return 1;
578 			}
579 			preprocess = ASTCENC_PP_NORMALIZE;
580 		}
581 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
582 		{
583 			if (preprocess != ASTCENC_PP_NONE)
584 			{
585 				printf("ERROR: Only a single image preprocess can be used\n");
586 				return 1;
587 			}
588 			preprocess = ASTCENC_PP_PREMULTIPLY;
589 		}
590 		argidx ++;
591 	}
592 
593 #if defined(ASTCENC_DECOMPRESS_ONLY)
594 	flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
595 #else
596 	// Decompression can skip some memory allocation, but need full tables
597 	if (operation == ASTCENC_OP_DECOMPRESS)
598 	{
599 		flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
600 	}
601 	// Compression and test passes can skip some decimation initialization
602 	// as we know we are decompressing images that were compressed using the
603 	// same settings and heuristics ...
604 	else
605 	{
606 		flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
607 	}
608 #endif
609 
610 	astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
611 	                                           quality, flags, &config);
612 	if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
613 	{
614 		printf("ERROR: Block size '%s' is invalid\n", argv[4]);
615 		return 1;
616 	}
617 	else if (status == ASTCENC_ERR_BAD_CPU_ISA)
618 	{
619 		printf("ERROR: Required SIMD ISA support missing on this CPU\n");
620 		return 1;
621 	}
622 	else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
623 	{
624 		printf("ERROR: astcenc must not be compiled with -ffast-math\n");
625 		return 1;
626 	}
627 	else if (status != ASTCENC_SUCCESS)
628 	{
629 		printf("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
630 		return 1;
631 	}
632 
633 	return 0;
634 }
635 
636 /**
637  * @brief Edit the astcenc_config
638  *
639  * @param         argc         Command line argument count.
640  * @param[in]     argv         Command line argument vector.
641  * @param         operation    Codec operation.
642  * @param[out]    cli_config   Command line config.
643  * @param[in,out] config       Codec configuration.
644  *
645  * @return 0 if everything is OK, 1 if there is some error
646  */
edit_astcenc_config(int argc,char ** argv,const astcenc_operation operation,cli_config_options & cli_config,astcenc_config & config)647 static int edit_astcenc_config(
648 	int argc,
649 	char **argv,
650 	const astcenc_operation operation,
651 	cli_config_options& cli_config,
652 	astcenc_config& config
653 ) {
654 
655 	int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
656 
657 	while (argidx < argc)
658 	{
659 		if (!strcmp(argv[argidx], "-silent"))
660 		{
661 			argidx++;
662 			cli_config.silentmode = 1;
663 		}
664 		else if (!strcmp(argv[argidx], "-cw"))
665 		{
666 			argidx += 5;
667 			if (argidx > argc)
668 			{
669 				printf("ERROR: -cw switch with less than 4 arguments\n");
670 				return 1;
671 			}
672 
673 			config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
674 			config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
675 			config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
676 			config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
677 		}
678 		else if (!strcmp(argv[argidx], "-a"))
679 		{
680 			argidx += 2;
681 			if (argidx > argc)
682 			{
683 				printf("ERROR: -a switch with no argument\n");
684 				return 1;
685 			}
686 
687 			config.a_scale_radius = atoi(argv[argidx - 1]);
688 		}
689 		else if (!strcmp(argv[argidx], "-esw"))
690 		{
691 			argidx += 2;
692 			if (argidx > argc)
693 			{
694 				printf("ERROR: -esw switch with no argument\n");
695 				return 1;
696 			}
697 
698 			if (strlen(argv[argidx - 1]) != 4)
699 			{
700 				printf("ERROR: -esw pattern does not contain 4 characters\n");
701 				return 1;
702 			}
703 
704 			astcenc_swz swizzle_components[4];
705 			for (int i = 0; i < 4; i++)
706 			{
707 				switch (argv[argidx - 1][i])
708 				{
709 				case 'r':
710 					swizzle_components[i] = ASTCENC_SWZ_R;
711 					break;
712 				case 'g':
713 					swizzle_components[i] = ASTCENC_SWZ_G;
714 					break;
715 				case 'b':
716 					swizzle_components[i] = ASTCENC_SWZ_B;
717 					break;
718 				case 'a':
719 					swizzle_components[i] = ASTCENC_SWZ_A;
720 					break;
721 				case '0':
722 					swizzle_components[i] = ASTCENC_SWZ_0;
723 					break;
724 				case '1':
725 					swizzle_components[i] = ASTCENC_SWZ_1;
726 					break;
727 				default:
728 					printf("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
729 					return 1;
730 				}
731 			}
732 
733 			cli_config.swz_encode.r = swizzle_components[0];
734 			cli_config.swz_encode.g = swizzle_components[1];
735 			cli_config.swz_encode.b = swizzle_components[2];
736 			cli_config.swz_encode.a = swizzle_components[3];
737 		}
738 		else if (!strcmp(argv[argidx], "-ssw"))
739 		{
740 			argidx += 2;
741 			if (argidx > argc)
742 			{
743 				printf("ERROR: -ssw switch with no argument\n");
744 				return 1;
745 			}
746 
747 			size_t char_count = strlen(argv[argidx - 1]);
748 			if (char_count == 0)
749 			{
750 				printf("ERROR: -ssw pattern contains no characters\n");
751 				return 1;
752 			}
753 
754 			if (char_count > 4)
755 			{
756 				printf("ERROR: -ssw pattern contains more than 4 characters\n");
757 				return 1;
758 			}
759 
760 			bool found_r = false;
761 			bool found_g = false;
762 			bool found_b = false;
763 			bool found_a = false;
764 
765 			for (size_t i = 0; i < char_count; i++)
766 			{
767 				switch (argv[argidx - 1][i])
768 				{
769 				case 'r':
770 					found_r = true;
771 					break;
772 				case 'g':
773 					found_g = true;
774 					break;
775 				case 'b':
776 					found_b = true;
777 					break;
778 				case 'a':
779 					found_a = true;
780 					break;
781 				default:
782 					printf("ERROR: -ssw component '%c' is not valid\n", argv[argidx - 1][i]);
783 					return 1;
784 				}
785 			}
786 
787 			config.cw_r_weight = found_r ? 1.0f : 0.0f;
788 			config.cw_g_weight = found_g ? 1.0f : 0.0f;
789 			config.cw_b_weight = found_b ? 1.0f : 0.0f;
790 			config.cw_a_weight = found_a ? 1.0f : 0.0f;
791 		}
792 		else if (!strcmp(argv[argidx], "-dsw"))
793 		{
794 			argidx += 2;
795 			if (argidx > argc)
796 			{
797 				printf("ERROR: -dsw switch with no argument\n");
798 				return 1;
799 			}
800 
801 			if (strlen(argv[argidx - 1]) != 4)
802 			{
803 				printf("ERROR: -dsw switch does not contain 4 characters\n");
804 				return 1;
805 			}
806 
807 			astcenc_swz swizzle_components[4];
808 			for (int i = 0; i < 4; i++)
809 			{
810 				switch (argv[argidx - 1][i])
811 				{
812 				case 'r':
813 					swizzle_components[i] = ASTCENC_SWZ_R;
814 					break;
815 				case 'g':
816 					swizzle_components[i] = ASTCENC_SWZ_G;
817 					break;
818 				case 'b':
819 					swizzle_components[i] = ASTCENC_SWZ_B;
820 					break;
821 				case 'a':
822 					swizzle_components[i] = ASTCENC_SWZ_A;
823 					break;
824 				case '0':
825 					swizzle_components[i] = ASTCENC_SWZ_0;
826 					break;
827 				case '1':
828 					swizzle_components[i] = ASTCENC_SWZ_1;
829 					break;
830 				case 'z':
831 					swizzle_components[i] =  ASTCENC_SWZ_Z;
832 					break;
833 				default:
834 					printf("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
835 					return 1;
836 				}
837 			}
838 
839 			cli_config.swz_decode.r = swizzle_components[0];
840 			cli_config.swz_decode.g = swizzle_components[1];
841 			cli_config.swz_decode.b = swizzle_components[2];
842 			cli_config.swz_decode.a = swizzle_components[3];
843 		}
844 		// presets begin here
845 		else if (!strcmp(argv[argidx], "-mask"))
846 		{
847 			argidx++;
848 		}
849 		else if (!strcmp(argv[argidx], "-normal"))
850 		{
851 			argidx++;
852 
853 			cli_config.swz_encode.r = ASTCENC_SWZ_R;
854 			cli_config.swz_encode.g = ASTCENC_SWZ_R;
855 			cli_config.swz_encode.b = ASTCENC_SWZ_R;
856 			cli_config.swz_encode.a = ASTCENC_SWZ_G;
857 
858 			cli_config.swz_decode.r = ASTCENC_SWZ_R;
859 			cli_config.swz_decode.g = ASTCENC_SWZ_A;
860 			cli_config.swz_decode.b = ASTCENC_SWZ_Z;
861 			cli_config.swz_decode.a = ASTCENC_SWZ_1;
862 		}
863 		else if (!strcmp(argv[argidx], "-rgbm"))
864 		{
865 			argidx += 2;
866 			if (argidx > argc)
867 			{
868 				printf("ERROR: -rgbm switch with no argument\n");
869 				return 1;
870 			}
871 
872 			config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
873 			config.cw_a_weight = 2.0f * config.rgbm_m_scale;
874 		}
875 		else if (!strcmp(argv[argidx], "-perceptual"))
876 		{
877 			argidx++;
878 		}
879 		else if (!strcmp(argv[argidx], "-pp-normalize"))
880 		{
881 			argidx++;
882 		}
883 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
884 		{
885 			argidx++;
886 		}
887 		else if (!strcmp(argv[argidx], "-blockmodelimit"))
888 		{
889 			argidx += 2;
890 			if (argidx > argc)
891 			{
892 				printf("ERROR: -blockmodelimit switch with no argument\n");
893 				return 1;
894 			}
895 
896 			config.tune_block_mode_limit = atoi(argv[argidx - 1]);
897 		}
898 		else if (!strcmp(argv[argidx], "-partitioncountlimit"))
899 		{
900 			argidx += 2;
901 			if (argidx > argc)
902 			{
903 				printf("ERROR: -partitioncountlimit switch with no argument\n");
904 				return 1;
905 			}
906 
907 			config.tune_partition_count_limit = atoi(argv[argidx - 1]);
908 		}
909 		else if (!strcmp(argv[argidx], "-2partitionindexlimit"))
910 		{
911 			argidx += 2;
912 			if (argidx > argc)
913 			{
914 				printf("ERROR: -2partitionindexlimit switch with no argument\n");
915 				return 1;
916 			}
917 
918 			config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
919 		}
920 		else if (!strcmp(argv[argidx], "-3partitionindexlimit"))
921 		{
922 			argidx += 2;
923 			if (argidx > argc)
924 			{
925 				printf("ERROR: -3partitionindexlimit switch with no argument\n");
926 				return 1;
927 			}
928 
929 			config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
930 		}
931 		else if (!strcmp(argv[argidx], "-4partitionindexlimit"))
932 		{
933 			argidx += 2;
934 			if (argidx > argc)
935 			{
936 				printf("ERROR: -4partitionindexlimit switch with no argument\n");
937 				return 1;
938 			}
939 
940 			config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
941 		}
942 		else if (!strcmp(argv[argidx], "-2partitioncandiatelimit"))
943 		{
944 			argidx += 2;
945 			if (argidx > argc)
946 			{
947 				printf("ERROR: -2partitioncandidatelimit switch with no argument\n");
948 				return 1;
949 			}
950 
951 			config.tune_2partitioning_candidate_limit = atoi(argv[argidx - 1]);
952 		}
953 		else if (!strcmp(argv[argidx], "-3partitioncandiatelimit"))
954 		{
955 			argidx += 2;
956 			if (argidx > argc)
957 			{
958 				printf("ERROR: -3partitioncandiatelimit switch with no argument\n");
959 				return 1;
960 			}
961 
962 			config.tune_3partitioning_candidate_limit = atoi(argv[argidx - 1]);
963 		}
964 		else if (!strcmp(argv[argidx], "-4partitioncandiatelimit"))
965 		{
966 			argidx += 2;
967 			if (argidx > argc)
968 			{
969 				printf("ERROR: -4partitioncandiatelimit switch with no argument\n");
970 				return 1;
971 			}
972 
973 			config.tune_4partitioning_candidate_limit = atoi(argv[argidx - 1]);
974 		}
975 		else if (!strcmp(argv[argidx], "-dblimit"))
976 		{
977 			argidx += 2;
978 			if (argidx > argc)
979 			{
980 				printf("ERROR: -dblimit switch with no argument\n");
981 				return 1;
982 			}
983 
984 			if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
985 			{
986 				config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
987 			}
988 		}
989 		else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
990 		{
991 			argidx += 2;
992 			if (argidx > argc)
993 			{
994 				printf("ERROR: -2partitionlimitfactor switch with no argument\n");
995 				return 1;
996 			}
997 
998 			config.tune_2_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
999 		}
1000 		else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
1001 		{
1002 			argidx += 2;
1003 			if (argidx > argc)
1004 			{
1005 				printf("ERROR: -3partitionlimitfactor switch with no argument\n");
1006 				return 1;
1007 			}
1008 
1009 			config.tune_3_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1010 		}
1011 		else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
1012 		{
1013 			argidx += 2;
1014 			if (argidx > argc)
1015 			{
1016 				printf("ERROR: -2planelimitcorrelation switch with no argument\n");
1017 				return 1;
1018 			}
1019 
1020 			config.tune_2_plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
1021 		}
1022 		else if (!strcmp(argv[argidx], "-refinementlimit"))
1023 		{
1024 			argidx += 2;
1025 			if (argidx > argc)
1026 			{
1027 				printf("ERROR: -refinementlimit switch with no argument\n");
1028 				return 1;
1029 			}
1030 
1031 			config.tune_refinement_limit = atoi(argv[argidx - 1]);
1032 		}
1033 		else if (!strcmp(argv[argidx], "-candidatelimit"))
1034 		{
1035 			argidx += 2;
1036 			if (argidx > argc)
1037 			{
1038 				printf("ERROR: -candidatelimit switch with no argument\n");
1039 				return 1;
1040 			}
1041 
1042 			config.tune_candidate_limit = atoi(argv[argidx - 1]);
1043 		}
1044 		else if (!strcmp(argv[argidx], "-j"))
1045 		{
1046 			argidx += 2;
1047 			if (argidx > argc)
1048 			{
1049 				printf("ERROR: -j switch with no argument\n");
1050 				return 1;
1051 			}
1052 
1053 			cli_config.thread_count = atoi(argv[argidx - 1]);
1054 		}
1055 		else if (!strcmp(argv[argidx], "-repeats"))
1056 		{
1057 			argidx += 2;
1058 			if (argidx > argc)
1059 			{
1060 				printf("ERROR: -repeats switch with no argument\n");
1061 				return 1;
1062 			}
1063 
1064 			cli_config.repeat_count = atoi(argv[argidx - 1]);
1065 			if (cli_config.repeat_count <= 0)
1066 			{
1067 				printf("ERROR: -repeats value must be at least one\n");
1068 				return 1;
1069 			}
1070 		}
1071 		else if (!strcmp(argv[argidx], "-yflip"))
1072 		{
1073 			argidx++;
1074 			cli_config.y_flip = 1;
1075 		}
1076 		else if (!strcmp(argv[argidx], "-mpsnr"))
1077 		{
1078 			argidx += 3;
1079 			if (argidx > argc)
1080 			{
1081 				printf("ERROR: -mpsnr switch with less than 2 arguments\n");
1082 				return 1;
1083 			}
1084 
1085 			cli_config.low_fstop = atoi(argv[argidx - 2]);
1086 			cli_config.high_fstop = atoi(argv[argidx - 1]);
1087 			if (cli_config.high_fstop < cli_config.low_fstop)
1088 			{
1089 				printf("ERROR: -mpsnr switch <low> is greater than the <high>\n");
1090 				return 1;
1091 			}
1092 		}
1093 		// Option: Encode a 3D image from a sequence of 2D images.
1094 		else if (!strcmp(argv[argidx], "-zdim"))
1095 		{
1096 			// Only supports compressing
1097 			if (!(operation & ASTCENC_STAGE_COMPRESS))
1098 			{
1099 				printf("ERROR: -zdim switch is only valid for compression\n");
1100 				return 1;
1101 			}
1102 
1103 			// Image depth must be specified.
1104 			if (argidx + 2 > argc)
1105 			{
1106 				printf("ERROR: -zdim switch with no argument\n");
1107 				return 1;
1108 			}
1109 			argidx++;
1110 
1111 			// Read array size (image depth).
1112 			if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1113 			{
1114 				printf("ERROR: -zdim size '%s' is invalid\n", argv[argidx]);
1115 				return 1;
1116 			}
1117 
1118 			if ((cli_config.array_size > 1) && (config.block_z == 1))
1119 			{
1120 				printf("ERROR: -zdim with 3D input data for a 2D output format\n");
1121 				return 1;
1122 			}
1123 			argidx++;
1124 		}
1125 #if defined(ASTCENC_DIAGNOSTICS)
1126 		else if (!strcmp(argv[argidx], "-dtrace"))
1127 		{
1128 			argidx += 2;
1129 			if (argidx > argc)
1130 			{
1131 				printf("ERROR: -dtrace switch with no argument\n");
1132 				return 1;
1133 			}
1134 
1135 			config.trace_file_path = argv[argidx - 1];
1136 		}
1137 #endif
1138 		else if (!strcmp(argv[argidx], "-dimage"))
1139 		{
1140 			argidx += 1;
1141 			cli_config.diagnostic_images = true;
1142 		}
1143 		else // check others as well
1144 		{
1145 			printf("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1146 			return 1;
1147 		}
1148 	}
1149 
1150 	if (cli_config.thread_count <= 0)
1151 	{
1152 		cli_config.thread_count = get_cpu_count();
1153 	}
1154 
1155 #if defined(ASTCENC_DIAGNOSTICS)
1156 	// Force single threaded for diagnostic builds
1157 	cli_config.thread_count = 1;
1158 
1159 	if (!config.trace_file_path)
1160 	{
1161 		printf("ERROR: Diagnostics builds must set -dtrace\n");
1162 		return 1;
1163 	}
1164 #endif
1165 
1166 	return 0;
1167 }
1168 
1169 /**
1170  * @brief Print the config settings in a human readable form.
1171  *
1172  * @param[in] cli_config   Command line config.
1173  * @param[in] config       Codec configuration.
1174  */
print_astcenc_config(const cli_config_options & cli_config,const astcenc_config & config)1175 static void print_astcenc_config(
1176 	const cli_config_options& cli_config,
1177 	const astcenc_config& config
1178 ) {
1179 	// Print all encoding settings unless specifically told otherwise
1180 	if (!cli_config.silentmode)
1181 	{
1182 		printf("Compressor settings\n");
1183 		printf("===================\n\n");
1184 
1185 		switch (config.profile)
1186 		{
1187 		case ASTCENC_PRF_LDR:
1188 			printf("    Color profile:              LDR linear\n");
1189 			break;
1190 		case ASTCENC_PRF_LDR_SRGB:
1191 			printf("    Color profile:              LDR sRGB\n");
1192 			break;
1193 		case ASTCENC_PRF_HDR_RGB_LDR_A:
1194 			printf("    Color profile:              HDR RGB + LDR A\n");
1195 			break;
1196 		case ASTCENC_PRF_HDR:
1197 			printf("    Color profile:              HDR RGBA\n");
1198 			break;
1199 		}
1200 
1201 		if (config.block_z == 1)
1202 		{
1203 			printf("    Block size:                 %ux%u\n", config.block_x, config.block_y);
1204 		}
1205 		else
1206 		{
1207 			printf("    Block size:                 %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1208 		}
1209 
1210 		printf("    Bitrate:                    %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1211 		printf("    RGB alpha scale weight:     %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1212 		if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1213 		{
1214 			printf("    Radius RGB alpha scale:     %u texels\n", config.a_scale_radius);
1215 		}
1216 
1217 		printf("    R component weight:         %g\n", static_cast<double>(config.cw_r_weight));
1218 		printf("    G component weight:         %g\n", static_cast<double>(config.cw_g_weight));
1219 		printf("    B component weight:         %g\n", static_cast<double>(config.cw_b_weight));
1220 		printf("    A component weight:         %g\n", static_cast<double>(config.cw_a_weight));
1221 		printf("    Partition cutoff:           %u partitions\n", config.tune_partition_count_limit);
1222 		printf("    2 partition index cutoff:   %u partition ids\n", config.tune_2partition_index_limit);
1223 		printf("    3 partition index cutoff:   %u partition ids\n", config.tune_3partition_index_limit);
1224 		printf("    4 partition index cutoff:   %u partition ids\n", config.tune_4partition_index_limit);
1225 		printf("    PSNR cutoff:                %g dB\n", static_cast<double>(config.tune_db_limit));
1226 		printf("    3 partition cutoff:         %g\n", static_cast<double>(config.tune_2_partition_early_out_limit_factor));
1227 		printf("    4 partition cutoff:         %g\n", static_cast<double>(config.tune_3_partition_early_out_limit_factor));
1228 		printf("    2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2_plane_early_out_limit_correlation));
1229 		printf("    Block mode centile cutoff:  %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1230 		printf("    Candidate cutoff:           %u candidates\n", config.tune_candidate_limit);
1231 		printf("    Refinement cutoff:          %u iterations\n", config.tune_refinement_limit);
1232 		printf("    Compressor thread count:    %d\n", cli_config.thread_count);
1233 		printf("\n");
1234 	}
1235 }
1236 
1237 /**
1238  * @brief Get the value of a single pixel in an image.
1239  *
1240  * Note, this implementation is not particularly optimal as it puts format
1241  * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1242  * acceptable as these are not performance critical paths.
1243  *
1244  * @param[in] img   The output image.
1245  * @param     x     The pixel x coordinate.
1246  * @param     y     The pixel y coordinate.
1247  * @param     z     The pixel z coordinate.
1248  *
1249  * @return      pixel   The pixel color value to write.
1250  */
image_get_pixel(const astcenc_image & img,unsigned int x,unsigned int y,unsigned int z)1251 static vfloat4 image_get_pixel(
1252 	const astcenc_image& img,
1253 	unsigned int x,
1254 	unsigned int y,
1255 	unsigned int z
1256 ) {
1257 	// We should never escape bounds
1258 	assert(x < img.dim_x);
1259 	assert(y < img.dim_y);
1260 	assert(z < img.dim_z);
1261 
1262 	if (img.data_type == ASTCENC_TYPE_U8)
1263 	{
1264 		uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1265 
1266 		float r = data[(4 * img.dim_x * y) + (4 * x    )] / 255.0f;
1267 		float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1268 		float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1269 		float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1270 
1271 		return vfloat4(r, g, b, a);
1272 	}
1273 	else if (img.data_type == ASTCENC_TYPE_F16)
1274 	{
1275 		uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1276 
1277 		vint4 colori(
1278 			data[(4 * img.dim_x * y) + (4 * x    )],
1279 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1280 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1281 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1282 		);
1283 
1284 		return float16_to_float(colori);
1285 	}
1286 	else // if (img.data_type == ASTCENC_TYPE_F32)
1287 	{
1288 		assert(img.data_type == ASTCENC_TYPE_F32);
1289 		float* data = static_cast<float*>(img.data[z]);
1290 
1291 		return vfloat4(
1292 			data[(4 * img.dim_x * y) + (4 * x    )],
1293 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1294 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1295 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1296 		);
1297 	}
1298 }
1299 
1300 /**
1301  * @brief Set the value of a single pixel in an image.
1302  *
1303  * @param[out] img     The output image; must use F32 texture components.
1304  * @param      x       The pixel x coordinate.
1305  * @param      y       The pixel y coordinate.
1306  * @param      z       The pixel z coordinate.
1307  * @param      pixel   The pixel color value to write.
1308  */
image_set_pixel(astcenc_image & img,unsigned int x,unsigned int y,unsigned int z,vfloat4 pixel)1309 static void image_set_pixel(
1310 	astcenc_image& img,
1311 	unsigned int x,
1312 	unsigned int y,
1313 	unsigned int z,
1314 	vfloat4 pixel
1315 ) {
1316 	// We should never escape bounds
1317 	assert(x < img.dim_x);
1318 	assert(y < img.dim_y);
1319 	assert(z < img.dim_z);
1320 	assert(img.data_type == ASTCENC_TYPE_F32);
1321 
1322 	float* data = static_cast<float*>(img.data[z]);
1323 
1324 	data[(4 * img.dim_x * y) + (4 * x    )] = pixel.lane<0>();
1325 	data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1326 	data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1327 	data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1328 }
1329 
1330 /**
1331  * @brief Set the value of a single pixel in an image.
1332  *
1333  * @param[out] img     The output image; must use F32 texture components.
1334  * @param      x       The pixel x coordinate.
1335  * @param      y       The pixel y coordinate.
1336  * @param      pixel   The pixel color value to write.
1337  */
image_set_pixel_u8(astcenc_image & img,size_t x,size_t y,vint4 pixel)1338 static void image_set_pixel_u8(
1339 	astcenc_image& img,
1340 	size_t x,
1341 	size_t y,
1342 	vint4 pixel
1343 ) {
1344 	// We should never escape bounds
1345 	assert(x < img.dim_x);
1346 	assert(y < img.dim_y);
1347 	assert(img.data_type == ASTCENC_TYPE_U8);
1348 
1349 	uint8_t* data = static_cast<uint8_t*>(img.data[0]);
1350 	pixel = pack_low_bytes(pixel);
1351 	store_nbytes(pixel, data + (4 * img.dim_x * y) + (4 * x    ));
1352 }
1353 
1354 /**
1355  * @brief Create a copy of @c input with forced unit-length normal vectors.
1356  *
1357  * It is assumed that all normal vectors are stored in the RGB components, and
1358  * stored in a packed unsigned range of [0,1] which must be unpacked prior
1359  * normalization. Data must then be repacked into this form for handing over to
1360  * the core codec.
1361  *
1362  * @param[in]  input    The input image.
1363  * @param[out] output   The output image, must use F32 components.
1364  */
image_preprocess_normalize(const astcenc_image & input,astcenc_image & output)1365 static void image_preprocess_normalize(
1366 	const astcenc_image& input,
1367 	astcenc_image& output
1368 ) {
1369 	for (unsigned int z = 0; z < input.dim_z; z++)
1370 	{
1371 		for (unsigned int y = 0; y < input.dim_y; y++)
1372 		{
1373 			for (unsigned int x = 0; x < input.dim_x; x++)
1374 			{
1375 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1376 
1377 				// Stash alpha component and zero
1378 				float a = pixel.lane<3>();
1379 				pixel.set_lane<3>(0.0f);
1380 
1381 				// Decode [0,1] normals to [-1,1]
1382 				pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1383 				pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1384 				pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1385 
1386 				// Normalize pixel and restore alpha
1387 				pixel = normalize(pixel);
1388 				pixel.set_lane<3>(a);
1389 
1390 				// Encode [-1,1] normals to [0,1]
1391 				pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1392 				pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1393 				pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1394 
1395 				image_set_pixel(output, x, y, z, pixel);
1396 			}
1397 		}
1398 	}
1399 }
1400 
/**
 * @brief Linearize an sRGB value.
 *
 * Values at or below 0.04045 lie on the straight-line part of the curve and
 * are simply scaled; larger values go through the 2.4 power curve.
 *
 * @param a   The sRGB gamma-encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
	float a
) {
	const bool on_linear_toe = a <= 0.04045f;

	return on_linear_toe
	     ? a * (1.0f / 12.92f)
	     : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
1416 
/**
 * @brief sRGB gamma-encode a linear value.
 *
 * Values at or below 0.0031308 lie on the straight-line part of the curve and
 * are simply scaled; larger values go through the 1/2.4 power curve.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
	float a
) {
	const bool on_linear_toe = a <= 0.0031308f;

	return on_linear_toe
	     ? a * 12.92f
	     : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
1432 
1433 /**
1434  * @brief Create a copy of @c input with premultiplied color data.
1435  *
1436  * If we are compressing sRGB data we linearize the data prior to
1437  * premultiplication and re-gamma-encode afterwards.
1438  *
1439  * @param[in]  input     The input image.
1440  * @param[out] output    The output image, must use F32 components.
1441  * @param      profile   The encoding profile.
1442  */
image_preprocess_premultiply(const astcenc_image & input,astcenc_image & output,astcenc_profile profile)1443 static void image_preprocess_premultiply(
1444 	const astcenc_image& input,
1445 	astcenc_image& output,
1446 	astcenc_profile profile
1447 ) {
1448 	for (unsigned int z = 0; z < input.dim_z; z++)
1449 	{
1450 		for (unsigned int y = 0; y < input.dim_y; y++)
1451 		{
1452 			for (unsigned int x = 0; x < input.dim_x; x++)
1453 			{
1454 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1455 
1456 				// Linearize sRGB
1457 				if (profile == ASTCENC_PRF_LDR_SRGB)
1458 				{
1459 					pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1460 					pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1461 					pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1462 				}
1463 
1464 				// Premultiply pixel in linear-space
1465 				pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1466 				pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1467 				pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1468 
1469 				// Gamma-encode sRGB
1470 				if (profile == ASTCENC_PRF_LDR_SRGB)
1471 				{
1472 					pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1473 					pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1474 					pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1475 				}
1476 
1477 				image_set_pixel(output, x, y, z, pixel);
1478 			}
1479 		}
1480 	}
1481 }
1482 
1483 /**
1484  * @brief Populate a single diagnostic image showing aspects of the encoding.
1485  *
1486  * @param context      The context to use.
1487  * @param image        The compressed image to analyze.
1488  * @param diag_image   The output visualization image to populate.
1489  * @param texel_func   The per-texel callback used to determine output color.
1490  */
print_diagnostic_image(astcenc_context * context,const astc_compressed_image & image,astcenc_image & diag_image,std::function<vint4 (astcenc_block_info &,size_t,size_t)> texel_func)1491 static void print_diagnostic_image(
1492 	astcenc_context* context,
1493 	const astc_compressed_image& image,
1494 	astcenc_image& diag_image,
1495 	std::function<vint4(astcenc_block_info&, size_t, size_t)> texel_func
1496 ) {
1497 	size_t block_cols = (image.dim_x + image.block_x - 1) / image.block_x;
1498 	size_t block_rows = (image.dim_y + image.block_y - 1) / image.block_y;
1499 
1500 	uint8_t* data = image.data;
1501 	for (size_t block_y = 0; block_y < block_rows; block_y++)
1502 	{
1503 		for (size_t block_x = 0; block_x < block_cols; block_x++)
1504 		{
1505 			astcenc_block_info block_info;
1506 			astcenc_get_block_info(context, data, &block_info);
1507 			data += 16;
1508 
1509 			size_t start_row = block_y * image.block_y;
1510 			size_t start_col = block_x * image.block_x;
1511 
1512 			size_t end_row = astc::min(start_row + image.block_y, static_cast<size_t>(image.dim_y));
1513 			size_t end_col = astc::min(start_col + image.block_x, static_cast<size_t>(image.dim_x));
1514 
1515 			for (size_t texel_y = start_row; texel_y < end_row; texel_y++)
1516 			{
1517 				for (size_t texel_x = start_col; texel_x < end_col; texel_x++)
1518 				{
1519 					vint4 color = texel_func(block_info, texel_x - start_col, texel_y - start_row);
1520 					image_set_pixel_u8(diag_image, texel_x, texel_y, color);
1521 				}
1522 			}
1523 		}
1524 	}
1525 }
1526 
1527 /**
1528  * @brief Print a set of diagnostic images showing aspects of the encoding.
1529  *
1530  * @param context       The context to use.
1531  * @param image         The compressed image to analyze.
1532  * @param output_file   The output file name to use as a stem for new names.
1533  */
print_diagnostic_images(astcenc_context * context,const astc_compressed_image & image,const std::string & output_file)1534 static void print_diagnostic_images(
1535 	astcenc_context* context,
1536 	const astc_compressed_image& image,
1537 	const std::string& output_file
1538 ) {
1539 	if (image.dim_z != 1)
1540 	{
1541 		return;
1542 	}
1543 
1544 	// Try to find a file extension we know about
1545 	size_t index = output_file.find_last_of(".");
1546 	std::string stem = output_file;
1547 	if (index != std::string::npos)
1548 	{
1549 		stem = stem.substr(0, index);
1550 	}
1551 
1552 	auto diag_image = alloc_image(8, image.dim_x, image.dim_y, image.dim_z);
1553 
1554 	// ---- ---- ---- ---- Partitioning ---- ---- ---- ----
1555 	auto partition_func = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1556 		const vint4 colors[] {
1557 			vint4(  0,   0,   0, 255),
1558 			vint4(255,   0,   0, 255),
1559 			vint4(  0, 255,   0, 255),
1560 			vint4(  0,   0, 255, 255),
1561 			vint4(255, 255, 255, 255)
1562 		};
1563 
1564 		size_t texel_index = texel_y * info.block_x + texel_x;
1565 
1566 		int partition { 0 };
1567 		if (!info.is_constant_block)
1568 		{
1569 			partition = info.partition_assignment[texel_index] + 1;
1570 		}
1571 
1572 		return colors[partition];
1573 	};
1574 
1575 	print_diagnostic_image(context, image, *diag_image, partition_func);
1576 	std::string fname = stem + "_diag_partitioning.png";
1577 	store_ncimage(diag_image, fname.c_str(), false);
1578 
1579 	// ---- ---- ---- ---- Weight planes  ---- ---- ---- ----
1580 	auto texel_func1 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1581 		(void)texel_x;
1582 		(void)texel_y;
1583 
1584 		const vint4 colors[] {
1585 			vint4(  0,   0,   0, 255),
1586 			vint4(255,   0,   0, 255),
1587 			vint4(  0, 255,   0, 255),
1588 			vint4(  0,   0, 255, 255),
1589 			vint4(255, 255, 255, 255)
1590 		};
1591 
1592 		int component { 0 };
1593 		if (info.is_dual_plane_block)
1594 		{
1595 			component = info.dual_plane_component + 1;
1596 		}
1597 
1598 		return colors[component];
1599 	};
1600 
1601 	print_diagnostic_image(context, image, *diag_image, texel_func1);
1602 	fname = stem + "_diag_weight_plane2.png";
1603 	store_ncimage(diag_image, fname.c_str(), false);
1604 
1605 	// ---- ---- ---- ---- Weight density  ---- ---- ---- ----
1606 	auto texel_func2 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1607 		(void)texel_x;
1608 		(void)texel_y;
1609 
1610 		float density = 0.0f;
1611 		if (!info.is_constant_block)
1612 		{
1613 			float texel_count = static_cast<float>(info.block_x * info.block_y);
1614 			float weight_count = static_cast<float>(info.weight_x * info.weight_y);
1615 			density = weight_count / texel_count;
1616 		}
1617 
1618 		int densityi = static_cast<int>(255.0f * density);
1619 		return vint4(densityi, densityi, densityi, 255);
1620 	};
1621 
1622 	print_diagnostic_image(context, image, *diag_image, texel_func2);
1623 	fname = stem + "_diag_weight_density.png";
1624 	store_ncimage(diag_image, fname.c_str(), false);
1625 
1626 	// ---- ---- ---- ---- Weight quant  ---- ---- ---- ----
1627 	auto texel_func3 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1628 		(void)texel_x;
1629 		(void)texel_y;
1630 
1631 		int quant { 0 };
1632 		if (!info.is_constant_block)
1633 		{
1634 			quant = info.weight_level_count - 1;
1635 		}
1636 
1637 		return vint4(quant, quant, quant, 255);
1638 	};
1639 
1640 	print_diagnostic_image(context, image, *diag_image, texel_func3);
1641 	fname = stem + "_diag_weight_quant.png";
1642 	store_ncimage(diag_image, fname.c_str(), false);
1643 
1644 	// ---- ---- ---- ---- Color quant  ---- ---- ---- ----
1645 	auto texel_func4 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1646 		(void)texel_x;
1647 		(void)texel_y;
1648 
1649 		int quant { 0 };
1650 		if (!info.is_constant_block)
1651 		{
1652 			quant = info.color_level_count - 1;
1653 		}
1654 
1655 		return vint4(quant, quant, quant, 255);
1656 	};
1657 
1658 	print_diagnostic_image(context, image, *diag_image, texel_func4);
1659 	fname = stem + "_diag_color_quant.png";
1660 	store_ncimage(diag_image, fname.c_str(), false);
1661 
1662 	// ---- ---- ---- ---- Color endpoint mode: Index ---- ---- ---- ----
1663 	auto texel_func5 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1664 		(void)texel_x;
1665 		(void)texel_y;
1666 
1667 		size_t texel_index = texel_y * info.block_x + texel_x;
1668 
1669 		int cem { 255 };
1670 		if (!info.is_constant_block)
1671 		{
1672 			uint8_t partition = info.partition_assignment[texel_index];
1673 			cem = info.color_endpoint_modes[partition] * 16;
1674 		}
1675 
1676 		return vint4(cem, cem, cem, 255);
1677 	};
1678 
1679 	print_diagnostic_image(context, image, *diag_image, texel_func5);
1680 	fname = stem + "_diag_cem_index.png";
1681 	store_ncimage(diag_image, fname.c_str(), false);
1682 
1683 	// ---- ---- ---- ---- Color endpoint mode: Components ---- ---- ---- ----
1684 	auto texel_func6 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1685 		(void)texel_x;
1686 		(void)texel_y;
1687 
1688 		const vint4 colors[] {
1689 			vint4(  0,   0,   0, 255),
1690 			vint4(255,   0,   0, 255),
1691 			vint4(  0, 255,   0, 255),
1692 			vint4(  0,   0, 255, 255),
1693 			vint4(255, 255, 255, 255)
1694 		};
1695 
1696 		size_t texel_index = texel_y * info.block_x + texel_x;
1697 
1698 		int components { 0 };
1699 		if (!info.is_constant_block)
1700 		{
1701 			uint8_t partition = info.partition_assignment[texel_index];
1702 			uint8_t cem = info.color_endpoint_modes[partition];
1703 
1704 			switch (cem)
1705 			{
1706 				case 0:
1707 				case 1:
1708 				case 2:
1709 				case 3:
1710 					components = 1;
1711 					break;
1712 				case 4:
1713 				case 5:
1714 					components = 2;
1715 					break;
1716 				case 6:
1717 				case 7:
1718 				case 8:
1719 				case 9:
1720 				case 11:
1721 					components = 3;
1722 					break;
1723 				default:
1724 					components = 4;
1725 					break;
1726 			}
1727 		}
1728 
1729 		return colors[components];
1730 	};
1731 
1732 	print_diagnostic_image(context, image, *diag_image, texel_func6);
1733 	fname = stem + "_diag_cem_components.png";
1734 	store_ncimage(diag_image, fname.c_str(), false);
1735 
1736 	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1737 	auto texel_func7 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1738 		(void)texel_x;
1739 		(void)texel_y;
1740 
1741 		const vint4 colors[] {
1742 			vint4(  0,   0,   0, 255),
1743 			vint4(255,   0,   0, 255),
1744 			vint4(  0, 255,   0, 255),
1745 			vint4(  0,   0, 255, 255),
1746 		};
1747 
1748 		size_t texel_index = texel_y * info.block_x + texel_x;
1749 
1750 		int style { 0 };
1751 		if (!info.is_constant_block)
1752 		{
1753 			uint8_t partition = info.partition_assignment[texel_index];
1754 			uint8_t cem = info.color_endpoint_modes[partition];
1755 
1756 			switch (cem)
1757 			{
1758 				// Direct - two absolute endpoints
1759 				case 0:
1760 				case 1:
1761 				case 2:
1762 				case 3:
1763 				case 4:
1764 				case 8:
1765 				case 11:
1766 				case 12:
1767 				case 14:
1768 				case 15:
1769 					style = 1;
1770 					break;
1771 				// Offset - one absolute plus delta
1772 				case 5:
1773 				case 9:
1774 				case 13:
1775 					style = 2;
1776 					break;
1777 				// Scale - one absolute plus scale
1778 				case 6:
1779 				case 7:
1780 				case 10:
1781 					style = 3;
1782 					break;
1783 				// Shouldn't happen ...
1784 				default:
1785 					style = 0;
1786 					break;
1787 			}
1788 		}
1789 
1790 		return colors[style];
1791 	};
1792 
1793 	print_diagnostic_image(context, image, *diag_image, texel_func7);
1794 	fname = stem + "_diag_cem_style.png";
1795 	store_ncimage(diag_image, fname.c_str(), false);
1796 
1797 	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1798 	auto texel_func8 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1799 		(void)texel_x;
1800 		(void)texel_y;
1801 
1802 		size_t texel_index = texel_y * info.block_x + texel_x;
1803 
1804 		int style { 0 };
1805 		if (!info.is_constant_block)
1806 		{
1807 			uint8_t partition = info.partition_assignment[texel_index];
1808 			uint8_t cem = info.color_endpoint_modes[partition];
1809 
1810 			switch (cem)
1811 			{
1812 				// LDR blocks
1813 				case 0:
1814 				case 1:
1815 				case 4:
1816 				case 5:
1817 				case 6:
1818 				case 8:
1819 				case 9:
1820 				case 10:
1821 				case 12:
1822 				case 13:
1823 					style = 128;
1824 					break;
1825 				// HDR blocks
1826 				default:
1827 					style = 155;
1828 					break;
1829 			}
1830 		}
1831 
1832 		return vint4(style, style, style, 255);
1833 	};
1834 
1835 	print_diagnostic_image(context, image, *diag_image, texel_func8);
1836 	fname = stem + "_diag_cem_hdr.png";
1837 	store_ncimage(diag_image, fname.c_str(), false);
1838 
1839 	free_image(diag_image);
1840 }
1841 
1842 /**
1843  * @brief The main entry point.
1844  *
1845  * @param argc   The number of arguments.
1846  * @param argv   The vector of arguments.
1847  *
1848  * @return 0 on success, non-zero otherwise.
1849  */
main(int argc,char ** argv)1850 int main(
1851 	int argc,
1852 	char **argv
1853 ) {
1854 	double start_time = get_time();
1855 
1856 	if (argc < 2)
1857 	{
1858 		astcenc_print_shorthelp();
1859 		return 0;
1860 	}
1861 
1862 	astcenc_operation operation;
1863 	astcenc_profile profile;
1864 	int error = parse_commandline_options(argc, argv, operation, profile);
1865 	if (error)
1866 	{
1867 		return 1;
1868 	}
1869 
1870 	switch (operation)
1871 	{
1872 	case ASTCENC_OP_HELP:
1873 		astcenc_print_longhelp();
1874 		return 0;
1875 	case ASTCENC_OP_VERSION:
1876 		astcenc_print_header();
1877 		return 0;
1878 	default:
1879 		break;
1880 	}
1881 
1882 	std::string input_filename = argc >= 3 ? argv[2] : "";
1883 	std::string output_filename = argc >= 4 ? argv[3] : "";
1884 
1885 	if (input_filename.empty())
1886 	{
1887 		printf("ERROR: Input file not specified\n");
1888 		return 1;
1889 	}
1890 
1891 	if (output_filename.empty())
1892 	{
1893 		printf("ERROR: Output file not specified\n");
1894 		return 1;
1895 	}
1896 
1897 	// TODO: Handle RAII resources so they get freed when out of scope
1898 	// Load the compressed input file if needed
1899 
1900 	// This has to come first, as the block size is in the file header
1901 	astc_compressed_image image_comp {};
1902 	if (operation & ASTCENC_STAGE_LD_COMP)
1903 	{
1904 		if (ends_with(input_filename, ".astc"))
1905 		{
1906 			error = load_cimage(input_filename.c_str(), image_comp);
1907 			if (error)
1908 			{
1909 				return 1;
1910 			}
1911 		}
1912 		else if (ends_with(input_filename, ".ktx"))
1913 		{
1914 			bool is_srgb;
1915 			error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
1916 			if (error)
1917 			{
1918 				return 1;
1919 			}
1920 
1921 			if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
1922 			{
1923 				printf("WARNING: Input file is sRGB, but decompressing as linear\n");
1924 			}
1925 
1926 			if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
1927 			{
1928 				printf("WARNING: Input file is linear, but decompressing as sRGB\n");
1929 			}
1930 		}
1931 		else
1932 		{
1933 			printf("ERROR: Unknown compressed input file type\n");
1934 			return 1;
1935 		}
1936 	}
1937 
1938 	astcenc_config config {};
1939 	astcenc_preprocess preprocess;
1940 	error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
1941 	if (error)
1942 	{
1943 		return 1;
1944 	}
1945 
1946 	// Initialize cli_config_options with default values
1947 	cli_config_options cli_config { 0, 1, 1, false, false, false, -10, 10,
1948 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
1949 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
1950 
1951 	error = edit_astcenc_config(argc, argv, operation, cli_config, config);
1952 	if (error)
1953 	{
1954 		return 1;
1955 	}
1956 
1957 	astcenc_image* image_uncomp_in = nullptr ;
1958 	unsigned int image_uncomp_in_component_count = 0;
1959 	bool image_uncomp_in_is_hdr = false;
1960 	astcenc_image* image_decomp_out = nullptr;
1961 
1962 	// TODO: Handle RAII resources so they get freed when out of scope
1963 	astcenc_error    codec_status;
1964 	astcenc_context* codec_context;
1965 
1966 
1967 	// Preflight - check we have valid extensions for storing a file
1968 	if (operation & ASTCENC_STAGE_ST_NCOMP)
1969 	{
1970 		int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
1971 		if (bitness < 0)
1972 		{
1973 			const char *eptr = strrchr(output_filename.c_str(), '.');
1974 			eptr = eptr ? eptr : "";
1975 			printf("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
1976 			return 1;
1977 		}
1978 	}
1979 
1980 	if (operation & ASTCENC_STAGE_ST_COMP)
1981 	{
1982 #if defined(_WIN32)
1983 		bool is_null = output_filename == "NUL" || output_filename == "nul";
1984 #else
1985 		bool is_null = output_filename == "/dev/null";
1986 #endif
1987 
1988 		if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
1989 		{
1990 			const char *eptr = strrchr(output_filename.c_str(), '.');
1991 			eptr = eptr ? eptr : "";
1992 			printf("ERROR: Unknown compressed output file type '%s'\n", eptr);
1993 			return 1;
1994 		}
1995 	}
1996 
1997 	codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
1998 	if (codec_status != ASTCENC_SUCCESS)
1999 	{
2000 		printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
2001 		return 1;
2002 	}
2003 
2004 	// Load the uncompressed input file if needed
2005 	if (operation & ASTCENC_STAGE_LD_NCOMP)
2006 	{
2007 		image_uncomp_in = load_uncomp_file(
2008 		    input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
2009 		    image_uncomp_in_is_hdr, image_uncomp_in_component_count);
2010 		if (!image_uncomp_in)
2011 		{
2012 			printf ("ERROR: Failed to load uncompressed image file\n");
2013 			return 1;
2014 		}
2015 
2016 
2017 		if (preprocess != ASTCENC_PP_NONE)
2018 		{
2019 			// Allocate a float image so we can avoid additional quantization,
2020 			// as e.g. premultiplication can result in fractional color values
2021 			astcenc_image* image_pp = alloc_image(32,
2022 			                                      image_uncomp_in->dim_x,
2023 			                                      image_uncomp_in->dim_y,
2024 			                                      image_uncomp_in->dim_z);
2025 			if (!image_pp)
2026 			{
2027 				printf ("ERROR: Failed to allocate preprocessed image\n");
2028 				return 1;
2029 			}
2030 
2031 			if (preprocess == ASTCENC_PP_NORMALIZE)
2032 			{
2033 				image_preprocess_normalize(*image_uncomp_in, *image_pp);
2034 			}
2035 
2036 			if (preprocess == ASTCENC_PP_PREMULTIPLY)
2037 			{
2038 				image_preprocess_premultiply(*image_uncomp_in, *image_pp,
2039 				                             config.profile);
2040 			}
2041 
2042 			// Delete the original as we no longer need it
2043 			free_image(image_uncomp_in);
2044 			image_uncomp_in = image_pp;
2045 		}
2046 
2047 		if (!cli_config.silentmode)
2048 		{
2049 			printf("Source image\n");
2050 			printf("============\n\n");
2051 			printf("    Source:                     %s\n", input_filename.c_str());
2052 			printf("    Color profile:              %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
2053 			if (image_uncomp_in->dim_z > 1)
2054 			{
2055 				printf("    Dimensions:                 3D, %ux%ux%u\n",
2056 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
2057 			}
2058 			else
2059 			{
2060 				printf("    Dimensions:                 2D, %ux%u\n",
2061 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y);
2062 			}
2063 			printf("    Components:                 %d\n\n", image_uncomp_in_component_count);
2064 		}
2065 	}
2066 
2067 	double image_size = 0.0;
2068 	if (image_uncomp_in)
2069 	{
2070 		image_size = static_cast<double>(image_uncomp_in->dim_x) *
2071 		             static_cast<double>(image_uncomp_in->dim_y) *
2072 		             static_cast<double>(image_uncomp_in->dim_z);
2073 	}
2074 	else
2075 	{
2076 		image_size = static_cast<double>(image_comp.dim_x) *
2077 		             static_cast<double>(image_comp.dim_y) *
2078 		             static_cast<double>(image_comp.dim_z);
2079 	}
2080 
2081 	// Compress an image
2082 	double best_compression_time = 100000.0;
2083 	double total_compression_time = 0.0;
2084 	if (operation & ASTCENC_STAGE_COMPRESS)
2085 	{
2086 		print_astcenc_config(cli_config, config);
2087 
2088 		unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
2089 		unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
2090 		unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
2091 		size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
2092 		uint8_t* buffer = new uint8_t[buffer_size];
2093 
2094 		compression_workload work;
2095 		work.context = codec_context;
2096 		work.image = image_uncomp_in;
2097 		work.swizzle = cli_config.swz_encode;
2098 		work.data_out = buffer;
2099 		work.data_len = buffer_size;
2100 		work.error = ASTCENC_SUCCESS;
2101 
2102 		// Only launch worker threads for multi-threaded use - it makes basic
2103 		// single-threaded profiling and debugging a little less convoluted
2104 		double start_compression_time = get_time();
2105 		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2106 		{
2107 			double start_iter_time = get_time();
2108 			if (cli_config.thread_count > 1)
2109 			{
2110 				launch_threads(cli_config.thread_count, compression_workload_runner, &work);
2111 			}
2112 			else
2113 			{
2114 				work.error = astcenc_compress_image(
2115 					work.context, work.image, &work.swizzle,
2116 					work.data_out, work.data_len, 0);
2117 			}
2118 
2119 			astcenc_compress_reset(codec_context);
2120 
2121 			double iter_time = get_time() - start_iter_time;
2122 			best_compression_time = astc::min(iter_time, best_compression_time);
2123 		}
2124 		total_compression_time = get_time() - start_compression_time;
2125 
2126 		if (work.error != ASTCENC_SUCCESS)
2127 		{
2128 			printf("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
2129 			return 1;
2130 		}
2131 
2132 		image_comp.block_x = config.block_x;
2133 		image_comp.block_y = config.block_y;
2134 		image_comp.block_z = config.block_z;
2135 		image_comp.dim_x = image_uncomp_in->dim_x;
2136 		image_comp.dim_y = image_uncomp_in->dim_y;
2137 		image_comp.dim_z = image_uncomp_in->dim_z;
2138 		image_comp.data = buffer;
2139 		image_comp.data_len = buffer_size;
2140 	}
2141 
2142 	// Decompress an image
2143 	double best_decompression_time = 100000.0;
2144 	double total_decompression_time = 0.0;
2145 	if (operation & ASTCENC_STAGE_DECOMPRESS)
2146 	{
2147 		int out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2148 		if (out_bitness == 0)
2149 		{
2150 			bool is_hdr = (config.profile == ASTCENC_PRF_HDR) || (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
2151 			out_bitness = is_hdr ? 16 : 8;
2152 		}
2153 
2154 		image_decomp_out = alloc_image(
2155 		    out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
2156 
2157 		decompression_workload work;
2158 		work.context = codec_context;
2159 		work.data = image_comp.data;
2160 		work.data_len = image_comp.data_len;
2161 		work.image_out = image_decomp_out;
2162 		work.swizzle = cli_config.swz_decode;
2163 		work.error = ASTCENC_SUCCESS;
2164 
2165 		// Only launch worker threads for multi-threaded use - it makes basic
2166 		// single-threaded profiling and debugging a little less convoluted
2167 		double start_decompression_time = get_time();
2168 		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2169 		{
2170 			double start_iter_time = get_time();
2171 			if (cli_config.thread_count > 1)
2172 			{
2173 				launch_threads(cli_config.thread_count, decompression_workload_runner, &work);
2174 			}
2175 			else
2176 			{
2177 				work.error = astcenc_decompress_image(
2178 				    work.context, work.data, work.data_len,
2179 				    work.image_out, &work.swizzle, 0);
2180 			}
2181 
2182 			astcenc_decompress_reset(codec_context);
2183 
2184 			double iter_time = get_time() - start_iter_time;
2185 			best_decompression_time = astc::min(iter_time, best_decompression_time);
2186 		}
2187 		total_decompression_time = get_time() - start_decompression_time;
2188 
2189 		if (work.error != ASTCENC_SUCCESS)
2190 		{
2191 			printf("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
2192 			return 1;
2193 		}
2194 	}
2195 
2196 #if defined(_WIN32)
2197 	bool is_null = output_filename == "NUL" || output_filename == "nul";
2198 #else
2199 	bool is_null = output_filename == "/dev/null";
2200 #endif
2201 
2202    // Print metrics in comparison mode
2203 	if (operation & ASTCENC_STAGE_COMPARE)
2204 	{
2205 		bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
2206 
2207 		compute_error_metrics(
2208 		    image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
2209 		    image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
2210 	}
2211 
2212 	// Store compressed image
2213 	if (operation & ASTCENC_STAGE_ST_COMP)
2214 	{
2215 		if (ends_with(output_filename, ".astc"))
2216 		{
2217 			error = store_cimage(image_comp, output_filename.c_str());
2218 			if (error)
2219 			{
2220 				printf ("ERROR: Failed to store compressed image\n");
2221 				return 1;
2222 			}
2223 		}
2224 		else if (ends_with(output_filename, ".ktx"))
2225 		{
2226 			bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
2227 			error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
2228 			if (error)
2229 			{
2230 				printf ("ERROR: Failed to store compressed image\n");
2231 				return 1;
2232 			}
2233 		}
2234 		else
2235 		{
2236 			if (!is_null)
2237 			{
2238 				printf("ERROR: Unknown compressed output file type\n");
2239 				return 1;
2240 			}
2241 		}
2242 	}
2243 
2244 	// Store decompressed image
2245 	if (operation & ASTCENC_STAGE_ST_NCOMP)
2246 	{
2247 		if (!is_null)
2248 		{
2249 			bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
2250 			                                  cli_config.y_flip);
2251 			if (!store_result)
2252 			{
2253 				printf("ERROR: Failed to write output image %s\n", output_filename.c_str());
2254 				return 1;
2255 			}
2256 		}
2257 	}
2258 
2259 	// Store diagnostic images
2260 	if (cli_config.diagnostic_images && !is_null)
2261 	{
2262 		print_diagnostic_images(codec_context, image_comp, output_filename);
2263 	}
2264 
2265 	free_image(image_uncomp_in);
2266 	free_image(image_decomp_out);
2267 	astcenc_context_free(codec_context);
2268 
2269 	delete[] image_comp.data;
2270 
2271 	if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
2272 	{
2273 		double end_time = get_time();
2274 
2275 		double repeats = static_cast<double>(cli_config.repeat_count);
2276 		double avg_compression_time = total_compression_time / repeats;
2277 		double avg_decompression_time = total_decompression_time / repeats;
2278 		double total_time = (end_time - start_time) - ((repeats - 1.0) * avg_compression_time)  - ((repeats - 1.0) * avg_decompression_time);
2279 
2280 		printf("Performance metrics\n");
2281 		printf("===================\n\n");
2282 		printf("    Total time:                %8.4f s\n", total_time);
2283 
2284 		if (operation & ASTCENC_STAGE_COMPRESS)
2285 		{
2286 			double compression_rate = image_size / (best_compression_time * 1000000.0);
2287 
2288 			printf("    Coding time:               %8.4f s\n", best_compression_time);
2289 			printf("    Coding rate:               %8.4f MT/s\n", compression_rate);
2290 		}
2291 
2292 		if (operation & ASTCENC_STAGE_DECOMPRESS)
2293 		{
2294 			double decompression_rate = image_size / (best_decompression_time * 1000000.0);
2295 			printf("    Decoding time:             %8.4f s\n", best_decompression_time);
2296 			printf("    Decoding rate:             %8.4f MT/s\n", decompression_rate);
2297 		}
2298 	}
2299 
2300 	return 0;
2301 }
2302