1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions for codec library front-end.
20 */
21
22 #include "astcenc.h"
23 #include "astcenccli_internal.h"
24
25 #include <cassert>
26 #include <cstring>
27 #include <functional>
28 #include <string>
29 #include <sstream>
30 #include <vector>
31 #include <memory>
32
33 /* ============================================================================
34 Data structure definitions
35 ============================================================================ */
36
37 typedef unsigned int astcenc_operation;
38
39 struct mode_entry
40 {
41 const char* opt;
42 astcenc_operation operation;
43 astcenc_profile decode_mode;
44 };
45
46 /* ============================================================================
47 Constants and literals
48 ============================================================================ */
49
/** @brief Stage bit: a compressed image must be loaded. */
static constexpr unsigned int ASTCENC_STAGE_LD_COMP = 1u << 0;

/** @brief Stage bit: a compressed image must be stored. */
static constexpr unsigned int ASTCENC_STAGE_ST_COMP = 1u << 1;

/** @brief Stage bit: an uncompressed image must be loaded. */
static constexpr unsigned int ASTCENC_STAGE_LD_NCOMP = 1u << 2;

/** @brief Stage bit: an uncompressed image must be stored. */
static constexpr unsigned int ASTCENC_STAGE_ST_NCOMP = 1u << 3;

/** @brief Stage bit: an image must be compressed. */
static constexpr unsigned int ASTCENC_STAGE_COMPRESS = 1u << 4;

/** @brief Stage bit: an image must be decompressed. */
static constexpr unsigned int ASTCENC_STAGE_DECOMPRESS = 1u << 5;

/** @brief Stage bit: the result must be compared with the original input image. */
static constexpr unsigned int ASTCENC_STAGE_COMPARE = 1u << 6;
70
71 /** @brief Operation indicating an unknown request (should never happen). */
72 static const astcenc_operation ASTCENC_OP_UNKNOWN = 0;
73
74 /** @brief Operation indicating the user wants to print long-form help text and version info. */
75 static const astcenc_operation ASTCENC_OP_HELP = 1 << 7;
76
77 /** @brief Operation indicating the user wants to print short-form help text and version info. */
78 static const astcenc_operation ASTCENC_OP_VERSION = 1 << 8;
79
80 /** @brief Operation indicating the user wants to compress and store an image. */
81 static const astcenc_operation ASTCENC_OP_COMPRESS =
82 ASTCENC_STAGE_LD_NCOMP |
83 ASTCENC_STAGE_COMPRESS |
84 ASTCENC_STAGE_ST_COMP;
85
86 /** @brief Operation indicating the user wants to decompress and store an image. */
87 static const astcenc_operation ASTCENC_OP_DECOMPRESS =
88 ASTCENC_STAGE_LD_COMP |
89 ASTCENC_STAGE_DECOMPRESS |
90 ASTCENC_STAGE_ST_NCOMP;
91
92 /** @brief Operation indicating the user wants to test a compression setting on an image. */
93 static const astcenc_operation ASTCENC_OP_TEST =
94 ASTCENC_STAGE_LD_NCOMP |
95 ASTCENC_STAGE_COMPRESS |
96 ASTCENC_STAGE_DECOMPRESS |
97 ASTCENC_STAGE_COMPARE |
98 ASTCENC_STAGE_ST_NCOMP;
99
/**
 * @brief Image preprocessing tasks that can be applied prior to encoding.
 */
enum astcenc_preprocess
{
    /** @brief No image preprocessing. */
    ASTCENC_PP_NONE = 0,

    /** @brief Normal vector unit-length normalization. */
    ASTCENC_PP_NORMALIZE = 1,

    /** @brief Color data alpha premultiplication. */
    ASTCENC_PP_PREMULTIPLY = 2
};
112
113 /** @brief Decode table for command line operation modes. */
114 static const mode_entry modes[] {
115 {"-cl", ASTCENC_OP_COMPRESS, ASTCENC_PRF_LDR},
116 {"-dl", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
117 {"-tl", ASTCENC_OP_TEST, ASTCENC_PRF_LDR},
118 {"-cs", ASTCENC_OP_COMPRESS, ASTCENC_PRF_LDR_SRGB},
119 {"-ds", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
120 {"-ts", ASTCENC_OP_TEST, ASTCENC_PRF_LDR_SRGB},
121 {"-ch", ASTCENC_OP_COMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
122 {"-dh", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
123 {"-th", ASTCENC_OP_TEST, ASTCENC_PRF_HDR_RGB_LDR_A},
124 {"-cH", ASTCENC_OP_COMPRESS, ASTCENC_PRF_HDR},
125 {"-dH", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
126 {"-tH", ASTCENC_OP_TEST, ASTCENC_PRF_HDR},
127 {"-h", ASTCENC_OP_HELP, ASTCENC_PRF_HDR},
128 {"-help", ASTCENC_OP_HELP, ASTCENC_PRF_HDR},
129 {"-v", ASTCENC_OP_VERSION, ASTCENC_PRF_HDR},
130 {"-version", ASTCENC_OP_VERSION, ASTCENC_PRF_HDR}
131 };
132
133 /**
134 * @brief Compression workload definition for worker threads.
135 */
136 struct compression_workload
137 {
138 astcenc_context* context;
139 astcenc_image* image;
140 astcenc_swizzle swizzle;
141 uint8_t* data_out;
142 size_t data_len;
143 astcenc_error error;
144 };
145
146 /**
147 * @brief Decompression workload definition for worker threads.
148 */
149 struct decompression_workload
150 {
151 astcenc_context* context;
152 uint8_t* data;
153 size_t data_len;
154 astcenc_image* image_out;
155 astcenc_swizzle swizzle;
156 astcenc_error error;
157 };
158
/**
 * @brief Test if a string argument is, in its entirety, a well formed float.
 *
 * @param target   The string to test.
 *
 * @return @c true if the whole string parses as one float, with no leading whitespace and no
 *         trailing characters; @c false otherwise.
 */
static bool is_float(
    std::string target
) {
    std::istringstream parser(target);

    // Disable whitespace skipping so that leading whitespace is a parse error
    parser >> std::noskipws;

    float parsed;
    parser >> parsed;

    // No number could be extracted at all
    if (parser.fail())
    {
        return false;
    }

    // The number is only valid if the extraction consumed the whole string
    return parser.eof();
}
174
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to inspect.
 * @param suffix   The suffix to look for.
 *
 * @return @c true if the final characters of @c str equal @c suffix (always true for an empty
 *         suffix); @c false otherwise.
 */
static bool ends_with(
    const std::string& str,
    const std::string& suffix
) {
    const std::string::size_type suffix_len = suffix.size();

    // A suffix longer than the string can never match
    if (suffix_len > str.size())
    {
        return false;
    }

    const std::string::size_type tail_start = str.size() - suffix_len;
    return str.compare(tail_start, suffix_len, suffix) == 0;
}
185
186 /**
187 * @brief Runner callback function for a compression worker thread.
188 *
189 * @param thread_count The number of threads in the worker pool.
190 * @param thread_id The index of this thread in the worker pool.
191 * @param payload The parameters for this thread.
192 */
compression_workload_runner(int thread_count,int thread_id,void * payload)193 static void compression_workload_runner(
194 int thread_count,
195 int thread_id,
196 void* payload
197 ) {
198 (void)thread_count;
199
200 compression_workload* work = static_cast<compression_workload*>(payload);
201 astcenc_error error = astcenc_compress_image(
202 work->context, work->image, &work->swizzle,
203 work->data_out, work->data_len, thread_id);
204
205 // This is a racy update, so which error gets returned is a random, but it
206 // will reliably report an error if an error occurs
207 if (error != ASTCENC_SUCCESS)
208 {
209 work->error = error;
210 }
211 }
212
213 /**
214 * @brief Runner callback function for a decompression worker thread.
215 *
216 * @param thread_count The number of threads in the worker pool.
217 * @param thread_id The index of this thread in the worker pool.
218 * @param payload The parameters for this thread.
219 */
decompression_workload_runner(int thread_count,int thread_id,void * payload)220 static void decompression_workload_runner(
221 int thread_count,
222 int thread_id,
223 void* payload
224 ) {
225 (void)thread_count;
226
227 decompression_workload* work = static_cast<decompression_workload*>(payload);
228 astcenc_error error = astcenc_decompress_image(
229 work->context, work->data, work->data_len,
230 work->image_out, &work->swizzle, thread_id);
231
232 // This is a racy update, so which error gets returned is a random, but it
233 // will reliably report an error if an error occurs
234 if (error != ASTCENC_SUCCESS)
235 {
236 work->error = error;
237 }
238 }
239
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * Only a dot in the final path component is treated as the start of the file extension, so a
 * dot in a directory name (e.g. "foo.v2/bar") is not mistaken for one.
 *
 * @param      basename   The base pattern; must contain a file extension.
 * @param      index      The slice index.
 * @param[out] error      Set to true on error (no extension found), false on success.
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
    const std::string& basename,
    unsigned int index,
    bool& error
) {
    // Find where the final path component starts; accept both separator styles
    size_t name_start = basename.find_last_of("/\\");
    name_start = (name_start == std::string::npos) ? 0 : name_start + 1;

    size_t sep = basename.find_last_of('.');
    if ((sep == std::string::npos) || (sep < name_start))
    {
        error = true;
        return "";
    }

    std::string base = basename.substr(0, sep);
    std::string ext = basename.substr(sep);

    error = false;
    return base + "_" + std::to_string(index) + ext;
}
269
270 /**
271 * @brief Load a non-astc image file from memory.
272 *
273 * @param filename The file to load, or a pattern for array loads.
274 * @param dim_z The number of slices to load.
275 * @param y_flip Should this image be Y flipped?
276 * @param[out] is_hdr Is the loaded image HDR?
277 * @param[out] component_count The number of components in the loaded image.
278 *
279 * @return The astc image file, or nullptr on error.
280 */
load_uncomp_file(const char * filename,unsigned int dim_z,bool y_flip,bool & is_hdr,unsigned int & component_count)281 static astcenc_image* load_uncomp_file(
282 const char* filename,
283 unsigned int dim_z,
284 bool y_flip,
285 bool& is_hdr,
286 unsigned int& component_count
287 ) {
288 astcenc_image *image = nullptr;
289
290 // For a 2D image just load the image directly
291 if (dim_z == 1)
292 {
293 image = load_ncimage(filename, y_flip, is_hdr, component_count);
294 }
295 else
296 {
297 bool slice_is_hdr;
298 unsigned int slice_component_count;
299 astcenc_image* slice = nullptr;
300 std::vector<astcenc_image*> slices;
301
302 // For a 3D image load an array of slices
303 for (unsigned int image_index = 0; image_index < dim_z; image_index++)
304 {
305 bool error;
306 std::string slice_name = get_slice_filename(filename, image_index, error);
307 if (error)
308 {
309 printf("ERROR: Image pattern does not contain file extension: %s\n", filename);
310 break;
311 }
312
313 slice = load_ncimage(slice_name.c_str(), y_flip,
314 slice_is_hdr, slice_component_count);
315 if (!slice)
316 {
317 break;
318 }
319
320 slices.push_back(slice);
321
322 // Check it is not a 3D image
323 if (slice->dim_z != 1)
324 {
325 printf("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
326 break;
327 }
328
329 // Check slices are consistent with each other
330 if (image_index != 0)
331 {
332 if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
333 {
334 printf("ERROR: Image array[0] and [%d] are different formats\n", image_index);
335 break;
336 }
337
338 if ((slices[0]->dim_x != slice->dim_x) ||
339 (slices[0]->dim_y != slice->dim_y) ||
340 (slices[0]->dim_z != slice->dim_z))
341 {
342 printf("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
343 break;
344 }
345 }
346 else
347 {
348 is_hdr = slice_is_hdr;
349 component_count = slice_component_count;
350 }
351 }
352
353 // If all slices loaded correctly then repack them into a single image
354 if (slices.size() == dim_z)
355 {
356 unsigned int dim_x = slices[0]->dim_x;
357 unsigned int dim_y = slices[0]->dim_y;
358 int bitness = is_hdr ? 16 : 8;
359 int slice_size = dim_x * dim_y;
360
361 image = alloc_image(bitness, dim_x, dim_y, dim_z);
362
363 // Combine 2D source images into one 3D image
364 for (unsigned int z = 0; z < dim_z; z++)
365 {
366 if (image->data_type == ASTCENC_TYPE_U8)
367 {
368 uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
369 uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
370 size_t copy_size = slice_size * 4 * sizeof(uint8_t);
371 memcpy(data8, data8src, copy_size);
372 }
373 else if (image->data_type == ASTCENC_TYPE_F16)
374 {
375 uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
376 uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
377 size_t copy_size = slice_size * 4 * sizeof(uint16_t);
378 memcpy(data16, data16src, copy_size);
379 }
380 else // if (image->data_type == ASTCENC_TYPE_F32)
381 {
382 assert(image->data_type == ASTCENC_TYPE_F32);
383 float* data32 = static_cast<float*>(image->data[z]);
384 float* data32src = static_cast<float*>(slices[z]->data[0]);
385 size_t copy_size = slice_size * 4 * sizeof(float);
386 memcpy(data32, data32src, copy_size);
387 }
388 }
389 }
390
391 for (auto &i : slices)
392 {
393 free_image(i);
394 }
395 }
396
397 return image;
398 }
399
400 /**
401 * @brief Parse the command line.
402 *
403 * @param argc Command line argument count.
404 * @param[in] argv Command line argument vector.
405 * @param[out] operation Codec operation mode.
406 * @param[out] profile Codec color profile.
407 *
408 * @return 0 if everything is okay, 1 if there is some error
409 */
parse_commandline_options(int argc,char ** argv,astcenc_operation & operation,astcenc_profile & profile)410 static int parse_commandline_options(
411 int argc,
412 char **argv,
413 astcenc_operation& operation,
414 astcenc_profile& profile
415 ) {
416 assert(argc >= 2); (void)argc;
417
418 profile = ASTCENC_PRF_LDR;
419 operation = ASTCENC_OP_UNKNOWN;
420
421 int modes_count = sizeof(modes) / sizeof(modes[0]);
422 for (int i = 0; i < modes_count; i++)
423 {
424 if (!strcmp(modes[i].opt, argv[1]))
425 {
426 operation = modes[i].operation;
427 profile = modes[i].decode_mode;
428 break;
429 }
430 }
431
432 if (operation == ASTCENC_OP_UNKNOWN)
433 {
434 printf("ERROR: Unrecognized operation '%s'\n", argv[1]);
435 return 1;
436 }
437
438 return 0;
439 }
440
441 /**
442 * @brief Initialize the astcenc_config
443 *
444 * @param argc Command line argument count.
445 * @param[in] argv Command line argument vector.
446 * @param operation Codec operation mode.
447 * @param[out] profile Codec color profile.
448 * @param comp_image Compressed image if a decompress operation.
449 * @param[out] preprocess Image preprocess operation.
450 * @param[out] config Codec configuration.
451 *
452 * @return 0 if everything is okay, 1 if there is some error
453 */
init_astcenc_config(int argc,char ** argv,astcenc_profile profile,astcenc_operation operation,astc_compressed_image & comp_image,astcenc_preprocess & preprocess,astcenc_config & config)454 static int init_astcenc_config(
455 int argc,
456 char **argv,
457 astcenc_profile profile,
458 astcenc_operation operation,
459 astc_compressed_image& comp_image,
460 astcenc_preprocess& preprocess,
461 astcenc_config& config
462 ) {
463 unsigned int block_x = 0;
464 unsigned int block_y = 0;
465 unsigned int block_z = 1;
466
467 // For decode the block size is set by the incoming image.
468 if (operation == ASTCENC_OP_DECOMPRESS)
469 {
470 block_x = comp_image.block_x;
471 block_y = comp_image.block_y;
472 block_z = comp_image.block_z;
473 }
474
475 float quality = 0.0f;
476 preprocess = ASTCENC_PP_NONE;
477
478 // parse the command line's encoding options.
479 int argidx = 4;
480 if (operation & ASTCENC_STAGE_COMPRESS)
481 {
482 // Read and decode block size
483 if (argc < 5)
484 {
485 printf("ERROR: Block size must be specified\n");
486 return 1;
487 }
488
489 int cnt2D, cnt3D;
490 int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
491 &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
492 // Character after the last match should be a NUL
493 if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
494 {
495 printf("ERROR: Block size '%s' is invalid\n", argv[4]);
496 return 1;
497 }
498
499 // Read and decode search quality
500 if (argc < 6)
501 {
502 printf("ERROR: Search quality level must be specified\n");
503 return 1;
504 }
505
506 if (!strcmp(argv[5], "-fastest"))
507 {
508 quality = ASTCENC_PRE_FASTEST;
509 }
510 else if (!strcmp(argv[5], "-fast"))
511 {
512 quality = ASTCENC_PRE_FAST;
513 }
514 else if (!strcmp(argv[5], "-medium"))
515 {
516 quality = ASTCENC_PRE_MEDIUM;
517 }
518 else if (!strcmp(argv[5], "-thorough"))
519 {
520 quality = ASTCENC_PRE_THOROUGH;
521 }
522 else if (!strcmp(argv[5], "-verythorough"))
523 {
524 quality = ASTCENC_PRE_VERYTHOROUGH;
525 }
526 else if (!strcmp(argv[5], "-exhaustive"))
527 {
528 quality = ASTCENC_PRE_EXHAUSTIVE;
529 }
530 else if (is_float(argv[5]))
531 {
532 quality = static_cast<float>(atof(argv[5]));
533 }
534 else
535 {
536 printf("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
537 return 1;
538 }
539
540 argidx = 6;
541 }
542
543 unsigned int flags = 0;
544
545 // Gather the flags that we need
546 while (argidx < argc)
547 {
548 if (!strcmp(argv[argidx], "-a"))
549 {
550 // Skip over the data value for now
551 argidx++;
552 flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
553 }
554 else if (!strcmp(argv[argidx], "-mask"))
555 {
556 flags |= ASTCENC_FLG_MAP_MASK;
557 }
558 else if (!strcmp(argv[argidx], "-normal"))
559 {
560 flags |= ASTCENC_FLG_MAP_NORMAL;
561 }
562 else if (!strcmp(argv[argidx], "-rgbm"))
563 {
564 // Skip over the data value for now
565 argidx++;
566 flags |= ASTCENC_FLG_MAP_RGBM;
567 }
568 else if (!strcmp(argv[argidx], "-perceptual"))
569 {
570 flags |= ASTCENC_FLG_USE_PERCEPTUAL;
571 }
572 else if (!strcmp(argv[argidx], "-pp-normalize"))
573 {
574 if (preprocess != ASTCENC_PP_NONE)
575 {
576 printf("ERROR: Only a single image preprocess can be used\n");
577 return 1;
578 }
579 preprocess = ASTCENC_PP_NORMALIZE;
580 }
581 else if (!strcmp(argv[argidx], "-pp-premultiply"))
582 {
583 if (preprocess != ASTCENC_PP_NONE)
584 {
585 printf("ERROR: Only a single image preprocess can be used\n");
586 return 1;
587 }
588 preprocess = ASTCENC_PP_PREMULTIPLY;
589 }
590 argidx ++;
591 }
592
593 #if defined(ASTCENC_DECOMPRESS_ONLY)
594 flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
595 #else
596 // Decompression can skip some memory allocation, but need full tables
597 if (operation == ASTCENC_OP_DECOMPRESS)
598 {
599 flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
600 }
601 // Compression and test passes can skip some decimation initialization
602 // as we know we are decompressing images that were compressed using the
603 // same settings and heuristics ...
604 else
605 {
606 flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
607 }
608 #endif
609
610 astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
611 quality, flags, &config);
612 if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
613 {
614 printf("ERROR: Block size '%s' is invalid\n", argv[4]);
615 return 1;
616 }
617 else if (status == ASTCENC_ERR_BAD_CPU_ISA)
618 {
619 printf("ERROR: Required SIMD ISA support missing on this CPU\n");
620 return 1;
621 }
622 else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
623 {
624 printf("ERROR: astcenc must not be compiled with -ffast-math\n");
625 return 1;
626 }
627 else if (status != ASTCENC_SUCCESS)
628 {
629 printf("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
630 return 1;
631 }
632
633 return 0;
634 }
635
636 /**
637 * @brief Edit the astcenc_config
638 *
639 * @param argc Command line argument count.
640 * @param[in] argv Command line argument vector.
641 * @param operation Codec operation.
642 * @param[out] cli_config Command line config.
643 * @param[in,out] config Codec configuration.
644 *
645 * @return 0 if everything is OK, 1 if there is some error
646 */
edit_astcenc_config(int argc,char ** argv,const astcenc_operation operation,cli_config_options & cli_config,astcenc_config & config)647 static int edit_astcenc_config(
648 int argc,
649 char **argv,
650 const astcenc_operation operation,
651 cli_config_options& cli_config,
652 astcenc_config& config
653 ) {
654
655 int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
656
657 while (argidx < argc)
658 {
659 if (!strcmp(argv[argidx], "-silent"))
660 {
661 argidx++;
662 cli_config.silentmode = 1;
663 }
664 else if (!strcmp(argv[argidx], "-cw"))
665 {
666 argidx += 5;
667 if (argidx > argc)
668 {
669 printf("ERROR: -cw switch with less than 4 arguments\n");
670 return 1;
671 }
672
673 config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
674 config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
675 config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
676 config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
677 }
678 else if (!strcmp(argv[argidx], "-a"))
679 {
680 argidx += 2;
681 if (argidx > argc)
682 {
683 printf("ERROR: -a switch with no argument\n");
684 return 1;
685 }
686
687 config.a_scale_radius = atoi(argv[argidx - 1]);
688 }
689 else if (!strcmp(argv[argidx], "-esw"))
690 {
691 argidx += 2;
692 if (argidx > argc)
693 {
694 printf("ERROR: -esw switch with no argument\n");
695 return 1;
696 }
697
698 if (strlen(argv[argidx - 1]) != 4)
699 {
700 printf("ERROR: -esw pattern does not contain 4 characters\n");
701 return 1;
702 }
703
704 astcenc_swz swizzle_components[4];
705 for (int i = 0; i < 4; i++)
706 {
707 switch (argv[argidx - 1][i])
708 {
709 case 'r':
710 swizzle_components[i] = ASTCENC_SWZ_R;
711 break;
712 case 'g':
713 swizzle_components[i] = ASTCENC_SWZ_G;
714 break;
715 case 'b':
716 swizzle_components[i] = ASTCENC_SWZ_B;
717 break;
718 case 'a':
719 swizzle_components[i] = ASTCENC_SWZ_A;
720 break;
721 case '0':
722 swizzle_components[i] = ASTCENC_SWZ_0;
723 break;
724 case '1':
725 swizzle_components[i] = ASTCENC_SWZ_1;
726 break;
727 default:
728 printf("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
729 return 1;
730 }
731 }
732
733 cli_config.swz_encode.r = swizzle_components[0];
734 cli_config.swz_encode.g = swizzle_components[1];
735 cli_config.swz_encode.b = swizzle_components[2];
736 cli_config.swz_encode.a = swizzle_components[3];
737 }
738 else if (!strcmp(argv[argidx], "-ssw"))
739 {
740 argidx += 2;
741 if (argidx > argc)
742 {
743 printf("ERROR: -ssw switch with no argument\n");
744 return 1;
745 }
746
747 size_t char_count = strlen(argv[argidx - 1]);
748 if (char_count == 0)
749 {
750 printf("ERROR: -ssw pattern contains no characters\n");
751 return 1;
752 }
753
754 if (char_count > 4)
755 {
756 printf("ERROR: -ssw pattern contains more than 4 characters\n");
757 return 1;
758 }
759
760 bool found_r = false;
761 bool found_g = false;
762 bool found_b = false;
763 bool found_a = false;
764
765 for (size_t i = 0; i < char_count; i++)
766 {
767 switch (argv[argidx - 1][i])
768 {
769 case 'r':
770 found_r = true;
771 break;
772 case 'g':
773 found_g = true;
774 break;
775 case 'b':
776 found_b = true;
777 break;
778 case 'a':
779 found_a = true;
780 break;
781 default:
782 printf("ERROR: -ssw component '%c' is not valid\n", argv[argidx - 1][i]);
783 return 1;
784 }
785 }
786
787 config.cw_r_weight = found_r ? 1.0f : 0.0f;
788 config.cw_g_weight = found_g ? 1.0f : 0.0f;
789 config.cw_b_weight = found_b ? 1.0f : 0.0f;
790 config.cw_a_weight = found_a ? 1.0f : 0.0f;
791 }
792 else if (!strcmp(argv[argidx], "-dsw"))
793 {
794 argidx += 2;
795 if (argidx > argc)
796 {
797 printf("ERROR: -dsw switch with no argument\n");
798 return 1;
799 }
800
801 if (strlen(argv[argidx - 1]) != 4)
802 {
803 printf("ERROR: -dsw switch does not contain 4 characters\n");
804 return 1;
805 }
806
807 astcenc_swz swizzle_components[4];
808 for (int i = 0; i < 4; i++)
809 {
810 switch (argv[argidx - 1][i])
811 {
812 case 'r':
813 swizzle_components[i] = ASTCENC_SWZ_R;
814 break;
815 case 'g':
816 swizzle_components[i] = ASTCENC_SWZ_G;
817 break;
818 case 'b':
819 swizzle_components[i] = ASTCENC_SWZ_B;
820 break;
821 case 'a':
822 swizzle_components[i] = ASTCENC_SWZ_A;
823 break;
824 case '0':
825 swizzle_components[i] = ASTCENC_SWZ_0;
826 break;
827 case '1':
828 swizzle_components[i] = ASTCENC_SWZ_1;
829 break;
830 case 'z':
831 swizzle_components[i] = ASTCENC_SWZ_Z;
832 break;
833 default:
834 printf("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
835 return 1;
836 }
837 }
838
839 cli_config.swz_decode.r = swizzle_components[0];
840 cli_config.swz_decode.g = swizzle_components[1];
841 cli_config.swz_decode.b = swizzle_components[2];
842 cli_config.swz_decode.a = swizzle_components[3];
843 }
844 // presets begin here
845 else if (!strcmp(argv[argidx], "-mask"))
846 {
847 argidx++;
848 }
849 else if (!strcmp(argv[argidx], "-normal"))
850 {
851 argidx++;
852
853 cli_config.swz_encode.r = ASTCENC_SWZ_R;
854 cli_config.swz_encode.g = ASTCENC_SWZ_R;
855 cli_config.swz_encode.b = ASTCENC_SWZ_R;
856 cli_config.swz_encode.a = ASTCENC_SWZ_G;
857
858 cli_config.swz_decode.r = ASTCENC_SWZ_R;
859 cli_config.swz_decode.g = ASTCENC_SWZ_A;
860 cli_config.swz_decode.b = ASTCENC_SWZ_Z;
861 cli_config.swz_decode.a = ASTCENC_SWZ_1;
862 }
863 else if (!strcmp(argv[argidx], "-rgbm"))
864 {
865 argidx += 2;
866 if (argidx > argc)
867 {
868 printf("ERROR: -rgbm switch with no argument\n");
869 return 1;
870 }
871
872 config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
873 config.cw_a_weight = 2.0f * config.rgbm_m_scale;
874 }
875 else if (!strcmp(argv[argidx], "-perceptual"))
876 {
877 argidx++;
878 }
879 else if (!strcmp(argv[argidx], "-pp-normalize"))
880 {
881 argidx++;
882 }
883 else if (!strcmp(argv[argidx], "-pp-premultiply"))
884 {
885 argidx++;
886 }
887 else if (!strcmp(argv[argidx], "-blockmodelimit"))
888 {
889 argidx += 2;
890 if (argidx > argc)
891 {
892 printf("ERROR: -blockmodelimit switch with no argument\n");
893 return 1;
894 }
895
896 config.tune_block_mode_limit = atoi(argv[argidx - 1]);
897 }
898 else if (!strcmp(argv[argidx], "-partitioncountlimit"))
899 {
900 argidx += 2;
901 if (argidx > argc)
902 {
903 printf("ERROR: -partitioncountlimit switch with no argument\n");
904 return 1;
905 }
906
907 config.tune_partition_count_limit = atoi(argv[argidx - 1]);
908 }
909 else if (!strcmp(argv[argidx], "-2partitionindexlimit"))
910 {
911 argidx += 2;
912 if (argidx > argc)
913 {
914 printf("ERROR: -2partitionindexlimit switch with no argument\n");
915 return 1;
916 }
917
918 config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
919 }
920 else if (!strcmp(argv[argidx], "-3partitionindexlimit"))
921 {
922 argidx += 2;
923 if (argidx > argc)
924 {
925 printf("ERROR: -3partitionindexlimit switch with no argument\n");
926 return 1;
927 }
928
929 config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
930 }
931 else if (!strcmp(argv[argidx], "-4partitionindexlimit"))
932 {
933 argidx += 2;
934 if (argidx > argc)
935 {
936 printf("ERROR: -4partitionindexlimit switch with no argument\n");
937 return 1;
938 }
939
940 config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
941 }
942 else if (!strcmp(argv[argidx], "-2partitioncandiatelimit"))
943 {
944 argidx += 2;
945 if (argidx > argc)
946 {
947 printf("ERROR: -2partitioncandidatelimit switch with no argument\n");
948 return 1;
949 }
950
951 config.tune_2partitioning_candidate_limit = atoi(argv[argidx - 1]);
952 }
953 else if (!strcmp(argv[argidx], "-3partitioncandiatelimit"))
954 {
955 argidx += 2;
956 if (argidx > argc)
957 {
958 printf("ERROR: -3partitioncandiatelimit switch with no argument\n");
959 return 1;
960 }
961
962 config.tune_3partitioning_candidate_limit = atoi(argv[argidx - 1]);
963 }
964 else if (!strcmp(argv[argidx], "-4partitioncandiatelimit"))
965 {
966 argidx += 2;
967 if (argidx > argc)
968 {
969 printf("ERROR: -4partitioncandiatelimit switch with no argument\n");
970 return 1;
971 }
972
973 config.tune_4partitioning_candidate_limit = atoi(argv[argidx - 1]);
974 }
975 else if (!strcmp(argv[argidx], "-dblimit"))
976 {
977 argidx += 2;
978 if (argidx > argc)
979 {
980 printf("ERROR: -dblimit switch with no argument\n");
981 return 1;
982 }
983
984 if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
985 {
986 config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
987 }
988 }
989 else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
990 {
991 argidx += 2;
992 if (argidx > argc)
993 {
994 printf("ERROR: -2partitionlimitfactor switch with no argument\n");
995 return 1;
996 }
997
998 config.tune_2_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
999 }
1000 else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
1001 {
1002 argidx += 2;
1003 if (argidx > argc)
1004 {
1005 printf("ERROR: -3partitionlimitfactor switch with no argument\n");
1006 return 1;
1007 }
1008
1009 config.tune_3_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1010 }
1011 else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
1012 {
1013 argidx += 2;
1014 if (argidx > argc)
1015 {
1016 printf("ERROR: -2planelimitcorrelation switch with no argument\n");
1017 return 1;
1018 }
1019
1020 config.tune_2_plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
1021 }
1022 else if (!strcmp(argv[argidx], "-refinementlimit"))
1023 {
1024 argidx += 2;
1025 if (argidx > argc)
1026 {
1027 printf("ERROR: -refinementlimit switch with no argument\n");
1028 return 1;
1029 }
1030
1031 config.tune_refinement_limit = atoi(argv[argidx - 1]);
1032 }
1033 else if (!strcmp(argv[argidx], "-candidatelimit"))
1034 {
1035 argidx += 2;
1036 if (argidx > argc)
1037 {
1038 printf("ERROR: -candidatelimit switch with no argument\n");
1039 return 1;
1040 }
1041
1042 config.tune_candidate_limit = atoi(argv[argidx - 1]);
1043 }
1044 else if (!strcmp(argv[argidx], "-j"))
1045 {
1046 argidx += 2;
1047 if (argidx > argc)
1048 {
1049 printf("ERROR: -j switch with no argument\n");
1050 return 1;
1051 }
1052
1053 cli_config.thread_count = atoi(argv[argidx - 1]);
1054 }
1055 else if (!strcmp(argv[argidx], "-repeats"))
1056 {
1057 argidx += 2;
1058 if (argidx > argc)
1059 {
1060 printf("ERROR: -repeats switch with no argument\n");
1061 return 1;
1062 }
1063
1064 cli_config.repeat_count = atoi(argv[argidx - 1]);
1065 if (cli_config.repeat_count <= 0)
1066 {
1067 printf("ERROR: -repeats value must be at least one\n");
1068 return 1;
1069 }
1070 }
1071 else if (!strcmp(argv[argidx], "-yflip"))
1072 {
1073 argidx++;
1074 cli_config.y_flip = 1;
1075 }
1076 else if (!strcmp(argv[argidx], "-mpsnr"))
1077 {
1078 argidx += 3;
1079 if (argidx > argc)
1080 {
1081 printf("ERROR: -mpsnr switch with less than 2 arguments\n");
1082 return 1;
1083 }
1084
1085 cli_config.low_fstop = atoi(argv[argidx - 2]);
1086 cli_config.high_fstop = atoi(argv[argidx - 1]);
1087 if (cli_config.high_fstop < cli_config.low_fstop)
1088 {
1089 printf("ERROR: -mpsnr switch <low> is greater than the <high>\n");
1090 return 1;
1091 }
1092 }
1093 // Option: Encode a 3D image from a sequence of 2D images.
1094 else if (!strcmp(argv[argidx], "-zdim"))
1095 {
1096 // Only supports compressing
1097 if (!(operation & ASTCENC_STAGE_COMPRESS))
1098 {
1099 printf("ERROR: -zdim switch is only valid for compression\n");
1100 return 1;
1101 }
1102
1103 // Image depth must be specified.
1104 if (argidx + 2 > argc)
1105 {
1106 printf("ERROR: -zdim switch with no argument\n");
1107 return 1;
1108 }
1109 argidx++;
1110
1111 // Read array size (image depth).
1112 if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1113 {
1114 printf("ERROR: -zdim size '%s' is invalid\n", argv[argidx]);
1115 return 1;
1116 }
1117
1118 if ((cli_config.array_size > 1) && (config.block_z == 1))
1119 {
1120 printf("ERROR: -zdim with 3D input data for a 2D output format\n");
1121 return 1;
1122 }
1123 argidx++;
1124 }
1125 #if defined(ASTCENC_DIAGNOSTICS)
1126 else if (!strcmp(argv[argidx], "-dtrace"))
1127 {
1128 argidx += 2;
1129 if (argidx > argc)
1130 {
1131 printf("ERROR: -dtrace switch with no argument\n");
1132 return 1;
1133 }
1134
1135 config.trace_file_path = argv[argidx - 1];
1136 }
1137 #endif
1138 else if (!strcmp(argv[argidx], "-dimage"))
1139 {
1140 argidx += 1;
1141 cli_config.diagnostic_images = true;
1142 }
1143 else // check others as well
1144 {
1145 printf("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1146 return 1;
1147 }
1148 }
1149
1150 if (cli_config.thread_count <= 0)
1151 {
1152 cli_config.thread_count = get_cpu_count();
1153 }
1154
1155 #if defined(ASTCENC_DIAGNOSTICS)
1156 // Force single threaded for diagnostic builds
1157 cli_config.thread_count = 1;
1158
1159 if (!config.trace_file_path)
1160 {
1161 printf("ERROR: Diagnostics builds must set -dtrace\n");
1162 return 1;
1163 }
1164 #endif
1165
1166 return 0;
1167 }
1168
1169 /**
1170 * @brief Print the config settings in a human readable form.
1171 *
1172 * @param[in] cli_config Command line config.
1173 * @param[in] config Codec configuration.
1174 */
print_astcenc_config(const cli_config_options & cli_config,const astcenc_config & config)1175 static void print_astcenc_config(
1176 const cli_config_options& cli_config,
1177 const astcenc_config& config
1178 ) {
1179 // Print all encoding settings unless specifically told otherwise
1180 if (!cli_config.silentmode)
1181 {
1182 printf("Compressor settings\n");
1183 printf("===================\n\n");
1184
1185 switch (config.profile)
1186 {
1187 case ASTCENC_PRF_LDR:
1188 printf(" Color profile: LDR linear\n");
1189 break;
1190 case ASTCENC_PRF_LDR_SRGB:
1191 printf(" Color profile: LDR sRGB\n");
1192 break;
1193 case ASTCENC_PRF_HDR_RGB_LDR_A:
1194 printf(" Color profile: HDR RGB + LDR A\n");
1195 break;
1196 case ASTCENC_PRF_HDR:
1197 printf(" Color profile: HDR RGBA\n");
1198 break;
1199 }
1200
1201 if (config.block_z == 1)
1202 {
1203 printf(" Block size: %ux%u\n", config.block_x, config.block_y);
1204 }
1205 else
1206 {
1207 printf(" Block size: %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1208 }
1209
1210 printf(" Bitrate: %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1211 printf(" RGB alpha scale weight: %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1212 if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1213 {
1214 printf(" Radius RGB alpha scale: %u texels\n", config.a_scale_radius);
1215 }
1216
1217 printf(" R component weight: %g\n", static_cast<double>(config.cw_r_weight));
1218 printf(" G component weight: %g\n", static_cast<double>(config.cw_g_weight));
1219 printf(" B component weight: %g\n", static_cast<double>(config.cw_b_weight));
1220 printf(" A component weight: %g\n", static_cast<double>(config.cw_a_weight));
1221 printf(" Partition cutoff: %u partitions\n", config.tune_partition_count_limit);
1222 printf(" 2 partition index cutoff: %u partition ids\n", config.tune_2partition_index_limit);
1223 printf(" 3 partition index cutoff: %u partition ids\n", config.tune_3partition_index_limit);
1224 printf(" 4 partition index cutoff: %u partition ids\n", config.tune_4partition_index_limit);
1225 printf(" PSNR cutoff: %g dB\n", static_cast<double>(config.tune_db_limit));
1226 printf(" 3 partition cutoff: %g\n", static_cast<double>(config.tune_2_partition_early_out_limit_factor));
1227 printf(" 4 partition cutoff: %g\n", static_cast<double>(config.tune_3_partition_early_out_limit_factor));
1228 printf(" 2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2_plane_early_out_limit_correlation));
1229 printf(" Block mode centile cutoff: %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1230 printf(" Candidate cutoff: %u candidates\n", config.tune_candidate_limit);
1231 printf(" Refinement cutoff: %u iterations\n", config.tune_refinement_limit);
1232 printf(" Compressor thread count: %d\n", cli_config.thread_count);
1233 printf("\n");
1234 }
1235 }
1236
1237 /**
1238 * @brief Get the value of a single pixel in an image.
1239 *
1240 * Note, this implementation is not particularly optimal as it puts format
1241 * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1242 * acceptable as these are not performance critical paths.
1243 *
1244 * @param[in] img The output image.
1245 * @param x The pixel x coordinate.
1246 * @param y The pixel y coordinate.
1247 * @param z The pixel z coordinate.
1248 *
1249 * @return pixel The pixel color value to write.
1250 */
image_get_pixel(const astcenc_image & img,unsigned int x,unsigned int y,unsigned int z)1251 static vfloat4 image_get_pixel(
1252 const astcenc_image& img,
1253 unsigned int x,
1254 unsigned int y,
1255 unsigned int z
1256 ) {
1257 // We should never escape bounds
1258 assert(x < img.dim_x);
1259 assert(y < img.dim_y);
1260 assert(z < img.dim_z);
1261
1262 if (img.data_type == ASTCENC_TYPE_U8)
1263 {
1264 uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1265
1266 float r = data[(4 * img.dim_x * y) + (4 * x )] / 255.0f;
1267 float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1268 float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1269 float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1270
1271 return vfloat4(r, g, b, a);
1272 }
1273 else if (img.data_type == ASTCENC_TYPE_F16)
1274 {
1275 uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1276
1277 vint4 colori(
1278 data[(4 * img.dim_x * y) + (4 * x )],
1279 data[(4 * img.dim_x * y) + (4 * x + 1)],
1280 data[(4 * img.dim_x * y) + (4 * x + 2)],
1281 data[(4 * img.dim_x * y) + (4 * x + 3)]
1282 );
1283
1284 return float16_to_float(colori);
1285 }
1286 else // if (img.data_type == ASTCENC_TYPE_F32)
1287 {
1288 assert(img.data_type == ASTCENC_TYPE_F32);
1289 float* data = static_cast<float*>(img.data[z]);
1290
1291 return vfloat4(
1292 data[(4 * img.dim_x * y) + (4 * x )],
1293 data[(4 * img.dim_x * y) + (4 * x + 1)],
1294 data[(4 * img.dim_x * y) + (4 * x + 2)],
1295 data[(4 * img.dim_x * y) + (4 * x + 3)]
1296 );
1297 }
1298 }
1299
1300 /**
1301 * @brief Set the value of a single pixel in an image.
1302 *
1303 * @param[out] img The output image; must use F32 texture components.
1304 * @param x The pixel x coordinate.
1305 * @param y The pixel y coordinate.
1306 * @param z The pixel z coordinate.
1307 * @param pixel The pixel color value to write.
1308 */
image_set_pixel(astcenc_image & img,unsigned int x,unsigned int y,unsigned int z,vfloat4 pixel)1309 static void image_set_pixel(
1310 astcenc_image& img,
1311 unsigned int x,
1312 unsigned int y,
1313 unsigned int z,
1314 vfloat4 pixel
1315 ) {
1316 // We should never escape bounds
1317 assert(x < img.dim_x);
1318 assert(y < img.dim_y);
1319 assert(z < img.dim_z);
1320 assert(img.data_type == ASTCENC_TYPE_F32);
1321
1322 float* data = static_cast<float*>(img.data[z]);
1323
1324 data[(4 * img.dim_x * y) + (4 * x )] = pixel.lane<0>();
1325 data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1326 data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1327 data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1328 }
1329
1330 /**
1331 * @brief Set the value of a single pixel in an image.
1332 *
1333 * @param[out] img The output image; must use F32 texture components.
1334 * @param x The pixel x coordinate.
1335 * @param y The pixel y coordinate.
1336 * @param pixel The pixel color value to write.
1337 */
image_set_pixel_u8(astcenc_image & img,size_t x,size_t y,vint4 pixel)1338 static void image_set_pixel_u8(
1339 astcenc_image& img,
1340 size_t x,
1341 size_t y,
1342 vint4 pixel
1343 ) {
1344 // We should never escape bounds
1345 assert(x < img.dim_x);
1346 assert(y < img.dim_y);
1347 assert(img.data_type == ASTCENC_TYPE_U8);
1348
1349 uint8_t* data = static_cast<uint8_t*>(img.data[0]);
1350 pixel = pack_low_bytes(pixel);
1351 store_nbytes(pixel, data + (4 * img.dim_x * y) + (4 * x ));
1352 }
1353
1354 /**
1355 * @brief Create a copy of @c input with forced unit-length normal vectors.
1356 *
1357 * It is assumed that all normal vectors are stored in the RGB components, and
1358 * stored in a packed unsigned range of [0,1] which must be unpacked prior
1359 * normalization. Data must then be repacked into this form for handing over to
1360 * the core codec.
1361 *
1362 * @param[in] input The input image.
1363 * @param[out] output The output image, must use F32 components.
1364 */
image_preprocess_normalize(const astcenc_image & input,astcenc_image & output)1365 static void image_preprocess_normalize(
1366 const astcenc_image& input,
1367 astcenc_image& output
1368 ) {
1369 for (unsigned int z = 0; z < input.dim_z; z++)
1370 {
1371 for (unsigned int y = 0; y < input.dim_y; y++)
1372 {
1373 for (unsigned int x = 0; x < input.dim_x; x++)
1374 {
1375 vfloat4 pixel = image_get_pixel(input, x, y, z);
1376
1377 // Stash alpha component and zero
1378 float a = pixel.lane<3>();
1379 pixel.set_lane<3>(0.0f);
1380
1381 // Decode [0,1] normals to [-1,1]
1382 pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1383 pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1384 pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1385
1386 // Normalize pixel and restore alpha
1387 pixel = normalize(pixel);
1388 pixel.set_lane<3>(a);
1389
1390 // Encode [-1,1] normals to [0,1]
1391 pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1392 pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1393 pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1394
1395 image_set_pixel(output, x, y, z, pixel);
1396 }
1397 }
1398 }
1399 }
1400
/**
 * @brief Linearize an sRGB value.
 *
 * @param a   The sRGB gamma-encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
	float a
) {
	// Values at or below the threshold use the linear segment of the curve,
	// everything above it uses the power segment
	const bool is_linear_segment = (a <= 0.04045f);

	return is_linear_segment
	     ? a * (1.0f / 12.92f)
	     : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
1416
/**
 * @brief sRGB gamma-encode a linear value.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
	float a
) {
	// Values at or below the threshold use the linear segment of the curve,
	// everything above it uses the power segment
	const bool is_linear_segment = (a <= 0.0031308f);

	return is_linear_segment
	     ? a * 12.92f
	     : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
1432
1433 /**
1434 * @brief Create a copy of @c input with premultiplied color data.
1435 *
1436 * If we are compressing sRGB data we linearize the data prior to
1437 * premultiplication and re-gamma-encode afterwards.
1438 *
1439 * @param[in] input The input image.
1440 * @param[out] output The output image, must use F32 components.
1441 * @param profile The encoding profile.
1442 */
image_preprocess_premultiply(const astcenc_image & input,astcenc_image & output,astcenc_profile profile)1443 static void image_preprocess_premultiply(
1444 const astcenc_image& input,
1445 astcenc_image& output,
1446 astcenc_profile profile
1447 ) {
1448 for (unsigned int z = 0; z < input.dim_z; z++)
1449 {
1450 for (unsigned int y = 0; y < input.dim_y; y++)
1451 {
1452 for (unsigned int x = 0; x < input.dim_x; x++)
1453 {
1454 vfloat4 pixel = image_get_pixel(input, x, y, z);
1455
1456 // Linearize sRGB
1457 if (profile == ASTCENC_PRF_LDR_SRGB)
1458 {
1459 pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1460 pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1461 pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1462 }
1463
1464 // Premultiply pixel in linear-space
1465 pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1466 pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1467 pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1468
1469 // Gamma-encode sRGB
1470 if (profile == ASTCENC_PRF_LDR_SRGB)
1471 {
1472 pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1473 pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1474 pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1475 }
1476
1477 image_set_pixel(output, x, y, z, pixel);
1478 }
1479 }
1480 }
1481 }
1482
1483 /**
1484 * @brief Populate a single diagnostic image showing aspects of the encoding.
1485 *
1486 * @param context The context to use.
1487 * @param image The compressed image to analyze.
1488 * @param diag_image The output visualization image to populate.
1489 * @param texel_func The per-texel callback used to determine output color.
1490 */
print_diagnostic_image(astcenc_context * context,const astc_compressed_image & image,astcenc_image & diag_image,std::function<vint4 (astcenc_block_info &,size_t,size_t)> texel_func)1491 static void print_diagnostic_image(
1492 astcenc_context* context,
1493 const astc_compressed_image& image,
1494 astcenc_image& diag_image,
1495 std::function<vint4(astcenc_block_info&, size_t, size_t)> texel_func
1496 ) {
1497 size_t block_cols = (image.dim_x + image.block_x - 1) / image.block_x;
1498 size_t block_rows = (image.dim_y + image.block_y - 1) / image.block_y;
1499
1500 uint8_t* data = image.data;
1501 for (size_t block_y = 0; block_y < block_rows; block_y++)
1502 {
1503 for (size_t block_x = 0; block_x < block_cols; block_x++)
1504 {
1505 astcenc_block_info block_info;
1506 astcenc_get_block_info(context, data, &block_info);
1507 data += 16;
1508
1509 size_t start_row = block_y * image.block_y;
1510 size_t start_col = block_x * image.block_x;
1511
1512 size_t end_row = astc::min(start_row + image.block_y, static_cast<size_t>(image.dim_y));
1513 size_t end_col = astc::min(start_col + image.block_x, static_cast<size_t>(image.dim_x));
1514
1515 for (size_t texel_y = start_row; texel_y < end_row; texel_y++)
1516 {
1517 for (size_t texel_x = start_col; texel_x < end_col; texel_x++)
1518 {
1519 vint4 color = texel_func(block_info, texel_x - start_col, texel_y - start_row);
1520 image_set_pixel_u8(diag_image, texel_x, texel_y, color);
1521 }
1522 }
1523 }
1524 }
1525 }
1526
1527 /**
1528 * @brief Print a set of diagnostic images showing aspects of the encoding.
1529 *
1530 * @param context The context to use.
1531 * @param image The compressed image to analyze.
1532 * @param output_file The output file name to use as a stem for new names.
1533 */
print_diagnostic_images(astcenc_context * context,const astc_compressed_image & image,const std::string & output_file)1534 static void print_diagnostic_images(
1535 astcenc_context* context,
1536 const astc_compressed_image& image,
1537 const std::string& output_file
1538 ) {
1539 if (image.dim_z != 1)
1540 {
1541 return;
1542 }
1543
1544 // Try to find a file extension we know about
1545 size_t index = output_file.find_last_of(".");
1546 std::string stem = output_file;
1547 if (index != std::string::npos)
1548 {
1549 stem = stem.substr(0, index);
1550 }
1551
1552 auto diag_image = alloc_image(8, image.dim_x, image.dim_y, image.dim_z);
1553
1554 // ---- ---- ---- ---- Partitioning ---- ---- ---- ----
1555 auto partition_func = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1556 const vint4 colors[] {
1557 vint4( 0, 0, 0, 255),
1558 vint4(255, 0, 0, 255),
1559 vint4( 0, 255, 0, 255),
1560 vint4( 0, 0, 255, 255),
1561 vint4(255, 255, 255, 255)
1562 };
1563
1564 size_t texel_index = texel_y * info.block_x + texel_x;
1565
1566 int partition { 0 };
1567 if (!info.is_constant_block)
1568 {
1569 partition = info.partition_assignment[texel_index] + 1;
1570 }
1571
1572 return colors[partition];
1573 };
1574
1575 print_diagnostic_image(context, image, *diag_image, partition_func);
1576 std::string fname = stem + "_diag_partitioning.png";
1577 store_ncimage(diag_image, fname.c_str(), false);
1578
1579 // ---- ---- ---- ---- Weight planes ---- ---- ---- ----
1580 auto texel_func1 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1581 (void)texel_x;
1582 (void)texel_y;
1583
1584 const vint4 colors[] {
1585 vint4( 0, 0, 0, 255),
1586 vint4(255, 0, 0, 255),
1587 vint4( 0, 255, 0, 255),
1588 vint4( 0, 0, 255, 255),
1589 vint4(255, 255, 255, 255)
1590 };
1591
1592 int component { 0 };
1593 if (info.is_dual_plane_block)
1594 {
1595 component = info.dual_plane_component + 1;
1596 }
1597
1598 return colors[component];
1599 };
1600
1601 print_diagnostic_image(context, image, *diag_image, texel_func1);
1602 fname = stem + "_diag_weight_plane2.png";
1603 store_ncimage(diag_image, fname.c_str(), false);
1604
1605 // ---- ---- ---- ---- Weight density ---- ---- ---- ----
1606 auto texel_func2 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1607 (void)texel_x;
1608 (void)texel_y;
1609
1610 float density = 0.0f;
1611 if (!info.is_constant_block)
1612 {
1613 float texel_count = static_cast<float>(info.block_x * info.block_y);
1614 float weight_count = static_cast<float>(info.weight_x * info.weight_y);
1615 density = weight_count / texel_count;
1616 }
1617
1618 int densityi = static_cast<int>(255.0f * density);
1619 return vint4(densityi, densityi, densityi, 255);
1620 };
1621
1622 print_diagnostic_image(context, image, *diag_image, texel_func2);
1623 fname = stem + "_diag_weight_density.png";
1624 store_ncimage(diag_image, fname.c_str(), false);
1625
1626 // ---- ---- ---- ---- Weight quant ---- ---- ---- ----
1627 auto texel_func3 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1628 (void)texel_x;
1629 (void)texel_y;
1630
1631 int quant { 0 };
1632 if (!info.is_constant_block)
1633 {
1634 quant = info.weight_level_count - 1;
1635 }
1636
1637 return vint4(quant, quant, quant, 255);
1638 };
1639
1640 print_diagnostic_image(context, image, *diag_image, texel_func3);
1641 fname = stem + "_diag_weight_quant.png";
1642 store_ncimage(diag_image, fname.c_str(), false);
1643
1644 // ---- ---- ---- ---- Color quant ---- ---- ---- ----
1645 auto texel_func4 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1646 (void)texel_x;
1647 (void)texel_y;
1648
1649 int quant { 0 };
1650 if (!info.is_constant_block)
1651 {
1652 quant = info.color_level_count - 1;
1653 }
1654
1655 return vint4(quant, quant, quant, 255);
1656 };
1657
1658 print_diagnostic_image(context, image, *diag_image, texel_func4);
1659 fname = stem + "_diag_color_quant.png";
1660 store_ncimage(diag_image, fname.c_str(), false);
1661
1662 // ---- ---- ---- ---- Color endpoint mode: Index ---- ---- ---- ----
1663 auto texel_func5 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1664 (void)texel_x;
1665 (void)texel_y;
1666
1667 size_t texel_index = texel_y * info.block_x + texel_x;
1668
1669 int cem { 255 };
1670 if (!info.is_constant_block)
1671 {
1672 uint8_t partition = info.partition_assignment[texel_index];
1673 cem = info.color_endpoint_modes[partition] * 16;
1674 }
1675
1676 return vint4(cem, cem, cem, 255);
1677 };
1678
1679 print_diagnostic_image(context, image, *diag_image, texel_func5);
1680 fname = stem + "_diag_cem_index.png";
1681 store_ncimage(diag_image, fname.c_str(), false);
1682
1683 // ---- ---- ---- ---- Color endpoint mode: Components ---- ---- ---- ----
1684 auto texel_func6 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1685 (void)texel_x;
1686 (void)texel_y;
1687
1688 const vint4 colors[] {
1689 vint4( 0, 0, 0, 255),
1690 vint4(255, 0, 0, 255),
1691 vint4( 0, 255, 0, 255),
1692 vint4( 0, 0, 255, 255),
1693 vint4(255, 255, 255, 255)
1694 };
1695
1696 size_t texel_index = texel_y * info.block_x + texel_x;
1697
1698 int components { 0 };
1699 if (!info.is_constant_block)
1700 {
1701 uint8_t partition = info.partition_assignment[texel_index];
1702 uint8_t cem = info.color_endpoint_modes[partition];
1703
1704 switch (cem)
1705 {
1706 case 0:
1707 case 1:
1708 case 2:
1709 case 3:
1710 components = 1;
1711 break;
1712 case 4:
1713 case 5:
1714 components = 2;
1715 break;
1716 case 6:
1717 case 7:
1718 case 8:
1719 case 9:
1720 case 11:
1721 components = 3;
1722 break;
1723 default:
1724 components = 4;
1725 break;
1726 }
1727 }
1728
1729 return colors[components];
1730 };
1731
1732 print_diagnostic_image(context, image, *diag_image, texel_func6);
1733 fname = stem + "_diag_cem_components.png";
1734 store_ncimage(diag_image, fname.c_str(), false);
1735
1736 // ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1737 auto texel_func7 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1738 (void)texel_x;
1739 (void)texel_y;
1740
1741 const vint4 colors[] {
1742 vint4( 0, 0, 0, 255),
1743 vint4(255, 0, 0, 255),
1744 vint4( 0, 255, 0, 255),
1745 vint4( 0, 0, 255, 255),
1746 };
1747
1748 size_t texel_index = texel_y * info.block_x + texel_x;
1749
1750 int style { 0 };
1751 if (!info.is_constant_block)
1752 {
1753 uint8_t partition = info.partition_assignment[texel_index];
1754 uint8_t cem = info.color_endpoint_modes[partition];
1755
1756 switch (cem)
1757 {
1758 // Direct - two absolute endpoints
1759 case 0:
1760 case 1:
1761 case 2:
1762 case 3:
1763 case 4:
1764 case 8:
1765 case 11:
1766 case 12:
1767 case 14:
1768 case 15:
1769 style = 1;
1770 break;
1771 // Offset - one absolute plus delta
1772 case 5:
1773 case 9:
1774 case 13:
1775 style = 2;
1776 break;
1777 // Scale - one absolute plus scale
1778 case 6:
1779 case 7:
1780 case 10:
1781 style = 3;
1782 break;
1783 // Shouldn't happen ...
1784 default:
1785 style = 0;
1786 break;
1787 }
1788 }
1789
1790 return colors[style];
1791 };
1792
1793 print_diagnostic_image(context, image, *diag_image, texel_func7);
1794 fname = stem + "_diag_cem_style.png";
1795 store_ncimage(diag_image, fname.c_str(), false);
1796
1797 // ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1798 auto texel_func8 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1799 (void)texel_x;
1800 (void)texel_y;
1801
1802 size_t texel_index = texel_y * info.block_x + texel_x;
1803
1804 int style { 0 };
1805 if (!info.is_constant_block)
1806 {
1807 uint8_t partition = info.partition_assignment[texel_index];
1808 uint8_t cem = info.color_endpoint_modes[partition];
1809
1810 switch (cem)
1811 {
1812 // LDR blocks
1813 case 0:
1814 case 1:
1815 case 4:
1816 case 5:
1817 case 6:
1818 case 8:
1819 case 9:
1820 case 10:
1821 case 12:
1822 case 13:
1823 style = 128;
1824 break;
1825 // HDR blocks
1826 default:
1827 style = 155;
1828 break;
1829 }
1830 }
1831
1832 return vint4(style, style, style, 255);
1833 };
1834
1835 print_diagnostic_image(context, image, *diag_image, texel_func8);
1836 fname = stem + "_diag_cem_hdr.png";
1837 store_ncimage(diag_image, fname.c_str(), false);
1838
1839 free_image(diag_image);
1840 }
1841
1842 /**
1843 * @brief The main entry point.
1844 *
1845 * @param argc The number of arguments.
1846 * @param argv The vector of arguments.
1847 *
1848 * @return 0 on success, non-zero otherwise.
1849 */
main(int argc,char ** argv)1850 int main(
1851 int argc,
1852 char **argv
1853 ) {
1854 double start_time = get_time();
1855
1856 if (argc < 2)
1857 {
1858 astcenc_print_shorthelp();
1859 return 0;
1860 }
1861
1862 astcenc_operation operation;
1863 astcenc_profile profile;
1864 int error = parse_commandline_options(argc, argv, operation, profile);
1865 if (error)
1866 {
1867 return 1;
1868 }
1869
1870 switch (operation)
1871 {
1872 case ASTCENC_OP_HELP:
1873 astcenc_print_longhelp();
1874 return 0;
1875 case ASTCENC_OP_VERSION:
1876 astcenc_print_header();
1877 return 0;
1878 default:
1879 break;
1880 }
1881
1882 std::string input_filename = argc >= 3 ? argv[2] : "";
1883 std::string output_filename = argc >= 4 ? argv[3] : "";
1884
1885 if (input_filename.empty())
1886 {
1887 printf("ERROR: Input file not specified\n");
1888 return 1;
1889 }
1890
1891 if (output_filename.empty())
1892 {
1893 printf("ERROR: Output file not specified\n");
1894 return 1;
1895 }
1896
1897 // TODO: Handle RAII resources so they get freed when out of scope
1898 // Load the compressed input file if needed
1899
1900 // This has to come first, as the block size is in the file header
1901 astc_compressed_image image_comp {};
1902 if (operation & ASTCENC_STAGE_LD_COMP)
1903 {
1904 if (ends_with(input_filename, ".astc"))
1905 {
1906 error = load_cimage(input_filename.c_str(), image_comp);
1907 if (error)
1908 {
1909 return 1;
1910 }
1911 }
1912 else if (ends_with(input_filename, ".ktx"))
1913 {
1914 bool is_srgb;
1915 error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
1916 if (error)
1917 {
1918 return 1;
1919 }
1920
1921 if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
1922 {
1923 printf("WARNING: Input file is sRGB, but decompressing as linear\n");
1924 }
1925
1926 if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
1927 {
1928 printf("WARNING: Input file is linear, but decompressing as sRGB\n");
1929 }
1930 }
1931 else
1932 {
1933 printf("ERROR: Unknown compressed input file type\n");
1934 return 1;
1935 }
1936 }
1937
1938 astcenc_config config {};
1939 astcenc_preprocess preprocess;
1940 error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
1941 if (error)
1942 {
1943 return 1;
1944 }
1945
1946 // Initialize cli_config_options with default values
1947 cli_config_options cli_config { 0, 1, 1, false, false, false, -10, 10,
1948 { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
1949 { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
1950
1951 error = edit_astcenc_config(argc, argv, operation, cli_config, config);
1952 if (error)
1953 {
1954 return 1;
1955 }
1956
1957 astcenc_image* image_uncomp_in = nullptr ;
1958 unsigned int image_uncomp_in_component_count = 0;
1959 bool image_uncomp_in_is_hdr = false;
1960 astcenc_image* image_decomp_out = nullptr;
1961
1962 // TODO: Handle RAII resources so they get freed when out of scope
1963 astcenc_error codec_status;
1964 astcenc_context* codec_context;
1965
1966
1967 // Preflight - check we have valid extensions for storing a file
1968 if (operation & ASTCENC_STAGE_ST_NCOMP)
1969 {
1970 int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
1971 if (bitness < 0)
1972 {
1973 const char *eptr = strrchr(output_filename.c_str(), '.');
1974 eptr = eptr ? eptr : "";
1975 printf("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
1976 return 1;
1977 }
1978 }
1979
1980 if (operation & ASTCENC_STAGE_ST_COMP)
1981 {
1982 #if defined(_WIN32)
1983 bool is_null = output_filename == "NUL" || output_filename == "nul";
1984 #else
1985 bool is_null = output_filename == "/dev/null";
1986 #endif
1987
1988 if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
1989 {
1990 const char *eptr = strrchr(output_filename.c_str(), '.');
1991 eptr = eptr ? eptr : "";
1992 printf("ERROR: Unknown compressed output file type '%s'\n", eptr);
1993 return 1;
1994 }
1995 }
1996
1997 codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
1998 if (codec_status != ASTCENC_SUCCESS)
1999 {
2000 printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
2001 return 1;
2002 }
2003
2004 // Load the uncompressed input file if needed
2005 if (operation & ASTCENC_STAGE_LD_NCOMP)
2006 {
2007 image_uncomp_in = load_uncomp_file(
2008 input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
2009 image_uncomp_in_is_hdr, image_uncomp_in_component_count);
2010 if (!image_uncomp_in)
2011 {
2012 printf ("ERROR: Failed to load uncompressed image file\n");
2013 return 1;
2014 }
2015
2016
2017 if (preprocess != ASTCENC_PP_NONE)
2018 {
2019 // Allocate a float image so we can avoid additional quantization,
2020 // as e.g. premultiplication can result in fractional color values
2021 astcenc_image* image_pp = alloc_image(32,
2022 image_uncomp_in->dim_x,
2023 image_uncomp_in->dim_y,
2024 image_uncomp_in->dim_z);
2025 if (!image_pp)
2026 {
2027 printf ("ERROR: Failed to allocate preprocessed image\n");
2028 return 1;
2029 }
2030
2031 if (preprocess == ASTCENC_PP_NORMALIZE)
2032 {
2033 image_preprocess_normalize(*image_uncomp_in, *image_pp);
2034 }
2035
2036 if (preprocess == ASTCENC_PP_PREMULTIPLY)
2037 {
2038 image_preprocess_premultiply(*image_uncomp_in, *image_pp,
2039 config.profile);
2040 }
2041
2042 // Delete the original as we no longer need it
2043 free_image(image_uncomp_in);
2044 image_uncomp_in = image_pp;
2045 }
2046
2047 if (!cli_config.silentmode)
2048 {
2049 printf("Source image\n");
2050 printf("============\n\n");
2051 printf(" Source: %s\n", input_filename.c_str());
2052 printf(" Color profile: %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
2053 if (image_uncomp_in->dim_z > 1)
2054 {
2055 printf(" Dimensions: 3D, %ux%ux%u\n",
2056 image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
2057 }
2058 else
2059 {
2060 printf(" Dimensions: 2D, %ux%u\n",
2061 image_uncomp_in->dim_x, image_uncomp_in->dim_y);
2062 }
2063 printf(" Components: %d\n\n", image_uncomp_in_component_count);
2064 }
2065 }
2066
2067 double image_size = 0.0;
2068 if (image_uncomp_in)
2069 {
2070 image_size = static_cast<double>(image_uncomp_in->dim_x) *
2071 static_cast<double>(image_uncomp_in->dim_y) *
2072 static_cast<double>(image_uncomp_in->dim_z);
2073 }
2074 else
2075 {
2076 image_size = static_cast<double>(image_comp.dim_x) *
2077 static_cast<double>(image_comp.dim_y) *
2078 static_cast<double>(image_comp.dim_z);
2079 }
2080
2081 // Compress an image
2082 double best_compression_time = 100000.0;
2083 double total_compression_time = 0.0;
2084 if (operation & ASTCENC_STAGE_COMPRESS)
2085 {
2086 print_astcenc_config(cli_config, config);
2087
2088 unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
2089 unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
2090 unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
2091 size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
2092 uint8_t* buffer = new uint8_t[buffer_size];
2093
2094 compression_workload work;
2095 work.context = codec_context;
2096 work.image = image_uncomp_in;
2097 work.swizzle = cli_config.swz_encode;
2098 work.data_out = buffer;
2099 work.data_len = buffer_size;
2100 work.error = ASTCENC_SUCCESS;
2101
2102 // Only launch worker threads for multi-threaded use - it makes basic
2103 // single-threaded profiling and debugging a little less convoluted
2104 double start_compression_time = get_time();
2105 for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2106 {
2107 double start_iter_time = get_time();
2108 if (cli_config.thread_count > 1)
2109 {
2110 launch_threads(cli_config.thread_count, compression_workload_runner, &work);
2111 }
2112 else
2113 {
2114 work.error = astcenc_compress_image(
2115 work.context, work.image, &work.swizzle,
2116 work.data_out, work.data_len, 0);
2117 }
2118
2119 astcenc_compress_reset(codec_context);
2120
2121 double iter_time = get_time() - start_iter_time;
2122 best_compression_time = astc::min(iter_time, best_compression_time);
2123 }
2124 total_compression_time = get_time() - start_compression_time;
2125
2126 if (work.error != ASTCENC_SUCCESS)
2127 {
2128 printf("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
2129 return 1;
2130 }
2131
2132 image_comp.block_x = config.block_x;
2133 image_comp.block_y = config.block_y;
2134 image_comp.block_z = config.block_z;
2135 image_comp.dim_x = image_uncomp_in->dim_x;
2136 image_comp.dim_y = image_uncomp_in->dim_y;
2137 image_comp.dim_z = image_uncomp_in->dim_z;
2138 image_comp.data = buffer;
2139 image_comp.data_len = buffer_size;
2140 }
2141
2142 // Decompress an image
2143 double best_decompression_time = 100000.0;
2144 double total_decompression_time = 0.0;
2145 if (operation & ASTCENC_STAGE_DECOMPRESS)
2146 {
2147 int out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2148 if (out_bitness == 0)
2149 {
2150 bool is_hdr = (config.profile == ASTCENC_PRF_HDR) || (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
2151 out_bitness = is_hdr ? 16 : 8;
2152 }
2153
2154 image_decomp_out = alloc_image(
2155 out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
2156
2157 decompression_workload work;
2158 work.context = codec_context;
2159 work.data = image_comp.data;
2160 work.data_len = image_comp.data_len;
2161 work.image_out = image_decomp_out;
2162 work.swizzle = cli_config.swz_decode;
2163 work.error = ASTCENC_SUCCESS;
2164
2165 // Only launch worker threads for multi-threaded use - it makes basic
2166 // single-threaded profiling and debugging a little less convoluted
2167 double start_decompression_time = get_time();
2168 for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2169 {
2170 double start_iter_time = get_time();
2171 if (cli_config.thread_count > 1)
2172 {
2173 launch_threads(cli_config.thread_count, decompression_workload_runner, &work);
2174 }
2175 else
2176 {
2177 work.error = astcenc_decompress_image(
2178 work.context, work.data, work.data_len,
2179 work.image_out, &work.swizzle, 0);
2180 }
2181
2182 astcenc_decompress_reset(codec_context);
2183
2184 double iter_time = get_time() - start_iter_time;
2185 best_decompression_time = astc::min(iter_time, best_decompression_time);
2186 }
2187 total_decompression_time = get_time() - start_decompression_time;
2188
2189 if (work.error != ASTCENC_SUCCESS)
2190 {
2191 printf("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
2192 return 1;
2193 }
2194 }
2195
2196 #if defined(_WIN32)
2197 bool is_null = output_filename == "NUL" || output_filename == "nul";
2198 #else
2199 bool is_null = output_filename == "/dev/null";
2200 #endif
2201
2202 // Print metrics in comparison mode
2203 if (operation & ASTCENC_STAGE_COMPARE)
2204 {
2205 bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
2206
2207 compute_error_metrics(
2208 image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
2209 image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
2210 }
2211
2212 // Store compressed image
2213 if (operation & ASTCENC_STAGE_ST_COMP)
2214 {
2215 if (ends_with(output_filename, ".astc"))
2216 {
2217 error = store_cimage(image_comp, output_filename.c_str());
2218 if (error)
2219 {
2220 printf ("ERROR: Failed to store compressed image\n");
2221 return 1;
2222 }
2223 }
2224 else if (ends_with(output_filename, ".ktx"))
2225 {
2226 bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
2227 error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
2228 if (error)
2229 {
2230 printf ("ERROR: Failed to store compressed image\n");
2231 return 1;
2232 }
2233 }
2234 else
2235 {
2236 if (!is_null)
2237 {
2238 printf("ERROR: Unknown compressed output file type\n");
2239 return 1;
2240 }
2241 }
2242 }
2243
2244 // Store decompressed image
2245 if (operation & ASTCENC_STAGE_ST_NCOMP)
2246 {
2247 if (!is_null)
2248 {
2249 bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
2250 cli_config.y_flip);
2251 if (!store_result)
2252 {
2253 printf("ERROR: Failed to write output image %s\n", output_filename.c_str());
2254 return 1;
2255 }
2256 }
2257 }
2258
2259 // Store diagnostic images
2260 if (cli_config.diagnostic_images && !is_null)
2261 {
2262 print_diagnostic_images(codec_context, image_comp, output_filename);
2263 }
2264
2265 free_image(image_uncomp_in);
2266 free_image(image_decomp_out);
2267 astcenc_context_free(codec_context);
2268
2269 delete[] image_comp.data;
2270
2271 if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
2272 {
2273 double end_time = get_time();
2274
2275 double repeats = static_cast<double>(cli_config.repeat_count);
2276 double avg_compression_time = total_compression_time / repeats;
2277 double avg_decompression_time = total_decompression_time / repeats;
2278 double total_time = (end_time - start_time) - ((repeats - 1.0) * avg_compression_time) - ((repeats - 1.0) * avg_decompression_time);
2279
2280 printf("Performance metrics\n");
2281 printf("===================\n\n");
2282 printf(" Total time: %8.4f s\n", total_time);
2283
2284 if (operation & ASTCENC_STAGE_COMPRESS)
2285 {
2286 double compression_rate = image_size / (best_compression_time * 1000000.0);
2287
2288 printf(" Coding time: %8.4f s\n", best_compression_time);
2289 printf(" Coding rate: %8.4f MT/s\n", compression_rate);
2290 }
2291
2292 if (operation & ASTCENC_STAGE_DECOMPRESS)
2293 {
2294 double decompression_rate = image_size / (best_decompression_time * 1000000.0);
2295 printf(" Decoding time: %8.4f s\n", best_decompression_time);
2296 printf(" Decoding rate: %8.4f MT/s\n", decompression_rate);
2297 }
2298 }
2299
2300 return 0;
2301 }
2302