@@ -27,6 +27,7 @@ dxt_hc::dxt_hc()
2727 : m_num_blocks(0 ),
2828 m_has_color_blocks (false ),
2929 m_has_etc_color_blocks(false ),
30+ m_has_subblocks(false ),
3031 m_num_alpha_blocks(0 ),
3132 m_main_thread_id(crn_get_current_thread_id()),
3233 m_canceled(false ),
@@ -78,9 +79,10 @@ bool dxt_hc::compress(
7879 const params& p
7980 ) {
8081 clear ();
81- m_has_etc_color_blocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A;
82+ m_has_etc_color_blocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A || p.m_format == cETC1S || p.m_format == cETC2AS;
83+ m_has_subblocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A;
8284 m_has_color_blocks = p.m_format == cDXT1 || p.m_format == cDXT5 || m_has_etc_color_blocks;
83- m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A || p.m_format == cETC2A ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 2 : 0 ;
85+ m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A || p.m_format == cETC2A || p. m_format == cETC2AS ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 2 : 0 ;
8486 if (!m_has_color_blocks && !m_num_alpha_blocks)
8587 return false ;
8688 m_blocks = blocks;
@@ -118,7 +120,7 @@ bool dxt_hc::compress(
118120 }
119121
120122 for (uint i = 0 ; i <= m_pTask_pool->get_num_threads (); i++)
121- m_pTask_pool->queue_object_task (this , m_has_etc_color_blocks ? &dxt_hc::determine_tiles_task_etc : &dxt_hc::determine_tiles_task, i);
123+ m_pTask_pool->queue_object_task (this , m_has_subblocks ? &dxt_hc::determine_tiles_task_etc : &dxt_hc::determine_tiles_task, i);
122124 m_pTask_pool->join ();
123125
124126 m_num_tiles = 0 ;
@@ -212,7 +214,7 @@ bool dxt_hc::compress(
212214 for (uint bx = 0 ; bx < block_width; bx++, b++) {
213215 bool top_match = by != 0 ;
214216 bool left_match = top_match || bx;
215- bool diag_match = m_has_etc_color_blocks && top_match && bx;
217+ bool diag_match = m_has_subblocks && top_match && bx;
216218 for (uint c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) {
217219 uint16 endpoint_index = (c ? alpha_endpoints_remap : color_endpoints_remap)[m_endpoint_indices[b].component [c]];
218220 left_match = left_match && endpoint_index == endpoint_indices[b - 1 ].component [c];
@@ -222,7 +224,7 @@ bool dxt_hc::compress(
222224 uint16 selector_index = (c ? alpha_selectors_remap : color_selectors_remap)[m_selector_indices[b].component [c]];
223225 selector_indices[b].component [c] = selector_index;
224226 }
225- endpoint_indices[b].reference = m_has_etc_color_blocks && b & 1 ? m_endpoint_indices[b].reference : left_match ? 1 : top_match ? 2 : diag_match ? 3 : 0 ;
227+ endpoint_indices[b].reference = m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : left_match ? 1 : top_match ? 2 : diag_match ? 3 : 0 ;
226228 }
227229 }
228230 }
@@ -290,6 +292,15 @@ void dxt_hc::determine_tiles_task(uint64 data, void*) {
290292 uint tile_error[3 ][9 ];
291293 uint total_error[3 ][8 ];
292294
295+ etc1_optimizer optimizer;
296+ etc1_optimizer::params params;
297+ params.m_use_color4 = false ;
298+ params.m_constrain_against_base_color5 = false ;
299+ etc1_optimizer::results results;
300+ results.m_pSelectors = selectors;
301+ int scan[] = {-1 , 0 , 1 };
302+ int refine[] = {-3 , -2 , 2 , 3 };
303+
293304 for (uint level = 0 ; level < m_params.m_num_levels ; level++) {
294305 float weight = m_params.m_levels [level].m_weight ;
295306 uint width = m_params.m_levels [level].m_block_width ;
@@ -318,7 +329,20 @@ void dxt_hc::determine_tiles_task(uint64 data, void*) {
318329 for (uint t = 0 ; t < 9 ; t++) {
319330 color_quad_u8* pixels = tilePixels + offsets[t];
320331 uint size = 16 << (t >> 2 );
321- if (m_has_color_blocks) {
332+ if (m_has_etc_color_blocks) {
333+ params.m_pSrc_pixels = pixels;
334+ params.m_num_src_pixels = results.m_n = size;
335+ optimizer.init (params, results);
336+ params.m_pScan_deltas = scan;
337+ params.m_scan_delta_size = sizeof (scan) / sizeof (*scan);
338+ optimizer.compute ();
339+ if (results.m_error > 375 * params.m_num_src_pixels ) {
340+ params.m_pScan_deltas = refine;
341+ params.m_scan_delta_size = sizeof (refine) / sizeof (*refine);
342+ optimizer.compute ();
343+ }
344+ tile_error[cColor][t] = results.m_error ;
345+ } else if (m_has_color_blocks) {
322346 uint low16, high16;
323347 dxt_fast::compress_color_block (size, pixels, low16, high16, selectors);
324348 color_quad_u8 block_colors[4 ];
@@ -605,23 +629,25 @@ void dxt_hc::determine_color_endpoint_codebook_task_etc(uint64 data, void*) {
605629 float endpoint_weight = powf (math::minimum ((cluster.color_values [3 ].get_luma () - cluster.color_values [0 ].get_luma ()) / 100 .0f , 1 .0f ), 2 .7f );
606630
607631 crnlib::vector<uint>& blocks = cluster.blocks [cColor];
632+ uint blockSize = m_has_subblocks ? 8 : 16 ;
608633 for (uint i = 0 ; i < blocks.size (); i++) {
609634 uint b = blocks[i];
635+ color_quad_u8* pixels = m_has_subblocks ? ((color_quad_u8 (*)[8 ])m_blocks)[b] : m_blocks[b];
610636 uint weight = (uint)(math::clamp<uint>(0x8000 * endpoint_weight * m_block_weights[b] * (m_block_encodings[b] ? 0 .972f : 1 .0f ), 1 , 0xFFFF ));
611637 uint32 selector = 0 ;
612- for (uint p = 0 ; p < 8 ; p++) {
638+ for (uint p = 0 ; p < blockSize ; p++) {
613639 uint error_best = cUINT32_MAX;
614640 uint8 s_best = 0 ;
615641 for (uint8 s = 0 ; s < 4 ; s++) {
616- uint error = color::color_distance (m_params.m_perceptual , (( color_quad_u8 (*)[ 8 ])m_blocks)[b] [p], cluster.color_values [s], false );
642+ uint error = color::color_distance (m_params.m_perceptual , pixels [p], cluster.color_values [s], false );
617643 if (error < error_best) {
618644 s_best = s;
619645 error_best = error;
620646 }
621647 }
622648 selector = selector << 2 | s_best;
623649 }
624- m_block_selectors[cColor][b] = (uint64)selector << ((b & 1 ) ? 32 : 48 ) | weight;
650+ m_block_selectors[cColor][b] = (uint64)selector << (!m_has_subblocks || (b & 1 ) ? 32 : 48 ) | weight;
625651 }
626652 }
627653 }
@@ -731,7 +757,7 @@ void dxt_hc::determine_color_endpoints() {
731757 uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices [cColor];
732758 m_endpoint_indices[b].component [cColor] = cluster_index;
733759 m_color_clusters[cluster_index].blocks [cColor].push_back (b);
734- if (m_has_etc_color_blocks && m_endpoint_indices[b].reference && cluster_index == m_endpoint_indices[b - 1 ].component [cColor]) {
760+ if (m_has_subblocks && m_endpoint_indices[b].reference && cluster_index == m_endpoint_indices[b - 1 ].component [cColor]) {
735761 if (m_endpoint_indices[b].reference >> 1 ) {
736762 color_quad_u8 mirror[16 ];
737763 for (uint p = 0 ; p < 16 ; p++)
@@ -808,7 +834,7 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void*) {
808834 uint8 s_best = 0 ;
809835 for (uint8 t = 0 ; t < 8 ; t++) {
810836 uint8 s = m_has_etc_color_blocks ? t : results.m_reordered ? 7 - g_dxt5_to_linear[t] : g_dxt5_to_linear[t];
811- int delta = m_blocks[m_has_etc_color_blocks ? b >> 1 : b][p][component_index] - alpha_values[s];
837+ int delta = m_blocks[m_has_subblocks ? b >> 1 : b][p][component_index] - alpha_values[s];
812838 uint error = delta >= 0 ? delta : -delta;
813839 if (error < error_best) {
814840 s_best = s;
@@ -946,7 +972,7 @@ void dxt_hc::determine_alpha_endpoints() {
946972 for (uint a = 0 ; a < m_num_alpha_blocks; a++) {
947973 uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices [cAlpha0 + a];
948974 m_endpoint_indices[b].component [cAlpha0 + a] = cluster_index;
949- if (!(m_has_etc_color_blocks && b & 1 ))
975+ if (!(m_has_subblocks && b & 1 ))
950976 m_alpha_clusters[cluster_index].blocks [cAlpha0 + a].push_back (b);
951977 }
952978 }
@@ -968,12 +994,12 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) {
968994 uint E2 [16 ][4 ];
969995 uint E4 [8 ][16 ];
970996 uint E8 [4 ][256 ];
971- for (uint n = m_has_etc_color_blocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1 ) / num_tasks; b < bEnd; b++) {
997+ for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1 ) / num_tasks; b < bEnd; b++) {
972998 color_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color ];
973999 color_quad_u8* endpoint_colors = cluster.color_values ;
9741000 for (uint p = 0 ; p < 16 ; p++) {
9751001 for (uint s = 0 ; s < 4 ; s++)
976- E2 [p][s] = m_has_etc_color_blocks ? color::color_distance (m_params.m_perceptual , m_blocks[b][p], m_color_clusters[m_endpoint_indices[b << 1 | p >> 3 ].color ].color_values [s], false ) :
1002+ E2 [p][s] = m_has_subblocks ? color::color_distance (m_params.m_perceptual , m_blocks[b][p], m_color_clusters[m_endpoint_indices[b << 1 | p >> 3 ].color ].color_values [s], false ) :
9771003 color::color_distance (m_params.m_perceptual , m_blocks[b][p], endpoint_colors[s], false );
9781004 }
9791005 for (uint p = 0 ; p < 8 ; p++) {
@@ -999,7 +1025,7 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) {
9991025 total_errors[p][s] += E2 [p][s];
10001026 }
10011027 selector_details[best_index].used = true ;
1002- m_selector_indices[m_has_etc_color_blocks ? b << 1 : b].color = best_index;
1028+ m_selector_indices[m_has_subblocks ? b << 1 : b].color = best_index;
10031029 }
10041030}
10051031
@@ -1012,9 +1038,9 @@ struct SelectorNode {
10121038
10131039void dxt_hc::create_color_selector_codebook () {
10141040 uint num_tasks = m_pTask_pool->get_num_threads () + 1 ;
1015- crnlib::vector<uint64> selectors (m_has_etc_color_blocks ? m_num_blocks >> 1 : m_num_blocks);
1016- for (uint i = 0 , b = 0 , step = m_has_etc_color_blocks ? 2 : 1 ; b < m_num_blocks; b += step)
1017- selectors[i++] = m_block_selectors[cColor][b] + (m_has_etc_color_blocks ? m_block_selectors[cColor][b + 1 ] : 0 );
1041+ crnlib::vector<uint64> selectors (m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks);
1042+ for (uint i = 0 , b = 0 , step = m_has_subblocks ? 2 : 1 ; b < m_num_blocks; b += step)
1043+ selectors[i++] = m_block_selectors[cColor][b] + (m_has_subblocks ? m_block_selectors[cColor][b + 1 ] : 0 );
10181044
10191045 crnlib::vector<SelectorNode> nodes;
10201046 SelectorNode node (0 , selectors.get_ptr ());
@@ -1115,10 +1141,10 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) {
11151141 uint num_tasks = m_pTask_pool->get_num_threads () + 1 ;
11161142 uint E3 [16 ][8 ];
11171143 uint E6 [8 ][64 ];
1118- for (uint n = m_has_etc_color_blocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1 ) / num_tasks; b < bEnd; b++) {
1144+ for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1 ) / num_tasks; b < bEnd; b++) {
11191145 for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) {
11201146 const uint alpha_pixel_comp = m_params.m_alpha_component_indices [c - cAlpha0];
1121- alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[m_has_etc_color_blocks ? b << 1 : b].component [c]];
1147+ alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[m_has_subblocks ? b << 1 : b].component [c]];
11221148 uint* block_values = cluster.alpha_values ;
11231149 for (uint p = 0 ; p < 16 ; p++) {
11241150 for (uint s = 0 ; s < 8 ; s++) {
@@ -1161,16 +1187,16 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) {
11611187 total_errors[p][s] += E3 [p][s];
11621188 }
11631189 selector_details[best_index].used = true ;
1164- m_selector_indices[m_has_etc_color_blocks ? b << 1 : b].component [c] = best_index;
1190+ m_selector_indices[m_has_subblocks ? b << 1 : b].component [c] = best_index;
11651191 }
11661192 }
11671193}
11681194
11691195void dxt_hc::create_alpha_selector_codebook () {
11701196 uint num_tasks = m_pTask_pool->get_num_threads () + 1 ;
1171- crnlib::vector<uint64> selectors (m_num_alpha_blocks * (m_has_etc_color_blocks ? m_num_blocks >> 1 : m_num_blocks));
1197+ crnlib::vector<uint64> selectors (m_num_alpha_blocks * (m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks));
11721198 for (uint i = 0 , c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) {
1173- for (uint b = 0 , step = m_has_etc_color_blocks ? 2 : 1 ; b < m_num_blocks; b += step)
1199+ for (uint b = 0 , step = m_has_subblocks ? 2 : 1 ; b < m_num_blocks; b += step)
11741200 selectors[i++] = m_block_selectors[c][b];
11751201 }
11761202
0 commit comments