From 1d27a277291d482e8bd4e06ba935745a36436e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Wed, 5 Nov 2025 19:46:38 +0100 Subject: [PATCH 1/3] Flux dype --- flux.hpp | 24 ++++++- rope.hpp | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 212 insertions(+), 4 deletions(-) diff --git a/flux.hpp b/flux.hpp index 95927f8b..469c8bc8 100644 --- a/flux.hpp +++ b/flux.hpp @@ -1266,7 +1266,8 @@ namespace Flux { set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data()); } y = to_backend(y); - + float current_timestep = ggml_get_f32_1d(timesteps, 0); + LOG_DEBUG("current_timestep %f", current_timestep); timesteps = to_backend(timesteps); if (flux_params.guidance_embed || flux_params.is_chroma) { guidance = to_backend(guidance); @@ -1275,6 +1276,22 @@ namespace Flux { ref_latents[i] = to_backend(ref_latents[i]); } + // get use_yarn and use_dype from env for now (TODO: add args) + bool use_yarn = false; + bool use_dype = false; + char* use_yarn_env = getenv("USE_YARN"); + if (use_yarn_env != nullptr) { + if (strcmp(use_yarn_env, "OFF") != 0) { + use_yarn = true; + char* use_dype_env = getenv("USE_DYPE"); + if (use_dype_env != nullptr) { + if (strcmp(use_dype_env, "OFF") != 0) { + use_dype = true; + } + } + } + } + pe_vec = Rope::gen_flux_pe(x->ne[1], x->ne[0], flux_params.patch_size, @@ -1283,7 +1300,10 @@ namespace Flux { ref_latents, increase_ref_index, flux_params.theta, - flux_params.axes_dim); + flux_params.axes_dim, + use_yarn, + use_dype, + current_timestep); int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2; // LOG_DEBUG("pos_len %d", pos_len); auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len); diff --git a/rope.hpp b/rope.hpp index bd1dfad5..ae0c8d5a 100644 --- a/rope.hpp +++ b/rope.hpp @@ -71,6 +71,138 @@ namespace Rope { return result; } + float find_correction_factor(float num_rotations, int dim, float base, float max_position_embeddings) { + return (dim * std::log(max_position_embeddings / (num_rotations * 2 * 3.14159265358979323846))) / (2 * std::log(base)); + } + + std::pair find_correction_range(float low_ratio, float high_ratio, int dim, float base, float ori_max_pe_len) { + float low = std::floor(find_correction_factor(low_ratio, dim, base, ori_max_pe_len)); + float high = std::ceil(find_correction_factor(high_ratio, dim, base, ori_max_pe_len)); + return {std::max(0, static_cast(low)), std::min(dim - 1, static_cast(high))}; + } + + std::vector linear_ramp_mask(int min, int max, int dim) { + if (min == max) { + max += 0.001f; // Prevent singularity + } + std::vector ramp(dim); + for (int i = 0; i < dim; ++i) { + ramp[i] = std::max(0.0f, std::min(1.0f, static_cast(i - min) / (max - min))); + } + return ramp; + } + + float find_newbase_ntk(int dim, float base, float scale) { + return base * std::pow(scale, static_cast(dim) / (dim - 2)); + } + + __STATIC_INLINE__ std::vector> rope_ext( + const std::vector& pos, + int dim, + float theta = 10000.0f, + bool use_real = false, + float linear_factor = 1.0f, + float ntk_factor = 1.0f, + bool repeat_interleave_real = true, + bool yarn = false, + int max_pe_len = -1, + int ori_max_pe_len = 64, + bool dype = false, + float current_timestep = 1.0f) { + assert(dim % 2 == 0); + int half_dim = dim / 2; + + // Compute scale for YARN + float scale = 1.0f; + if (yarn && max_pe_len > ori_max_pe_len) { + scale = std::max(1.0f, static_cast(max_pe_len) / ori_max_pe_len); + } + + // Compute frequencies + std::vector freqs_base(half_dim); + std::vector freqs_linear(half_dim); + std::vector freqs_ntk(half_dim); + std::vector freqs(half_dim); + + for (int i = 0; i < half_dim; ++i) { + float exponent = static_cast(i) / half_dim; + freqs_base[i] = 1.0f / std::pow(theta, exponent); + if (yarn && max_pe_len > ori_max_pe_len) { + freqs_linear[i] = 1.0f / std::pow(theta, exponent) / scale; + float new_base = 1.0f / std::pow(theta, exponent / scale); // Simplified for YARN + freqs_ntk[i] = 1.0f / std::pow(new_base, exponent); + } + } + + // YARN interpolation + if (yarn && max_pe_len > ori_max_pe_len) { + float beta_0 = 1.25f; + float beta_1 = 0.75f; + float gamma_0 = 16.0f; + float gamma_1 = 2.0f; + + if (dype) { + beta_0 = std::pow(beta_0, 2.0f * current_timestep * current_timestep); + beta_1 = std::pow(beta_1, 2.0f * current_timestep * current_timestep); + gamma_0 = std::pow(gamma_0, 2.0f * current_timestep * current_timestep); + gamma_1 = std::pow(gamma_1, 2.0f * current_timestep * current_timestep); + } + + // Compute freqs_linear and freqs_ntk + for (int i = 0; i < half_dim; ++i) { + float exponent = static_cast(i) / half_dim; + freqs_linear[i] = 1.0f / (std::pow(theta, exponent) * scale); + } + + float new_base = find_newbase_ntk(dim, theta, scale); + for (int i = 0; i < half_dim; ++i) { + float exponent = static_cast(i) / half_dim; + freqs_ntk[i] = 1.0f / std::pow(new_base, exponent); + } + + // Apply correction range and linear ramp mask + auto [low, high] = find_correction_range(beta_0, beta_1, dim, theta, ori_max_pe_len); + auto mask = linear_ramp_mask(low, high, half_dim); + for (int i = 0; i < half_dim; ++i) { + freqs[i] = freqs_linear[i] * (1.0f - mask[i]) + freqs_ntk[i] * mask[i]; + } + + // Apply gamma correction + auto [low_gamma, high_gamma] = find_correction_range(gamma_0, gamma_1, dim, theta, ori_max_pe_len); + auto mask_gamma = linear_ramp_mask(low_gamma, high_gamma, half_dim); + for (int i = 0; i < half_dim; ++i) { + freqs[i] = freqs[i] * (1.0f - mask_gamma[i]) + freqs_base[i] * mask_gamma[i]; + } + } else { + float theta_ntk = theta * ntk_factor; + for (int i = 0; i < half_dim; ++i) { + float exponent = static_cast(i) / half_dim; + freqs[i] = 1.0f / std::pow(theta_ntk, exponent) / linear_factor; + } + } + + // Outer product of pos and freqs + std::vector> freqs_outer(pos.size(), std::vector(half_dim)); + for (size_t i = 0; i < pos.size(); ++i) { + for (int j = 0; j < half_dim; ++j) { + freqs_outer[i][j] = pos[i] * freqs[j]; + } + } + + std::vector> result; + result.resize(pos.size(), std::vector(half_dim * 4)); + for (size_t i = 0; i < pos.size(); ++i) { + for (int j = 0; j < half_dim; ++j) { + result[i][4 * j] = std::cos(freqs_outer[i][j]); // cos + result[i][4 * j + 1] = -std::sin(freqs_outer[i][j]); // -sin + result[i][4 * j + 2] = std::sin(freqs_outer[i][j]); // sin + result[i][4 * j + 3] = std::cos(freqs_outer[i][j]); // cos + } + } + + return result; + } + // Generate IDs for image patches and text __STATIC_INLINE__ std::vector> gen_txt_ids(int bs, int context_len) { return std::vector>(bs * context_len, std::vector(3, 0.0)); @@ -151,6 +283,45 @@ namespace Rope { return flatten(emb); } + std::vector embed_nd_ext( + const std::vector>& ids, + int bs, + float theta, + const std::vector& axes_dim, + bool yarn = false, + int max_pe_len = -1, + int ori_max_pe_len = 64, + bool dype = false, + float current_timestep = 1.0f) { + std::vector> trans_ids = transpose(ids); + size_t pos_len = ids.size() / bs; + int num_axes = axes_dim.size(); + + int emb_dim = 0; + for (int d : axes_dim) { + emb_dim += d; + } + + std::vector> emb(bs * pos_len, std::vector(emb_dim * 2, 0.0f)); + int offset = 0; + + for (int i = 0; i < num_axes; ++i) { + std::vector> rope_emb = rope_ext( + trans_ids[i], axes_dim[i], theta, false, 1.0f, 1.0f, true, yarn, max_pe_len, ori_max_pe_len, dype, current_timestep); + + for (int b = 0; b < bs; ++b) { + for (size_t j = 0; j < pos_len; ++j) { + for (size_t k = 0; k < rope_emb[j].size(); ++k) { + emb[b * pos_len + j][offset + k] = rope_emb[j][k]; + } + } + } + offset += static_cast(axes_dim[i] * 2); + } + + return flatten(emb); + } + __STATIC_INLINE__ std::vector> gen_refs_ids(int patch_size, int bs, const std::vector& ref_latents, @@ -210,9 +381,26 @@ namespace Rope { const std::vector& ref_latents, bool increase_ref_index, int theta, - const std::vector& axes_dim) { + const std::vector& axes_dim, + bool use_yarn = false, + bool use_dype = false, + float current_timestep = 1.0f) { + const int base_patches = 1024 / 16; std::vector> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index); - return embed_nd(ids, bs, theta, axes_dim); + float max_pos_f = 0.0f; + for (const auto& row : ids) { + for (float val : row) { + if (val > max_pos_f) { + max_pos_f = val; + } + } + } + int max_pos = static_cast(max_pos_f) + 1; + if (use_yarn && max_pos > base_patches) { + return embed_nd_ext(ids, bs, theta, axes_dim, true, max_pos, base_patches, use_dype, current_timestep); + } else { + return embed_nd(ids, bs, theta, axes_dim); + } } __STATIC_INLINE__ std::vector> gen_qwen_image_ids(int h, From 5e6c77ea6f58fa6228decb856a342df322bca64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Thu, 6 Nov 2025 19:50:54 +0100 Subject: [PATCH 2/3] Working dype + NTK --- flux.hpp | 33 +++++++++++------- rope.hpp | 101 ++++++++++++++++++++++++++++--------------------------- 2 files changed, 73 insertions(+), 61 deletions(-) diff --git a/flux.hpp b/flux.hpp index 469c8bc8..bea12a11 100644 --- a/flux.hpp +++ b/flux.hpp @@ -1276,19 +1276,27 @@ namespace Flux { ref_latents[i] = to_backend(ref_latents[i]); } - // get use_yarn and use_dype from env for now (TODO: add args) - bool use_yarn = false; - bool use_dype = false; - char* use_yarn_env = getenv("USE_YARN"); - if (use_yarn_env != nullptr) { - if (strcmp(use_yarn_env, "OFF") != 0) { + // get use_yarn, use_ntk and use_dype from env for now (TODO: add args) + // Env value could be one of yarn, dy_yarn, ntk or dy_ntk, (anything else means disabled) + const char* env_value = getenv("FLUX_ROPE"); + bool use_yarn = false; + bool use_dype = false; + bool use_ntk = false; + if (env_value != nullptr) { + if (strcmp(env_value, "YARN") == 0) { + LOG_DEBUG("Using YARN RoPE"); use_yarn = true; - char* use_dype_env = getenv("USE_DYPE"); - if (use_dype_env != nullptr) { - if (strcmp(use_dype_env, "OFF") != 0) { - use_dype = true; - } - } + } else if (strcmp(env_value, "DY_YARN") == 0) { + LOG_DEBUG("Using DY YARN RoPE"); + use_yarn = true; + use_dype = true; + } else if (strcmp(env_value, "NTK") == 0) { + LOG_DEBUG("Using NTK RoPE"); + use_ntk = true; + } else if (strcmp(env_value, "DY_NTK") == 0) { + LOG_DEBUG("Using DY NTK RoPE"); + use_ntk = true; + use_dype = true; } } @@ -1303,6 +1311,7 @@ namespace Flux { flux_params.axes_dim, use_yarn, use_dype, + use_ntk, current_timestep); int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2; // LOG_DEBUG("pos_len %d", pos_len); diff --git a/rope.hpp b/rope.hpp index ae0c8d5a..bda4bbee 100644 --- a/rope.hpp +++ b/rope.hpp @@ -78,7 +78,7 @@ namespace Rope { std::pair find_correction_range(float low_ratio, float high_ratio, int dim, float base, float ori_max_pe_len) { float low = std::floor(find_correction_factor(low_ratio, dim, base, ori_max_pe_len)); float high = std::ceil(find_correction_factor(high_ratio, dim, base, ori_max_pe_len)); - return {std::max(0, static_cast(low)), std::min(dim - 1, static_cast(high))}; + return {std::max(0, static_cast(low)), std::min(dim / 2, static_cast(high))}; } std::vector linear_ramp_mask(int min, int max, int dim) { @@ -92,10 +92,6 @@ namespace Rope { return ramp; } - float find_newbase_ntk(int dim, float base, float scale) { - return base * std::pow(scale, static_cast(dim) / (dim - 2)); - } - __STATIC_INLINE__ std::vector> rope_ext( const std::vector& pos, int dim, @@ -112,35 +108,28 @@ namespace Rope { assert(dim % 2 == 0); int half_dim = dim / 2; - // Compute scale for YARN - float scale = 1.0f; - if (yarn && max_pe_len > ori_max_pe_len) { - scale = std::max(1.0f, static_cast(max_pe_len) / ori_max_pe_len); - } - // Compute frequencies std::vector freqs_base(half_dim); std::vector freqs_linear(half_dim); std::vector freqs_ntk(half_dim); std::vector freqs(half_dim); - for (int i = 0; i < half_dim; ++i) { - float exponent = static_cast(i) / half_dim; - freqs_base[i] = 1.0f / std::pow(theta, exponent); - if (yarn && max_pe_len > ori_max_pe_len) { - freqs_linear[i] = 1.0f / std::pow(theta, exponent) / scale; - float new_base = 1.0f / std::pow(theta, exponent / scale); // Simplified for YARN - freqs_ntk[i] = 1.0f / std::pow(new_base, exponent); - } - } - - // YARN interpolation if (yarn && max_pe_len > ori_max_pe_len) { float beta_0 = 1.25f; float beta_1 = 0.75f; float gamma_0 = 16.0f; float gamma_1 = 2.0f; + float scale = std::max(1.0f, static_cast(max_pe_len) / ori_max_pe_len); + // d,t,s + float new_base = theta * std::pow(scale, half_dim / (half_dim - 1)); + for (int i = 0; i < half_dim; ++i) { + float exponent = static_cast(i) / half_dim; + freqs_base[i] = 1.0f / std::pow(theta, exponent); + freqs_linear[i] = 1.0f / (scale * std::pow(theta, exponent)); + freqs_ntk[i] = 1.0f / std::pow(new_base, exponent); + } + if (dype) { beta_0 = std::pow(beta_0, 2.0f * current_timestep * current_timestep); beta_1 = std::pow(beta_1, 2.0f * current_timestep * current_timestep); @@ -148,30 +137,18 @@ namespace Rope { gamma_1 = std::pow(gamma_1, 2.0f * current_timestep * current_timestep); } - // Compute freqs_linear and freqs_ntk - for (int i = 0; i < half_dim; ++i) { - float exponent = static_cast(i) / half_dim; - freqs_linear[i] = 1.0f / (std::pow(theta, exponent) * scale); - } - - float new_base = find_newbase_ntk(dim, theta, scale); - for (int i = 0; i < half_dim; ++i) { - float exponent = static_cast(i) / half_dim; - freqs_ntk[i] = 1.0f / std::pow(new_base, exponent); - } - // Apply correction range and linear ramp mask auto [low, high] = find_correction_range(beta_0, beta_1, dim, theta, ori_max_pe_len); auto mask = linear_ramp_mask(low, high, half_dim); for (int i = 0; i < half_dim; ++i) { - freqs[i] = freqs_linear[i] * (1.0f - mask[i]) + freqs_ntk[i] * mask[i]; + freqs[i] = freqs_linear[i] * mask[i] + freqs_ntk[i] * (1.0f - mask[i]); } // Apply gamma correction auto [low_gamma, high_gamma] = find_correction_range(gamma_0, gamma_1, dim, theta, ori_max_pe_len); auto mask_gamma = linear_ramp_mask(low_gamma, high_gamma, half_dim); for (int i = 0; i < half_dim; ++i) { - freqs[i] = freqs[i] * (1.0f - mask_gamma[i]) + freqs_base[i] * mask_gamma[i]; + freqs[i] = freqs[i] * mask_gamma[i] + freqs_base[i] * (1.0f - mask_gamma[i]); } } else { float theta_ntk = theta * ntk_factor; @@ -288,15 +265,23 @@ namespace Rope { int bs, float theta, const std::vector& axes_dim, - bool yarn = false, - int max_pe_len = -1, - int ori_max_pe_len = 64, - bool dype = false, - float current_timestep = 1.0f) { + bool yarn = false, + std::vector max_pe_len = {}, + int ori_max_pe_len = 64, + bool dype = false, + float current_timestep = 1.0f, + std::vector ntk_factors = {}) { std::vector> trans_ids = transpose(ids); size_t pos_len = ids.size() / bs; int num_axes = axes_dim.size(); + if (ntk_factors.size() == 0) { + ntk_factors = std::vector(num_axes, 1.0f); + } + if (max_pe_len.size() == 0) { + max_pe_len = std::vector(num_axes, -1); + } + int emb_dim = 0; for (int d : axes_dim) { emb_dim += d; @@ -307,7 +292,7 @@ namespace Rope { for (int i = 0; i < num_axes; ++i) { std::vector> rope_emb = rope_ext( - trans_ids[i], axes_dim[i], theta, false, 1.0f, 1.0f, true, yarn, max_pe_len, ori_max_pe_len, dype, current_timestep); + trans_ids[i], axes_dim[i], theta, false, 1.0f, ntk_factors[i], true, yarn, max_pe_len[i], ori_max_pe_len, dype, current_timestep); for (int b = 0; b < bs; ++b) { for (size_t j = 0; j < pos_len; ++j) { @@ -384,20 +369,38 @@ namespace Rope { const std::vector& axes_dim, bool use_yarn = false, bool use_dype = false, + bool use_ntk = false, float current_timestep = 1.0f) { - const int base_patches = 1024 / 16; + int base_resolution = 1024; + // set it via environment variable for now (TODO: arg) + const char* env_base_resolution = getenv("FLUX_DYPE_BASE_RESOLUTION"); + if (env_base_resolution != nullptr) { + base_resolution = atoi(env_base_resolution); + } + int base_patches = base_resolution / 16; std::vector> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index); - float max_pos_f = 0.0f; - for (const auto& row : ids) { - for (float val : row) { + std::vector max_pos_vec = {}; + std::vector ntk_factor_vec = {}; + for (int i = 0; i < axes_dim.size(); i++) { + float max_pos_f = 0.0f; + for (const auto& row : ids) { + float val = row[i]; if (val > max_pos_f) { max_pos_f = val; } } + int max_pos = static_cast(max_pos_f) + 1; + max_pos_vec.push_back(max_pos); + float ntk_factor = 1.0f; + if (use_ntk) { + float base_ntk = pow((float)max_pos / base_patches, (float)axes_dim[i] / (axes_dim[i] - 2)); + ntk_factor = use_dype ? pow(base_ntk, 2.0f * current_timestep * current_timestep) : base_ntk; + ntk_factor = std::max(1.0f, ntk_factor); + } + ntk_factor_vec.push_back(ntk_factor); } - int max_pos = static_cast(max_pos_f) + 1; - if (use_yarn && max_pos > base_patches) { - return embed_nd_ext(ids, bs, theta, axes_dim, true, max_pos, base_patches, use_dype, current_timestep); + if (use_yarn || use_ntk) { + return embed_nd_ext(ids, bs, theta, axes_dim, use_yarn, max_pos_vec, base_patches, use_dype, current_timestep, ntk_factor_vec); } else { return embed_nd(ids, bs, theta, axes_dim); } From 795ad6732b7774e63b8cbaee9fc502aef9015623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Sun, 9 Nov 2025 01:24:41 +0100 Subject: [PATCH 3/3] base_resolution with desired aspect ratio --- rope.hpp | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/rope.hpp b/rope.hpp index bda4bbee..7d91d6b5 100644 --- a/rope.hpp +++ b/rope.hpp @@ -265,12 +265,12 @@ namespace Rope { int bs, float theta, const std::vector& axes_dim, - bool yarn = false, - std::vector max_pe_len = {}, - int ori_max_pe_len = 64, - bool dype = false, - float current_timestep = 1.0f, - std::vector ntk_factors = {}) { + bool yarn = false, + std::vector max_pe_len = {}, + std::vector ori_max_pe_len = {64, 64, 64}, + bool dype = false, + float current_timestep = 1.0f, + std::vector ntk_factors = {}) { std::vector> trans_ids = transpose(ids); size_t pos_len = ids.size() / bs; int num_axes = axes_dim.size(); @@ -292,7 +292,7 @@ namespace Rope { for (int i = 0; i < num_axes; ++i) { std::vector> rope_emb = rope_ext( - trans_ids[i], axes_dim[i], theta, false, 1.0f, ntk_factors[i], true, yarn, max_pe_len[i], ori_max_pe_len, dype, current_timestep); + trans_ids[i], axes_dim[i], theta, false, 1.0f, ntk_factors[i], true, yarn, max_pe_len[i], ori_max_pe_len[i], dype, current_timestep); for (int b = 0; b < bs; ++b) { for (size_t j = 0; j < pos_len; ++j) { @@ -372,12 +372,31 @@ namespace Rope { bool use_ntk = false, float current_timestep = 1.0f) { int base_resolution = 1024; + int base_patches_H = -1; + int base_patches_W = -1; + // set it via environment variable for now (TODO: arg) + // could be either a single integer, or WxH const char* env_base_resolution = getenv("FLUX_DYPE_BASE_RESOLUTION"); if (env_base_resolution != nullptr) { - base_resolution = atoi(env_base_resolution); + if (strchr(env_base_resolution, 'x') != nullptr) { + const char* x_pos = strchr(env_base_resolution, 'x'); + base_patches_H = atoi(x_pos + 1) / 16; + base_patches_W = atoi(env_base_resolution) / 16; + } else { + base_resolution = atoi(env_base_resolution); + } } - int base_patches = base_resolution / 16; + // preserve aspect ratio of the input image + // base_patches_W = k*w, base_patches_H = k*h, base_patches_W*base_patches_H = base_resolution^2 + // => k = base_resolution / sqrt(w*h) + if (base_patches_H == -1) + base_patches_H = (base_resolution * h * sqrt(1.0f / (w * h))) / 16; + if (base_patches_W == -1) + base_patches_W = (base_resolution * w * sqrt(1.0f / (w * h))) / 16; + + // First dim is ref image, should not need any weird rope modifications since the max pos should stay very low. 1024 is a lot + std::vector base_patches = {1024, base_patches_H, base_patches_W}; std::vector> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index); std::vector max_pos_vec = {}; std::vector ntk_factor_vec = {}; @@ -393,7 +412,7 @@ namespace Rope { max_pos_vec.push_back(max_pos); float ntk_factor = 1.0f; if (use_ntk) { - float base_ntk = pow((float)max_pos / base_patches, (float)axes_dim[i] / (axes_dim[i] - 2)); + float base_ntk = pow((float)max_pos / base_patches[i], (float)axes_dim[i] / (axes_dim[i] - 2)); ntk_factor = use_dype ? pow(base_ntk, 2.0f * current_timestep * current_timestep) : base_ntk; ntk_factor = std::max(1.0f, ntk_factor); }