From db6334401ecde289f05dac57240c62a3c1ff6266 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 19 Sep 2025 12:56:27 +0000 Subject: [PATCH 1/9] validate nodename groups --- environments/.stackhpc/tofu/main.tf | 2 +- environments/site/tofu/variables.tf | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index 22113cdb9..649f2f7cf 100644 --- a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -76,7 +76,7 @@ module "cluster" { control_node_flavor = var.control_node_flavor login = { - login = { + head = { nodes = ["login-0"] flavor = var.other_node_flavor } diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 98f364a9a..3e2962aa6 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -52,7 +52,7 @@ variable "login" { be useful for e.g. separating nodes for ssh and Open Ondemand usage, or to define login nodes with different capabilities such as high-memory. - Keys are names of groups. + Keys are names of groups. Keys cannot be 'login', 'compute', or 'control'. Values are a mapping as follows: Required: @@ -88,6 +88,12 @@ variable "login" { EOF type = any + validation { + condition = length(setintersection(keys(var.login), ["login", "compute", "control"])) == 0 + error_message = <<-EOF + Login nodegroup names cannot be 'login', 'compute' or 'control'. Invalid var.login keys: ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. + EOF + } } variable "cluster_image_id" { @@ -101,7 +107,7 @@ variable "compute" { Mapping defining homogenous groups of compute nodes. Groups are used in Slurm partition definitions. - Keys are names of groups. + Keys are names of groups. Keys cannot be 'compute', 'login', 'control' or 'default'. 
Values are a mapping as follows: Required: @@ -139,6 +145,12 @@ variable "compute" { EOF type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + validation { + condition = length(setintersection(keys(var.compute), ["login", "compute", "control", "default"])) == 0 + error_message = <<-EOF + Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute keys: ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. + EOF + } } # tflint-ignore: terraform_typed_variables From 49a248a742efb2ed67e5411b0bcee8d9a155c204 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 19 Sep 2025 13:07:13 +0000 Subject: [PATCH 2/9] add validation for nodegroup name clashes --- environments/site/tofu/variables.tf | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 3e2962aa6..445d39aa0 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -91,7 +91,7 @@ variable "login" { validation { condition = length(setintersection(keys(var.login), ["login", "compute", "control"])) == 0 error_message = <<-EOF - Login nodegroup names cannot be 'login', 'compute' or 'control'. Invalid var.login keys: ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. + Login nodegroup names cannot be 'login', 'compute' or 'control'. Invalid var.login key(s): ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. EOF } } @@ -148,7 +148,13 @@ variable "compute" { validation { condition = length(setintersection(keys(var.compute), ["login", "compute", "control", "default"])) == 0 error_message = <<-EOF - Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute keys: ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. 
+ Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. + EOF + } + validation { + condition = length(setintersection(keys(var.compute), keys(var.login))) == 0 + error_message = <<-EOF + Compute and login nodegroups cannot have the same name. Invalid var.compute/var.login key(s): ${join(", ", setintersection(keys(var.compute), keys(var.login)))} EOF } } From 77bf720f0b7bf878df530a294e5b408373c441ff Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 19 Sep 2025 13:07:55 +0000 Subject: [PATCH 3/9] add validation for nodegroup name clashes --- environments/site/tofu/variables.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 445d39aa0..a3aa94931 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -107,7 +107,8 @@ variable "compute" { Mapping defining homogenous groups of compute nodes. Groups are used in Slurm partition definitions. - Keys are names of groups. Keys cannot be 'compute', 'login', 'control' or 'default'. + Keys are names of groups. Keys cannot be 'compute', 'login', 'control' or 'default' + or be the same as keys in the login variable. 
Values are a mapping as follows: Required: From 5eff3c44f7bae5c3d9386ad836bc366e939292ef Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 19 Sep 2025 14:05:56 +0000 Subject: [PATCH 4/9] fix linter whinges --- environments/.stackhpc/tofu/ARCUS.tfvars | 6 +++--- environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars | 14 +++++++------- environments/.stackhpc/tofu/LEAFCLOUD.tfvars | 14 +++++++------- environments/.stackhpc/tofu/SMS.tfvars | 10 +++++----- environments/site/tofu/variables.tf | 6 +++--- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/environments/.stackhpc/tofu/ARCUS.tfvars b/environments/.stackhpc/tofu/ARCUS.tfvars index 6aec599ef..40daa516b 100644 --- a/environments/.stackhpc/tofu/ARCUS.tfvars +++ b/environments/.stackhpc/tofu/ARCUS.tfvars @@ -1,4 +1,4 @@ -cluster_net = "portal-internal" -cluster_subnet = "portal-internal" +cluster_net = "portal-internal" +cluster_subnet = "portal-internal" control_node_flavor = "vm.ska.cpu.general.eighth" -other_node_flavor = "vm.ska.cpu.general.small" +other_node_flavor = "vm.ska.cpu.general.small" diff --git a/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars index 82e336dd8..b45a961a3 100644 --- a/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars @@ -1,10 +1,10 @@ cluster_networks = [ - { - network = "stackhpc-dev" - subnet = "stackhpc-dev" - } + { + network = "stackhpc-dev" + subnet = "stackhpc-dev" + } ] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment -other_node_flavor = "en1.xsmall" -state_volume_type = "unencrypted" -home_volume_type = "unencrypted" +other_node_flavor = "en1.xsmall" +state_volume_type = "unencrypted" +home_volume_type = "unencrypted" diff --git a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars index 135aadc64..601910aba 100644 --- 
a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars @@ -1,10 +1,10 @@ cluster_networks = [ - { - network = "slurmapp-ci" - subnet = "slurmapp-ci" - } + { + network = "slurmapp-ci" + subnet = "slurmapp-ci" + } ] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment -other_node_flavor = "en1.xsmall" -state_volume_type = "unencrypted" -home_volume_type = "unencrypted" +other_node_flavor = "en1.xsmall" +state_volume_type = "unencrypted" +home_volume_type = "unencrypted" diff --git a/environments/.stackhpc/tofu/SMS.tfvars b/environments/.stackhpc/tofu/SMS.tfvars index 808821bc0..6d14fc215 100644 --- a/environments/.stackhpc/tofu/SMS.tfvars +++ b/environments/.stackhpc/tofu/SMS.tfvars @@ -1,8 +1,8 @@ cluster_networks = [ - { - network = "stackhpc-ipv4-geneve" - subnet = "stackhpc-ipv4-geneve-subnet" - } + { + network = "stackhpc-ipv4-geneve" + subnet = "stackhpc-ipv4-geneve-subnet" + } ] control_node_flavor = "general.v1.small" -other_node_flavor = "general.v1.small" \ No newline at end of file +other_node_flavor = "general.v1.small" \ No newline at end of file diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index a3aa94931..12a89e3f5 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -89,7 +89,7 @@ variable "login" { type = any validation { - condition = length(setintersection(keys(var.login), ["login", "compute", "control"])) == 0 + condition = length(setintersection(keys(var.login), ["login", "compute", "control"])) == 0 error_message = <<-EOF Login nodegroup names cannot be 'login', 'compute' or 'control'. Invalid var.login key(s): ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. 
EOF @@ -147,13 +147,13 @@ variable "compute" { type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings validation { - condition = length(setintersection(keys(var.compute), ["login", "compute", "control", "default"])) == 0 + condition = length(setintersection(keys(var.compute), ["login", "compute", "control", "default"])) == 0 error_message = <<-EOF Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. EOF } validation { - condition = length(setintersection(keys(var.compute), keys(var.login))) == 0 + condition = length(setintersection(keys(var.compute), keys(var.login))) == 0 error_message = <<-EOF Compute and login nodegroups cannot have the same name. Invalid var.compute/var.login key(s): ${join(", ", setintersection(keys(var.compute), keys(var.login)))} EOF From 061859718a46ff394cd9bdf74b614644b5f9caa8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 23 Sep 2025 08:20:27 +0000 Subject: [PATCH 5/9] extend validation to cover additional_nodegroups --- environments/site/tofu/variables.tf | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 12a89e3f5..b7349ca1b 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -52,7 +52,8 @@ variable "login" { be useful for e.g. separating nodes for ssh and Open Ondemand usage, or to define login nodes with different capabilities such as high-memory. - Keys are names of groups. Keys cannot be 'login', 'compute', or 'control'. + Keys are names of groups, and cannot be 'login', 'compute', 'control', or + keys in the compute or additional_nodegroups variables. 
Values are a mapping as follows: Required: @@ -107,8 +108,8 @@ variable "compute" { Mapping defining homogenous groups of compute nodes. Groups are used in Slurm partition definitions. - Keys are names of groups. Keys cannot be 'compute', 'login', 'control' or 'default' - or be the same as keys in the login variable. + Keys are names of groups, and cannot be 'compute', 'login', 'control', 'default' + or keys in the login or additional_nodegroups variables. Values are a mapping as follows: Required: @@ -153,9 +154,9 @@ variable "compute" { EOF } validation { - condition = length(setintersection(keys(var.compute), keys(var.login))) == 0 + condition = length(setintersection(keys(var.login), keys(var.compute))) == 0 error_message = <<-EOF - Compute and login nodegroups cannot have the same name. Invalid var.compute/var.login key(s): ${join(", ", setintersection(keys(var.compute), keys(var.login)))} + Compute and additional nodegroups cannot have the same name - var.compute and var.additional_nodegroups have the same keys(s): ${join(", ", setintersection(keys(var.compute), keys(var.additional_nodegroups)))} EOF } } @@ -168,7 +169,8 @@ variable "additional_nodegroups" { These nodes are not in the compute or login inventory groups so they will not run slurmd. - Keys are names of groups. + Keys are names of groups and cannot be 'login', 'compute', 'control', or + keys in the login or compute variables. Values are a mapping as for the "login" variable, with the addition of the optional entry: @@ -181,6 +183,19 @@ variable "additional_nodegroups" { - $cluster_name + '_' + $group_name - 'additional' EOF + type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + validation { + condition = length(setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"])) == 0 + error_message = <<-EOF + Additional nodegroup names cannot be 'compute', 'login' or 'control'. 
Invalid var.additional_nodegroups key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control"]))}. + EOF + } + validation { + condition = length(setintersection(keys(var.login), keys(var.compute))) == 0 + error_message = <<-EOF + Additional and login nodegroups cannot have the same name - var.additional_nodegroups and var.login have the same keys(s): ${join(", ", setintersection(keys(var.additional_nodegroups), keys(var.login)))} + EOF + } } variable "environment_root" { From 46dfbfb9ece04d528774ff972854387bb80d628c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 24 Sep 2025 10:10:46 +0000 Subject: [PATCH 6/9] fix TF linting --- environments/site/tofu/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index b7349ca1b..5d94d2a51 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -183,7 +183,7 @@ variable "additional_nodegroups" { - $cluster_name + '_' + $group_name - 'additional' EOF - type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings validation { condition = length(setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"])) == 0 error_message = <<-EOF From 26dbc7ca30fe30a8a978bfcb17b6dc88ba2647f1 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 24 Sep 2025 10:44:21 +0000 Subject: [PATCH 7/9] fixup logic --- environments/site/tofu/variables.tf | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 5d94d2a51..004fefef0 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -95,6 +95,12 @@ variable "login" { Login nodegroup names cannot be 'login', 'compute' or 'control'. 
Invalid var.login key(s): ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. EOF } + validation { + condition = length(setintersection(keys(var.login), keys(var.compute))) == 0 + error_message = <<-EOF + Login and compute nodegroups cannot have the same name - var.login and var.compute have the same keys(s): ${join(", ", setintersection(keys(var.login), keys(var.compute)))} + EOF + } } variable "cluster_image_id" { @@ -154,7 +160,7 @@ variable "compute" { EOF } validation { - condition = length(setintersection(keys(var.login), keys(var.compute))) == 0 + condition = length(setintersection(keys(var.compute), keys(var.additional_nodegroups))) == 0 error_message = <<-EOF Compute and additional nodegroups cannot have the same name - var.compute and var.additional_nodegroups have the same keys(s): ${join(", ", setintersection(keys(var.compute), keys(var.additional_nodegroups)))} EOF @@ -191,7 +197,7 @@ variable "additional_nodegroups" { EOF } validation { - condition = length(setintersection(keys(var.login), keys(var.compute))) == 0 + condition = length(setintersection(keys(var.additional_nodegroups), keys(var.login))) == 0 error_message = <<-EOF Additional and login nodegroups cannot have the same name - var.additional_nodegroups and var.login have the same keys(s): ${join(", ", setintersection(keys(var.additional_nodegroups), keys(var.login)))} EOF From 51e8bc26a5d58a00480bee751cf013dd3ce759f5 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 24 Sep 2025 12:19:17 +0000 Subject: [PATCH 8/9] fix logic --- environments/site/tofu/variables.tf | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 004fefef0..3d9c72f6a 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -96,10 +96,8 @@ variable "login" { EOF } validation { - condition = length(setintersection(keys(var.login), 
keys(var.compute))) == 0 - error_message = <<-EOF - Login and compute nodegroups cannot have the same name - var.login and var.compute have the same keys(s): ${join(", ", setintersection(keys(var.login), keys(var.compute)))} - EOF + condition = length(distinct(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups)))) == length(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups))) + error_message = "Nodegroup names must be unique - variables login, compute and additional nodegroups have one or more keys in common" } } @@ -159,12 +157,6 @@ variable "compute" { Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. EOF } - validation { - condition = length(setintersection(keys(var.compute), keys(var.additional_nodegroups))) == 0 - error_message = <<-EOF - Compute and additional nodegroups cannot have the same name - var.compute and var.additional_nodegroups have the same keys(s): ${join(", ", setintersection(keys(var.compute), keys(var.additional_nodegroups)))} - EOF - } } # tflint-ignore: terraform_typed_variables @@ -193,13 +185,7 @@ variable "additional_nodegroups" { validation { condition = length(setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"])) == 0 error_message = <<-EOF - Additional nodegroup names cannot be 'compute', 'login' or 'control'. Invalid var.additional_nodegroups key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control"]))}. 
- EOF - } - validation { - condition = length(setintersection(keys(var.additional_nodegroups), keys(var.login))) == 0 - error_message = <<-EOF - Additional and login nodegroups cannot have the same name - var.additional_nodegroups and var.login have the same keys(s): ${join(", ", setintersection(keys(var.additional_nodegroups), keys(var.login)))} + Additional nodegroup names cannot be 'compute', 'login' or 'control'. Invalid var.additional_nodegroups key(s): ${join(", ", setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"]))}. EOF } } From f4ed140141638c92051f5ce618d16f2ac1186c17 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 24 Sep 2025 12:38:24 +0000 Subject: [PATCH 9/9] fix linter --- environments/site/tofu/variables.tf | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 3d9c72f6a..82358b22f 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -96,9 +96,18 @@ variable "login" { EOF } validation { - condition = length(distinct(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups)))) == length(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups))) - error_message = "Nodegroup names must be unique - variables login, compute and additional nodegroups have one or more keys in common" + condition = length(distinct(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups)))) == length(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups))) + error_message = <<-EOF + Nodegroup names must be unique. 
Shared key(s) found in variables login, compute and/or additional_nodegroups: ${ + join(", ", setunion( + setintersection(keys(var.login), keys(var.compute)), + setintersection(keys(var.compute), keys(var.additional_nodegroups)), + setintersection(keys(var.additional_nodegroups), keys(var.login)) + )) } + EOF + +} } variable "cluster_image_id" {