fix: Fixing addon test teardown messages (#1087)

daniel-butler-irl · web-flow · commit 76ce4d8fc3cf · 2025-12-04T14:21:50.000Z
* fix: wrap error returns with the setFailureResult helper function so the failure result is set properly

* fix: add manual debugging guidance when job ID unavailable and correct undeployment terminology

* docs: add test failure handling and resource cleanup troubleshooting guide

* add DoNotDestroyOnFailure helper for test resource preservation

* refactor: centralize DO_NOT_DESTROY_ON_FAILURE check and add teardown logging

* fix: remove terraform destroy call to preserve failed test resources
diff --git a/cloudinfo/projects.go b/cloudinfo/projects.go
@@ -1156,6 +1156,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
 					} else {
 						terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
 					}
+				} else {
+					// Job ID not available - provide manual debugging guidance
+					logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
+					logMessage.WriteString("\n\tTo view logs manually:")
+					logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
+					logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'destroy' job")
+					logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform destroy logs")
 				}
 			}
 			if member.LastUndeployed.Result != nil {
@@ -1168,18 +1175,18 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
 					logMessage.WriteString(fmt.Sprintf("\n\t(%s) Failed resource: %s", memberName, failedResource))
 				}
 			} else {
-				logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed resources returned", memberName))
+				logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no failed resources returned", memberName))
 			}
 
 			if member.LastUndeployed.Job.Summary != nil && member.LastUndeployed.Job.Summary.DestroyMessages != nil && member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages != nil {
 				for _, applyError := range member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages {
-					logMessage.WriteString(fmt.Sprintf("\n\t(%s) Deployment error:\n", memberName))
+					logMessage.WriteString(fmt.Sprintf("\n\t(%s) Undeployment error:\n", memberName))
 					for key, value := range applyError.GetProperties() {
 						logMessage.WriteString(fmt.Sprintf("\t\t%s: %v\n", key, value))
 					}
 				}
 			} else {
-				logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed plan messages returned", memberName))
+				logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no destroy messages returned", memberName))
 			}
 		}
 	} else if member.LastDeployed != nil {
@@ -1209,6 +1216,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
 				} else {
 					terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
 				}
+			} else {
+				// Job ID not available - provide manual debugging guidance
+				logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
+				logMessage.WriteString("\n\tTo view logs manually:")
+				logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
+				logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'apply' job")
+				logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform apply logs")
 			}
 		}
 		if member.LastDeployed.Result != nil {
@@ -1261,6 +1275,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
 				} else {
 					terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
 				}
+			} else {
+				// Job ID not available - provide manual debugging guidance
+				logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
+				logMessage.WriteString("\n\tTo view logs manually:")
+				logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
+				logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'plan' job")
+				logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform plan logs")
 			}
 		}
 
diff --git a/common/general.go b/common/general.go
@@ -403,3 +403,26 @@ func IsRunningInCI() bool {
 	branch, err := git.getCurrentBranch()
 	return err == nil && branch == "HEAD"
 }
+
+// DoNotDestroyOnFailure reports whether the DO_NOT_DESTROY_ON_FAILURE environment variable
+// is set to a truthy value. This is used to preserve test resources when a test fails,
+// allowing for manual debugging in the IBM Cloud console.
+//
+// Accepted truthy values (case-insensitive, whitespace-trimmed):
+//   - "true", "1", "yes"
+//
+// It returns false when the variable is unset or holds any other value. Call it in
+// teardown logic along with Testing.Failed() to determine whether to skip resource cleanup:
+//
+//	if t.Failed() && common.DoNotDestroyOnFailure() {
+//	    // Skip cleanup - resources preserved for debugging
+//	}
+func DoNotDestroyOnFailure() bool {
+	envVal, exists := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
+	if !exists {
+		return false
+	}
+	// Normalize so values such as " TRUE " or "Yes" are still recognized
+	normalizedVal := strings.ToLower(strings.TrimSpace(envVal))
+	return normalizedVal == "true" || normalizedVal == "1" || normalizedVal == "yes"
+}
diff --git a/docs/projects/addons/troubleshooting.md b/docs/projects/addons/troubleshooting.md
@@ -400,6 +400,65 @@ options.PreDeployHook = func(options *testaddons.TestAddonOptions) error {
 options.DeployTimeoutMinutes = 480
 ```
 
+## Understanding Test Failure Handling
+
+When a test fails, the framework uses three different mechanisms to track and report the failure. Understanding these helps when debugging issues like resources being cleaned up when they shouldn't be.
+
+### The Three Failure Mechanisms
+
+| Mechanism | What it does | What it affects |
+|-----------|--------------|-----------------|
+| `options.Testing.Fail()` | Marks the Go test as failed | Controls `DO_NOT_DESTROY_ON_FAILURE` behavior and actual test pass/fail |
+| `options.Logger.MarkFailed()` | Marks the logger as failed | Controls whether buffered logs are flushed to output |
+| `setFailureResult()` | Sets internal result string | Controls the "TEST EXECUTION END: RESULT" message |
+
+### How They Work Together
+
+When an error occurs, the framework triggers all three mechanisms in order (the logger mechanism takes two calls: mark as failed, then flush):
+
+```go
+options.Logger.MarkFailed()       // 1. Prepare to flush buffered logs
+options.Logger.FlushOnFailure()   // 2. Output the buffered logs
+options.Testing.Fail()            // 3. Mark Go test as failed
+return setFailureResult(err, "STAGE")  // 4. Set result for logging
+```
+
+### DO_NOT_DESTROY_ON_FAILURE Behavior
+
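+The `DO_NOT_DESTROY_ON_FAILURE` environment variable prevents resource cleanup when tests fail, allowing you to debug in the IBM Cloud console. Accepted values are `true`, `1`, and `yes` (case-insensitive, surrounding whitespace ignored); any other value, or leaving the variable unset, keeps normal cleanup enabled.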
+
+```bash
+export DO_NOT_DESTROY_ON_FAILURE=true
+```
+
+**How it works:** During teardown, the framework checks:
+
+```go
+if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
+    // Skip cleanup - resources are preserved for debugging
+}
+```
+
+**Important:** This only works if `options.Testing.Fail()` was called before teardown runs. If you see resources being deleted despite this setting, check that the error path properly calls `Testing.Fail()`.
+
+### Troubleshooting Resource Cleanup Issues
+
+If resources are deleted when `DO_NOT_DESTROY_ON_FAILURE=true`:
+
+1. **Verify the environment variable is set:**
+   ```bash
+   echo $DO_NOT_DESTROY_ON_FAILURE  # Should print: true (1 or yes are also accepted)
+   ```
+
+2. **Check the test result log:** Look for `TEST EXECUTION END`:
+   - `RESULT: PASSED` - No failure result was recorded (the error path is missing a `setFailureResult()` call, and possibly the `Testing.Fail()` call as well)
+   - `RESULT: FAILED_AT_<STAGE>` - Test failed correctly at that stage
+
+3. **Check for the skip message:** When working correctly, you should see:
+   ```
+   Terratest failed. Debug the Test and delete resources manually.
+   ```
+
 ## Debugging Techniques
 
 ### Enable Verbose Logging
diff --git a/testaddons/setup_teardown.go b/testaddons/setup_teardown.go
@@ -2,7 +2,6 @@ package testaddons
 
 import (
 	"fmt"
-	"os"
 	"regexp"
 	"strings"
 	"time"
@@ -468,10 +467,7 @@ func (options *TestAddonOptions) testTearDown() {
 	}
 
 	// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
-	envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
-
-	// Do not destroy if tests failed and "DO_NOT_DESTROY_ON_FAILURE" is true
-	if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
+	if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
 		if options.currentProject == nil || options.currentProject.ID == nil {
 			options.Logger.ShortError("Terratest failed. No project to delete.")
 		} else {
@@ -480,6 +476,9 @@ func (options *TestAddonOptions) testTearDown() {
 		return
 	}
 
+	options.Logger.ShortInfo("Destroying test resources")
+	options.Logger.ShortInfo(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
+
 	// Project cleanup logic: always clean up projects since we're not sharing them
 	if options.currentProject != nil && options.currentProject.ID != nil {
 		options.Logger.ShortInfo(fmt.Sprintf("Deleting the project %s with ID %s", options.ProjectName, *options.currentProject.ID))
diff --git a/testaddons/tests.go b/testaddons/tests.go
@@ -393,7 +393,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
 		options.Logger.MarkFailed()
 		options.Logger.FlushOnFailure()
 		options.Testing.Fail()
-		return fmt.Errorf("error getting the configuration: %w", err)
+		return setFailureResult(fmt.Errorf("error getting the configuration: %w", err), "GET_CONFIGURATION")
 	}
 	options.Logger.ShortInfo(fmt.Sprintf("All Configurations in Project ID: %s", options.currentProjectConfig.ProjectID))
 	options.Logger.ShortInfo("Configurations:")
@@ -578,7 +578,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
 					options.Logger.MarkFailed()
 					options.Logger.FlushOnFailure()
 					options.Testing.Fail()
-					return fmt.Errorf("error resolving references: %w", err)
+					return setFailureResult(fmt.Errorf("error resolving references: %w", err), "RESOLVE_REFERENCES")
 				}
 				options.Logger.ShortInfo("  Resolved References:")
 				for _, ref := range res_resp.References {
@@ -1209,7 +1209,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
 		options.Logger.MarkFailed()
 		options.Logger.FlushOnFailure()
 		options.Testing.Fail()
-		return fmt.Errorf("missing required inputs: %s", strings.Join(inputValidationIssues, "; "))
+		return setFailureResult(fmt.Errorf("missing required inputs: %s", strings.Join(inputValidationIssues, "; ")), "MISSING_INPUTS")
 	}
 
 	// Now evaluate waiting input issues after dependency validation has provided context
@@ -1516,7 +1516,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
 			options.Logger.MarkFailed()
 			options.Logger.FlushOnFailure()
 			options.Testing.Fail()
-			return fmt.Errorf("errors occurred during undeploy")
+			return setFailureResult(fmt.Errorf("errors occurred during undeploy"), "UNDEPLOY")
 		}
 		options.Logger.ShortInfo("Undeploy completed successfully")
 	} else {
diff --git a/testhelper/tests.go b/testhelper/tests.go
@@ -138,13 +138,12 @@ func (options *TestOptions) testTearDown() {
 	}
 
 	if !options.SkipTestTearDown {
-		// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
-		envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
-
-		// Do not destroy if tests failed and "DO_NOT_DESTROY_ON_FAILURE" is true
-		if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
+		// Check if destroy should be skipped due to test failure
+		if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
 			fmt.Println("Terratest failed. Debug the Test and delete resources manually.")
 		} else {
+			logger.Log(options.Testing, "Destroying test resources")
+			logger.Log(options.Testing, fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
 
 			for _, address := range options.ImplicitDestroy {
 				// TODO: is this the correct path to the state file? and/or does it need to be updated upstream to a relative path(temp dir)?
@@ -200,6 +199,8 @@ func (options *TestOptions) testTearDown() {
 					logger.Log(options.Testing, "END: PreDestroyHook")
 				}
 			}
+			logger.Log(options.Testing, "Destroying test resources")
+			logger.Log(options.Testing, fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
 			logger.Log(options.Testing, "START: Destroy")
 			destroyOutput, destroyError := terraform.DestroyE(options.Testing, options.TerraformOptions)
 			if !assert.NoError(options.Testing, destroyError) {
diff --git a/testprojects/tests.go b/testprojects/tests.go
@@ -3,7 +3,6 @@ package testprojects
 import (
 	"errors"
 	"fmt"
-	"os"
 	"path"
 	"regexp"
 	"runtime"
@@ -993,7 +992,6 @@ func (options *TestProjectsOptions) TestTearDown() {
 	}
 	if !options.SkipTestTearDown {
 		if options.executeResourceTearDown() {
-
 			// Trigger undeploy and wait for completion
 			options.Logger.ShortInfo("Triggering Undeploy and waiting for completion")
 			undeployErrors := options.TriggerUnDeployAndWait()
@@ -1101,17 +1099,19 @@ func (options *TestProjectsOptions) executeResourceTearDown() bool {
 
 	// if skipundeploy is true, short circuit we are done
 	if options.SkipUndeploy {
+		options.Logger.ShortInfo("SkipUndeploy is set")
 		execute = false
 	}
 
 	// dont teardown if there is nothing to teardown
 	if options.currentStackConfig == nil || options.currentStackConfig.ConfigID == "" {
+		options.Logger.ShortInfo("No resources to delete")
 		execute = false
 	}
 
-	envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
-
-	if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
+	if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
+		options.Logger.ShortInfo("DO_NOT_DESTROY_ON_FAILURE is set")
+		options.Logger.ShortInfo(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
 		execute = false
 	}
 
@@ -1124,6 +1124,10 @@ func (options *TestProjectsOptions) executeResourceTearDown() bool {
 		}
 	}
 
+	if execute {
+		options.Logger.ShortInfo("Executing resource teardown")
+
+	}
 	return execute
 }
 
diff --git a/testprojects/tests_test.go b/testprojects/tests_test.go
@@ -51,7 +51,7 @@ func TestCorrectResourceTeardownFlag(t *testing.T) {
 			SkipUndeploy:       false,
 			SkipProjectDelete:  false,
 			currentStackConfig: &cloudinfo.ConfigDetails{ConfigID: "1234"},
-			Logger:             &common.TestLogger{},
+			Logger:             common.NewTestLogger(t.Name()),
 		}
 		o.Testing.Fail()
 		assert.Equal(t, true, o.executeResourceTearDown())
diff --git a/testschematic/tests.go b/testschematic/tests.go
@@ -445,10 +445,12 @@ func testTearDown(svc *SchematicsTestService, options *TestSchematicOptions) {
 			svc.TerraformResourcesCreated = false
 
 			// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
-			envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
-			if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
+			if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
 				options.Testing.Log("[SCHEMATICS] Schematics APPLY failed. Debug the Test and delete resources manually.")
 			} else {
+				options.Testing.Log("Preforming Teardown")
+				options.Testing.Log(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
+
 				destroySuccess := false // will only flip to true if job completes
 				destroyResponse, destroyErr := svc.CreateDestroyJob()
 				if assert.NoErrorf(options.Testing, destroyErr, "error creating DESTROY - %s", svc.WorkspaceName) {

Original file line number	Diff line number	Diff line change
`@@ -1156,6 +1156,13 @@ func (infoSvc CloudInfoService) GetSchematicsJobLogsForMember(member project.P`
`1156`	`1156`	`} else {`
`1157`	`1157`	`terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))`
`1158`	`1158`	`}`
	`1159`	`+ } else {`
	`1160`	`+ // Job ID not available - provide manual debugging guidance`
	`1161`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))`
	`1162`	`+ logMessage.WriteString("\n\tTo view logs manually:")`
	`1163`	`+ logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")`
	`1164`	`+ logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'destroy' job")`
	`1165`	`+ logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform destroy logs")`
`1159`	`1166`	`}`
`1160`	`1167`	`}`
`1161`	`1168`	`if member.LastUndeployed.Result != nil {`
`@@ -1168,18 +1175,18 @@ func (infoSvc CloudInfoService) GetSchematicsJobLogsForMember(member project.P`
`1168`	`1175`	`logMessage.WriteString(fmt.Sprintf("\n\t(%s) Failed resource: %s", memberName, failedResource))`
`1169`	`1176`	`}`
`1170`	`1177`	`} else {`
`1171`		`- logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed resources returned", memberName))`
	`1178`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no failed resources returned", memberName))`
`1172`	`1179`	`}`
`1173`	`1180`
`1174`	`1181`	`if member.LastUndeployed.Job.Summary != nil && member.LastUndeployed.Job.Summary.DestroyMessages != nil && member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages != nil {`
`1175`	`1182`	`for _, applyError := range member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages {`
`1176`		`- logMessage.WriteString(fmt.Sprintf("\n\t(%s) Deployment error:\n", memberName))`
	`1183`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) Undeployment error:\n", memberName))`
`1177`	`1184`	`for key, value := range applyError.GetProperties() {`
`1178`	`1185`	`logMessage.WriteString(fmt.Sprintf("\t\t%s: %v\n", key, value))`
`1179`	`1186`	`}`
`1180`	`1187`	`}`
`1181`	`1188`	`} else {`
`1182`		`- logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed plan messages returned", memberName))`
	`1189`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no destroy messages returned", memberName))`
`1183`	`1190`	`}`
`1184`	`1191`	`}`
`1185`	`1192`	`} else if member.LastDeployed != nil {`
`@@ -1209,6 +1216,13 @@ func (infoSvc CloudInfoService) GetSchematicsJobLogsForMember(member project.P`
`1209`	`1216`	`} else {`
`1210`	`1217`	`terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))`
`1211`	`1218`	`}`
	`1219`	`+ } else {`
	`1220`	`+ // Job ID not available - provide manual debugging guidance`
	`1221`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))`
	`1222`	`+ logMessage.WriteString("\n\tTo view logs manually:")`
	`1223`	`+ logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")`
	`1224`	`+ logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'apply' job")`
	`1225`	`+ logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform apply logs")`
`1212`	`1226`	`}`
`1213`	`1227`	`}`
`1214`	`1228`	`if member.LastDeployed.Result != nil {`
`@@ -1261,6 +1275,13 @@ func (infoSvc CloudInfoService) GetSchematicsJobLogsForMember(member project.P`
`1261`	`1275`	`} else {`
`1262`	`1276`	`terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))`
`1263`	`1277`	`}`
	`1278`	`+ } else {`
	`1279`	`+ // Job ID not available - provide manual debugging guidance`
	`1280`	`+ logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))`
	`1281`	`+ logMessage.WriteString("\n\tTo view logs manually:")`
	`1282`	`+ logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")`
	`1283`	`+ logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'plan' job")`
	`1284`	`+ logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform plan logs")`
`1264`	`1285`	`}`
`1265`	`1286`	`}`
`1266`	`1287`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ func TestCorrectResourceTeardownFlag(t *testing.T) {`
`51`	`51`	`SkipUndeploy: false,`
`52`	`52`	`SkipProjectDelete: false,`
`53`	`53`	`currentStackConfig: &cloudinfo.ConfigDetails{ConfigID: "1234"},`
`54`		`- Logger: &common.TestLogger{},`
	`54`	`+ Logger: common.NewTestLogger(t.Name()),`
`55`	`55`	`}`
`56`	`56`	`o.Testing.Fail()`
`57`	`57`	`assert.Equal(t, true, o.executeResourceTearDown())`