Skip to content

Commit 76ce4d8

Browse files
fix: Fixing addon test teardown messages (#1087)
* fix: wrap error returns with setFailureResult helper function to properly set failure * fix: add manual debugging guidance when job ID unavailable and correct undeployment terminology * docs: add test failure handling and resource cleanup troubleshooting guide * add DoNotDestroyOnFailure helper for test resource preservation * refactor: centralize DO_NOT_DESTROY_ON_FAILURE check and add teardown logging * fix: remove terraform destroy call to preserve failed test resources
1 parent 89a9519 commit 76ce4d8

File tree

9 files changed

+134
-25
lines changed

9 files changed

+134
-25
lines changed

cloudinfo/projects.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
11561156
} else {
11571157
terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
11581158
}
1159+
} else {
1160+
// Job ID not available - provide manual debugging guidance
1161+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
1162+
logMessage.WriteString("\n\tTo view logs manually:")
1163+
logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
1164+
logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'destroy' job")
1165+
logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform destroy logs")
11591166
}
11601167
}
11611168
if member.LastUndeployed.Result != nil {
@@ -1168,18 +1175,18 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
11681175
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Failed resource: %s", memberName, failedResource))
11691176
}
11701177
} else {
1171-
logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed resources returned", memberName))
1178+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no failed resources returned", memberName))
11721179
}
11731180

11741181
if member.LastUndeployed.Job.Summary != nil && member.LastUndeployed.Job.Summary.DestroyMessages != nil && member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages != nil {
11751182
for _, applyError := range member.LastUndeployed.Job.Summary.DestroyMessages.ErrorMessages {
1176-
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Deployment error:\n", memberName))
1183+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Undeployment error:\n", memberName))
11771184
for key, value := range applyError.GetProperties() {
11781185
logMessage.WriteString(fmt.Sprintf("\t\t%s: %v\n", key, value))
11791186
}
11801187
}
11811188
} else {
1182-
logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Deployment, no failed plan messages returned", memberName))
1189+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) failed Undeployment, no destroy messages returned", memberName))
11831190
}
11841191
}
11851192
} else if member.LastDeployed != nil {
@@ -1209,6 +1216,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
12091216
} else {
12101217
terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
12111218
}
1219+
} else {
1220+
// Job ID not available - provide manual debugging guidance
1221+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
1222+
logMessage.WriteString("\n\tTo view logs manually:")
1223+
logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
1224+
logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'apply' job")
1225+
logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform apply logs")
12121226
}
12131227
}
12141228
if member.LastDeployed.Result != nil {
@@ -1261,6 +1275,13 @@ func (infoSvc *CloudInfoService) GetSchematicsJobLogsForMember(member *project.P
12611275
} else {
12621276
terraformLogMessage.WriteString(fmt.Sprintf("\nJob logs for Job ID: %s member: %s\n%s", jobID, memberName, logs))
12631277
}
1278+
} else {
1279+
// Job ID not available - provide manual debugging guidance
1280+
logMessage.WriteString(fmt.Sprintf("\n\t(%s) Unable to retrieve Terraform logs automatically (Job ID not available from Projects API)", memberName))
1281+
logMessage.WriteString("\n\tTo view logs manually:")
1282+
logMessage.WriteString("\n\t\t1. Go to the Schematics workspace URL above")
1283+
logMessage.WriteString("\n\t\t2. Click on 'Jobs' tab and find the most recent failed 'plan' job")
1284+
logMessage.WriteString("\n\t\t3. Click on the job to view the Terraform plan logs")
12641285
}
12651286
}
12661287

common/general.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,3 +403,26 @@ func IsRunningInCI() bool {
403403
branch, err := git.getCurrentBranch()
404404
return err == nil && branch == "HEAD"
405405
}
406+
407+
// DoNotDestroyOnFailure checks if the DO_NOT_DESTROY_ON_FAILURE environment variable
408+
// is set to a truthy value. This is used to preserve test resources when a test fails,
409+
// allowing for manual debugging in the IBM Cloud console.
410+
//
411+
// Accepted truthy values (case-insensitive, whitespace-trimmed):
412+
// - "true", "1", "yes"
413+
//
414+
// Usage: Call this in teardown logic along with Testing.Failed() to determine
415+
// whether to skip resource cleanup:
416+
//
417+
// if t.Failed() && common.DoNotDestroyOnFailure() {
418+
// // Skip cleanup - resources preserved for debugging
419+
// }
420+
func DoNotDestroyOnFailure() bool {
421+
envVal, exists := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
422+
if !exists {
423+
return false
424+
}
425+
// Trim whitespace and convert to lowercase for comparison
426+
normalizedVal := strings.ToLower(strings.TrimSpace(envVal))
427+
return normalizedVal == "true" || normalizedVal == "1" || normalizedVal == "yes"
428+
}

docs/projects/addons/troubleshooting.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,65 @@ options.PreDeployHook = func(options *testaddons.TestAddonOptions) error {
400400
options.DeployTimeoutMinutes = 480
401401
```
402402

403+
## Understanding Test Failure Handling
404+
405+
When a test fails, the framework uses three different mechanisms to track and report the failure. Understanding these helps when debugging issues like resources being cleaned up when they shouldn't be.
406+
407+
### The Three Failure Mechanisms
408+
409+
| Mechanism | What it does | What it affects |
410+
|-----------|--------------|-----------------|
411+
| `options.Testing.Fail()` | Marks the Go test as failed | Controls `DO_NOT_DESTROY_ON_FAILURE` behavior and actual test pass/fail |
412+
| `options.Logger.MarkFailed()` | Marks the logger as failed | Controls whether buffered logs are flushed to output |
413+
| `setFailureResult()` | Sets internal result string | Controls the "TEST EXECUTION END: RESULT" message |
414+
415+
### How They Work Together
416+
417+
When an error occurs, the framework calls all three:
418+
419+
```go
420+
options.Logger.MarkFailed() // 1. Prepare to flush buffered logs
421+
options.Logger.FlushOnFailure() // 2. Output the buffered logs
422+
options.Testing.Fail() // 3. Mark Go test as failed
423+
return setFailureResult(err, "STAGE") // 4. Set result for logging
424+
```
425+
426+
### DO_NOT_DESTROY_ON_FAILURE Behavior
427+
428+
The `DO_NOT_DESTROY_ON_FAILURE` environment variable prevents resource cleanup when tests fail, allowing you to debug in the IBM Cloud console.
429+
430+
```bash
431+
export DO_NOT_DESTROY_ON_FAILURE=true
432+
```
433+
434+
**How it works:** During teardown, the framework checks:
435+
436+
```go
437+
if options.Testing.Failed() && DO_NOT_DESTROY_ON_FAILURE == "true" {
438+
// Skip cleanup - resources are preserved for debugging
439+
}
440+
```
441+
442+
**Important:** This only works if `options.Testing.Fail()` was called before teardown runs. If you see resources being deleted despite this setting, check that the error path properly calls `Testing.Fail()`.
443+
444+
### Troubleshooting Resource Cleanup Issues
445+
446+
If resources are deleted when `DO_NOT_DESTROY_ON_FAILURE=true`:
447+
448+
1. **Verify the environment variable is set:**
449+
```bash
450+
echo $DO_NOT_DESTROY_ON_FAILURE # Should print: true
451+
```
452+
453+
2. **Check the test result log:** Look for `TEST EXECUTION END`:
454+
- `RESULT: PASSED` - The test didn't register as failed (missing `Testing.Fail()` call)
455+
- `RESULT: FAILED_AT_<STAGE>` - Test failed correctly at that stage
456+
457+
3. **Check for the skip message:** When working correctly, you should see:
458+
```
459+
Terratest failed. Debug the Test and delete resources manually.
460+
```
461+
403462
## Debugging Techniques
404463

405464
### Enable Verbose Logging

testaddons/setup_teardown.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package testaddons
22

33
import (
44
"fmt"
5-
"os"
65
"regexp"
76
"strings"
87
"time"
@@ -468,10 +467,7 @@ func (options *TestAddonOptions) testTearDown() {
468467
}
469468

470469
// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
471-
envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
472-
473-
// Do not destroy if tests failed and "DO_NOT_DESTROY_ON_FAILURE" is true
474-
if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
470+
if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
475471
if options.currentProject == nil || options.currentProject.ID == nil {
476472
options.Logger.ShortError("Terratest failed. No project to delete.")
477473
} else {
@@ -480,6 +476,9 @@ func (options *TestAddonOptions) testTearDown() {
480476
return
481477
}
482478

479+
options.Logger.ShortInfo("Destroying test resources")
480+
options.Logger.ShortInfo(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
481+
483482
// Project cleanup logic: always clean up projects since we're not sharing them
484483
if options.currentProject != nil && options.currentProject.ID != nil {
485484
options.Logger.ShortInfo(fmt.Sprintf("Deleting the project %s with ID %s", options.ProjectName, *options.currentProject.ID))

testaddons/tests.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
393393
options.Logger.MarkFailed()
394394
options.Logger.FlushOnFailure()
395395
options.Testing.Fail()
396-
return fmt.Errorf("error getting the configuration: %w", err)
396+
return setFailureResult(fmt.Errorf("error getting the configuration: %w", err), "GET_CONFIGURATION")
397397
}
398398
options.Logger.ShortInfo(fmt.Sprintf("All Configurations in Project ID: %s", options.currentProjectConfig.ProjectID))
399399
options.Logger.ShortInfo("Configurations:")
@@ -578,7 +578,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
578578
options.Logger.MarkFailed()
579579
options.Logger.FlushOnFailure()
580580
options.Testing.Fail()
581-
return fmt.Errorf("error resolving references: %w", err)
581+
return setFailureResult(fmt.Errorf("error resolving references: %w", err), "RESOLVE_REFERENCES")
582582
}
583583
options.Logger.ShortInfo(" Resolved References:")
584584
for _, ref := range res_resp.References {
@@ -1209,7 +1209,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
12091209
options.Logger.MarkFailed()
12101210
options.Logger.FlushOnFailure()
12111211
options.Testing.Fail()
1212-
return fmt.Errorf("missing required inputs: %s", strings.Join(inputValidationIssues, "; "))
1212+
return setFailureResult(fmt.Errorf("missing required inputs: %s", strings.Join(inputValidationIssues, "; ")), "MISSING_INPUTS")
12131213
}
12141214

12151215
// Now evaluate waiting input issues after dependency validation has provided context
@@ -1516,7 +1516,7 @@ func (options *TestAddonOptions) runAddonTest(enhancedReporting bool) error {
15161516
options.Logger.MarkFailed()
15171517
options.Logger.FlushOnFailure()
15181518
options.Testing.Fail()
1519-
return fmt.Errorf("errors occurred during undeploy")
1519+
return setFailureResult(fmt.Errorf("errors occurred during undeploy"), "UNDEPLOY")
15201520
}
15211521
options.Logger.ShortInfo("Undeploy completed successfully")
15221522
} else {

testhelper/tests.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,12 @@ func (options *TestOptions) testTearDown() {
138138
}
139139

140140
if !options.SkipTestTearDown {
141-
// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
142-
envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
143-
144-
// Do not destroy if tests failed and "DO_NOT_DESTROY_ON_FAILURE" is true
145-
if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
141+
// Check if destroy should be skipped due to test failure
142+
if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
146143
fmt.Println("Terratest failed. Debug the Test and delete resources manually.")
147144
} else {
145+
logger.Log(options.Testing, "Destroying test resources")
146+
logger.Log(options.Testing, fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
148147

149148
for _, address := range options.ImplicitDestroy {
150149
// TODO: is this the correct path to the state file? and/or does it need to be updated upstream to a relative path(temp dir)?
@@ -200,6 +199,8 @@ func (options *TestOptions) testTearDown() {
200199
logger.Log(options.Testing, "END: PreDestroyHook")
201200
}
202201
}
202+
logger.Log(options.Testing, "Destroying test resources")
203+
logger.Log(options.Testing, fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
203204
logger.Log(options.Testing, "START: Destroy")
204205
destroyOutput, destroyError := terraform.DestroyE(options.Testing, options.TerraformOptions)
205206
if !assert.NoError(options.Testing, destroyError) {

testprojects/tests.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package testprojects
33
import (
44
"errors"
55
"fmt"
6-
"os"
76
"path"
87
"regexp"
98
"runtime"
@@ -993,7 +992,6 @@ func (options *TestProjectsOptions) TestTearDown() {
993992
}
994993
if !options.SkipTestTearDown {
995994
if options.executeResourceTearDown() {
996-
997995
// Trigger undeploy and wait for completion
998996
options.Logger.ShortInfo("Triggering Undeploy and waiting for completion")
999997
undeployErrors := options.TriggerUnDeployAndWait()
@@ -1101,17 +1099,19 @@ func (options *TestProjectsOptions) executeResourceTearDown() bool {
11011099

11021100
// if skipundeploy is true, short circuit we are done
11031101
if options.SkipUndeploy {
1102+
options.Logger.ShortInfo("SkipUndeploy is set")
11041103
execute = false
11051104
}
11061105

11071106
// dont teardown if there is nothing to teardown
11081107
if options.currentStackConfig == nil || options.currentStackConfig.ConfigID == "" {
1108+
options.Logger.ShortInfo("No resources to delete")
11091109
execute = false
11101110
}
11111111

1112-
envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
1113-
1114-
if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
1112+
if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
1113+
options.Logger.ShortInfo("DO_NOT_DESTROY_ON_FAILURE is set")
1114+
options.Logger.ShortInfo(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
11151115
execute = false
11161116
}
11171117

@@ -1124,6 +1124,10 @@ func (options *TestProjectsOptions) executeResourceTearDown() bool {
11241124
}
11251125
}
11261126

1127+
if execute {
1128+
options.Logger.ShortInfo("Executing resource teardown")
1129+
1130+
}
11271131
return execute
11281132
}
11291133

testprojects/tests_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func TestCorrectResourceTeardownFlag(t *testing.T) {
5151
SkipUndeploy: false,
5252
SkipProjectDelete: false,
5353
currentStackConfig: &cloudinfo.ConfigDetails{ConfigID: "1234"},
54-
Logger: &common.TestLogger{},
54+
Logger: common.NewTestLogger(t.Name()),
5555
}
5656
o.Testing.Fail()
5757
assert.Equal(t, true, o.executeResourceTearDown())

testschematic/tests.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -445,10 +445,12 @@ func testTearDown(svc *SchematicsTestService, options *TestSchematicOptions) {
445445
svc.TerraformResourcesCreated = false
446446

447447
// Check if "DO_NOT_DESTROY_ON_FAILURE" is set
448-
envVal, _ := os.LookupEnv("DO_NOT_DESTROY_ON_FAILURE")
449-
if options.Testing.Failed() && strings.ToLower(envVal) == "true" {
448+
if options.Testing.Failed() && common.DoNotDestroyOnFailure() {
450449
options.Testing.Log("[SCHEMATICS] Schematics APPLY failed. Debug the Test and delete resources manually.")
451450
} else {
451+
options.Testing.Log("Preforming Teardown")
452+
options.Testing.Log(fmt.Sprintf("Test Passed: %t", !options.Testing.Failed()))
453+
452454
destroySuccess := false // will only flip to true if job completes
453455
destroyResponse, destroyErr := svc.CreateDestroyJob()
454456
if assert.NoErrorf(options.Testing, destroyErr, "error creating DESTROY - %s", svc.WorkspaceName) {

0 commit comments

Comments
 (0)