Skip to content

Commit e340847

Browse files
authored
Refactor handler lifecycle management with timeouts and auto-recovery (#9)
Refactors handler process management to prevent test suite hangs and enable reliable execution: - Add `--handler-timeout` flag for max response wait per test case - Add `--timeout` flag for total execution time limit - Auto-respawn crashed/broken handlers to allow remaining tests to continue - Clean handler shutdown with force-kill fallback - Introduce re-exec pattern for unit testing handler behaviors
2 parents 4df1da5 + d6bf3b0 commit e340847

File tree

6 files changed

+487
-93
lines changed

6 files changed

+487
-93
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ mock-handler:
2020

2121
test:
2222
@echo "Running runner unit tests..."
23-
go test ./runner/...
23+
go test -v ./runner/...
2424
@echo "Running conformance tests with mock handler..."
2525
$(RUNNER_BIN) -handler $(MOCK_HANDLER_BIN)
2626

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,20 @@ make runner
5252

5353
# Run the test runner against your handler binary
5454
./build/runner --handler <path-to-your-handler>
55+
56+
# Configure timeouts (optional); --handler-timeout is the max wait per test case (default: 10s)
57+
./build/runner --handler <path-to-your-handler> \
58+
--handler-timeout 30s \
59+
--timeout 2m # Total execution limit (default: 30s)
5560
```
5661

62+
#### Timeout Flags
63+
64+
- **`--handler-timeout`** (default: 10s): Maximum time to wait for handler response to each test case. Prevents hangs on unresponsive handlers.
65+
- **`--timeout`** (default: 30s): Total execution time limit across all test suites. Ensures bounded test runs.
66+
67+
The runner automatically detects and recovers from crashed/unresponsive handlers, allowing remaining tests to continue.
68+
5769
### Testing the Runner
5870

5971
Build and test the runner:

cmd/runner/main.go

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
package main
22

33
import (
4+
"context"
45
"flag"
56
"fmt"
67
"io/fs"
78
"os"
89
"strings"
10+
"time"
911

1012
"github.com/stringintech/kernel-bindings-tests/runner"
1113
"github.com/stringintech/kernel-bindings-tests/testdata"
1214
)
1315

1416
func main() {
1517
handlerPath := flag.String("handler", "", "Path to handler binary")
18+
handlerTimeout := flag.Duration("handler-timeout", 10*time.Second, "Max time to wait for handler to respond to each test case (e.g., 10s, 500ms)")
19+
timeout := flag.Duration("timeout", 30*time.Second, "Total timeout for executing all test suites (e.g., 30s, 1m)")
1620
flag.Parse()
1721

1822
if *handlerPath == "" {
@@ -33,6 +37,18 @@ func main() {
3337
os.Exit(1)
3438
}
3539

40+
// Create test runner
41+
testRunner, err := runner.NewTestRunner(*handlerPath, *handlerTimeout, *timeout)
42+
if err != nil {
43+
fmt.Fprintf(os.Stderr, "Error creating test runner: %v\n", err)
44+
os.Exit(1)
45+
}
46+
defer testRunner.CloseHandler()
47+
48+
// Create context with total execution timeout
49+
ctx, cancel := context.WithTimeout(context.Background(), *timeout)
50+
defer cancel()
51+
3652
// Run tests
3753
totalPassed := 0
3854
totalFailed := 0
@@ -48,17 +64,8 @@ func main() {
4864
continue
4965
}
5066

51-
// Create test runner
52-
testRunner, err := runner.NewTestRunner(*handlerPath)
53-
if err != nil {
54-
fmt.Fprintf(os.Stderr, "Error creating test runner: %v\n", err)
55-
continue
56-
}
57-
5867
// Run suite
59-
result := testRunner.RunTestSuite(*suite)
60-
testRunner.Close()
61-
68+
result := testRunner.RunTestSuite(ctx, *suite)
6269
printResults(suite, result)
6370

6471
totalPassed += result.PassedTests

runner/handler.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package runner
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"errors"
7+
"fmt"
8+
"io"
9+
"log/slog"
10+
"os/exec"
11+
"time"
12+
)
13+
14+
// Sentinel errors reported by Handler.ReadLine. ReadLine may wrap them with
// the handler's stderr output, so compare with errors.Is rather than ==.
var (
	// ErrHandlerClosed is reported when the handler's stdout reaches EOF
	// before a line could be read.
	ErrHandlerClosed = errors.New("handler closed unexpectedly")
	// ErrHandlerTimeout is reported when the handler produces no line within
	// the configured timeout.
	ErrHandlerTimeout = errors.New("handler timeout")
)
20+
21+
// HandlerConfig describes how NewHandler launches a handler process.
type HandlerConfig struct {
	// Path is the handler executable to run.
	Path string
	// Args are the command-line arguments passed to the handler. Env, when
	// non-nil, is appended to the environment the command would otherwise
	// run with.
	Args, Env []string
	// Timeout bounds each read from the handler's stdout. A zero value
	// selects the 10-second default. A handler that does not produce output
	// within this duration is killed.
	Timeout time.Duration
}
31+
32+
// Handler owns one conformance handler process and the pipes used to talk to
// it: requests are written to its stdin and responses are read line by line
// from its stdout.
type Handler struct {
	// cmd is the spawned handler process.
	cmd *exec.Cmd
	// stdin is the write end of the pipe connected to the process's stdin.
	stdin io.WriteCloser
	// stdout scans the process's stdout one line at a time.
	stdout *bufio.Scanner
	// stderr is read only after a failure, to attach diagnostics to the error.
	stderr io.ReadCloser
	// timeout is the maximum time ReadLine waits for a line.
	timeout time.Duration
}
40+
41+
// NewHandler spawns a new handler process with the given configuration
42+
func NewHandler(cfg *HandlerConfig) (*Handler, error) {
43+
cmd := exec.Command(cfg.Path, cfg.Args...)
44+
if cfg.Env != nil {
45+
cmd.Env = append(cmd.Environ(), cfg.Env...)
46+
}
47+
48+
stdin, err := cmd.StdinPipe()
49+
if err != nil {
50+
return nil, fmt.Errorf("failed to create stdin pipe: %w", err)
51+
}
52+
53+
stdout, err := cmd.StdoutPipe()
54+
if err != nil {
55+
return nil, fmt.Errorf("failed to create stdout pipe: %w", err)
56+
}
57+
58+
stderr, err := cmd.StderrPipe()
59+
if err != nil {
60+
return nil, fmt.Errorf("failed to create stderr pipe: %w", err)
61+
}
62+
63+
// Start() automatically closes all pipes on failure, no manual cleanup needed
64+
if err := cmd.Start(); err != nil {
65+
return nil, fmt.Errorf("failed to start handler: %w", err)
66+
}
67+
68+
timeout := cfg.Timeout
69+
if timeout == 0 {
70+
timeout = 10 * time.Second
71+
}
72+
73+
return &Handler{
74+
cmd: cmd,
75+
stdin: stdin,
76+
stdout: bufio.NewScanner(stdout),
77+
stderr: stderr,
78+
timeout: timeout,
79+
}, nil
80+
}
81+
82+
// SendLine writes a line to the handler's stdin
83+
func (h *Handler) SendLine(line []byte) error {
84+
_, err := h.stdin.Write(append(line, '\n'))
85+
return err
86+
}
87+
88+
// ReadLine reads a line from the handler's stdout with a configurable timeout
89+
func (h *Handler) ReadLine() ([]byte, error) {
90+
// Use a timeout for Scan() in case the handler hangs
91+
scanDone := make(chan bool, 1)
92+
go func() {
93+
scanDone <- h.stdout.Scan()
94+
}()
95+
96+
var baseErr error
97+
select {
98+
case ok := <-scanDone:
99+
if ok {
100+
return h.stdout.Bytes(), nil
101+
}
102+
if err := h.stdout.Err(); err != nil {
103+
return nil, err
104+
}
105+
// EOF - handler closed stdout prematurely, fall through to kill and capture stderr
106+
baseErr = ErrHandlerClosed
107+
case <-time.After(h.timeout):
108+
// Timeout - handler didn't respond, fall through to kill and capture stderr
109+
baseErr = ErrHandlerTimeout
110+
}
111+
112+
// Kill the process immediately to force stderr to close.
113+
// Without this, there's a rare scenario where stdout closes but stderr remains open,
114+
// causing io.ReadAll(h.stderr) below to block indefinitely waiting for stderr EOF.
115+
if h.cmd.Process != nil {
116+
h.cmd.Process.Kill()
117+
}
118+
119+
// Capture stderr to provide diagnostic information when the handler fails.
120+
if stderrOut, err := io.ReadAll(h.stderr); err == nil && len(stderrOut) > 0 {
121+
return nil, fmt.Errorf("%w: %s", baseErr, bytes.TrimSpace(stderrOut))
122+
}
123+
return nil, baseErr
124+
}
125+
126+
// Close closes stdin and waits for the handler to exit with a 5-second timeout.
127+
// If the handler doesn't exit within the timeout, it is killed.
128+
func (h *Handler) Close() {
129+
if h.stdin != nil {
130+
// Close stdin to signal the handler that we're done sending requests.
131+
// Per the handler specification, the handler should exit cleanly when stdin closes.
132+
h.stdin.Close()
133+
}
134+
if h.cmd != nil {
135+
// Wait for the handler to exit cleanly in response to stdin closing.
136+
// Wait() automatically closes all remaining pipes after the process exits.
137+
// Use a timeout in case the handler doesn't respect the protocol.
138+
done := make(chan error, 1)
139+
go func() {
140+
done <- h.cmd.Wait()
141+
}()
142+
143+
select {
144+
case err := <-done:
145+
if err != nil {
146+
slog.Warn("Handler exit with error", "error", err)
147+
}
148+
case <-time.After(5 * time.Second):
149+
slog.Warn("Handler did not exit within a 5-second timeout, killing process")
150+
if h.cmd.Process != nil {
151+
h.cmd.Process.Kill()
152+
// Call Wait() again to let the process finish cleanup (closing pipes, etc.)
153+
// No timeout needed since Kill() should guarantee the process will exit
154+
h.cmd.Wait()
155+
}
156+
}
157+
}
158+
}

0 commit comments

Comments
 (0)