@@ -12,6 +12,7 @@ import (
1212 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
1313 "github.com/stretchr/testify/require"
1414 appsv1 "k8s.io/api/apps/v1"
15+ corev1 "k8s.io/api/core/v1"
1516 apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
1617 "k8s.io/apimachinery/pkg/api/errors"
1718 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -182,3 +183,218 @@ func GetSampleCR(t *testing.T) *v1alpha1.LlamaStackDistribution {
182183
183184 return distribution
184185}
186+
187+ // checkLlamaStackDistributionStatus helps identify if the custom resource reached the expected state during test execution.
188+ func checkLlamaStackDistributionStatus (t * testing.T , testenv * TestEnvironment , namespace , name string ) {
189+ t .Helper ()
190+
191+ llsDistro := & v1alpha1.LlamaStackDistribution {}
192+ err := testenv .Client .Get (testenv .Ctx , client.ObjectKey {Namespace : namespace , Name : name }, llsDistro )
193+ if err != nil {
194+ t .Logf ("⚠️ Error getting LlamaStackDistribution: %v" , err )
195+ return
196+ }
197+
198+ t .Logf ("LlamaStackDistribution status:" )
199+ t .Logf (" Phase: %s" , llsDistro .Status .Phase )
200+ t .Logf (" Generation: %d" , llsDistro .Generation )
201+ t .Logf (" ResourceVersion: %s" , llsDistro .ResourceVersion )
202+ t .Logf (" Conditions: %+v" , llsDistro .Status .Conditions )
203+ }
204+
205+ // checkNamespaceEvents reveals what Kubernetes operations occurred and why they may have failed.
206+ func checkNamespaceEvents (t * testing.T , testenv * TestEnvironment , namespace string ) {
207+ t .Helper ()
208+
209+ eventList := & corev1.EventList {}
210+ err := testenv .Client .List (testenv .Ctx , eventList , client .InNamespace (namespace ))
211+ if err != nil {
212+ t .Logf ("⚠️ Error getting events: %v" , err )
213+ return
214+ }
215+
216+ if len (eventList .Items ) == 0 {
217+ t .Log ("📝 No events found in namespace" )
218+ return
219+ }
220+
221+ maxEvents := 25
222+ if len (eventList .Items ) > maxEvents {
223+ t .Logf ("📝 Showing first %d events (of %d total):" , maxEvents , len (eventList .Items ))
224+ eventList .Items = eventList .Items [:maxEvents ]
225+ } else {
226+ t .Logf ("📝 Found %d events in namespace %s:" , len (eventList .Items ), namespace )
227+ }
228+
229+ for _ , event := range eventList .Items {
230+ t .Logf (" %s: %s (%s) - %s" ,
231+ event .LastTimestamp .Format ("15:04:05" ),
232+ event .Reason ,
233+ event .Type ,
234+ event .Message )
235+ }
236+ }
237+
238+ // requireNoErrorWithDebugging provides comprehensive debugging context when tests fail to help identify root causes quickly.
239+ func requireNoErrorWithDebugging (t * testing.T , testenv * TestEnvironment , err error , msg string , namespace , crName string ) {
240+ t .Helper ()
241+ if err != nil {
242+ t .Logf ("💥 ERROR OCCURRED: %s - %v" , msg , err )
243+
244+ // Check custom resource status first to see if the operator processed the request correctly
245+ checkLlamaStackDistributionStatus (t , testenv , namespace , crName )
246+
247+ // Check events to understand what Kubernetes operations were attempted and why they failed
248+ checkNamespaceEvents (t , testenv , namespace )
249+
250+ // Check pod details to identify container startup issues or crash loops
251+ logPodDetails (t , testenv , namespace )
252+
253+ // Check service endpoints to see if pods are being discovered by services
254+ logServiceEndpoints (t , testenv , namespace , crName + "-service" )
255+
256+ // Check service configuration to identify selector mismatches
257+ logServiceSpec (t , testenv , namespace , crName + "-service" )
258+
259+ // Check deployment spec to identify configuration problems preventing pod startup
260+ logDeploymentSpec (t , testenv , namespace , crName )
261+
262+ require .NoError (t , err , msg )
263+ }
264+ }
265+
266+ // logPodDetails helps diagnose pod startup issues and container restart problems during test failures.
267+ func logPodDetails (t * testing.T , testenv * TestEnvironment , namespace string ) {
268+ t .Helper ()
269+
270+ podList := & corev1.PodList {}
271+ err := testenv .Client .List (testenv .Ctx , podList , client .InNamespace (namespace ))
272+ if err != nil {
273+ t .Logf ("Failed to list pods: %v" , err )
274+ return
275+ }
276+
277+ t .Logf ("📦 Found %d pods in namespace %s:" , len (podList .Items ), namespace )
278+ for _ , pod := range podList .Items {
279+ t .Logf ("Pod: %s, Phase: %s" , pod .Name , pod .Status .Phase )
280+
281+ for _ , cs := range pod .Status .ContainerStatuses {
282+ // RestartCount indicates crash loops or configuration issues
283+ t .Logf (" Container %s: Ready=%v, RestartCount=%d" ,
284+ cs .Name , cs .Ready , cs .RestartCount )
285+
286+ // Container states reveal why pods aren't starting or are crashing
287+ if cs .State .Waiting != nil {
288+ t .Logf (" Waiting: %s - %s" ,
289+ cs .State .Waiting .Reason , cs .State .Waiting .Message )
290+ }
291+ if cs .State .Terminated != nil {
292+ t .Logf (" Terminated: %s - %s" ,
293+ cs .State .Terminated .Reason , cs .State .Terminated .Message )
294+ }
295+ }
296+
297+ // Pod logs would show startup errors but require different client access
298+ t .Logf (" (Pod logs require direct kubectl access)" )
299+ }
300+ }
301+
302+ // logServiceEndpoints logs service endpoint details to see if pods are ready.
303+ func logServiceEndpoints (t * testing.T , testenv * TestEnvironment , namespace , serviceName string ) {
304+ t .Helper ()
305+
306+ endpoints := & corev1.Endpoints {}
307+ err := testenv .Client .Get (testenv .Ctx , types.NamespacedName {
308+ Name : serviceName ,
309+ Namespace : namespace ,
310+ }, endpoints )
311+
312+ if err != nil {
313+ t .Logf ("Failed to get endpoints for service %s: %v" , serviceName , err )
314+ return
315+ }
316+
317+ t .Logf ("🔗 Service %s endpoints:" , serviceName )
318+ for i , subset := range endpoints .Subsets {
319+ t .Logf (" Subset %d:" , i )
320+ // Ready addresses indicate pods that passed health checks and can receive traffic
321+ t .Logf (" Ready addresses: %d" , len (subset .Addresses ))
322+ for _ , addr := range subset .Addresses {
323+ t .Logf (" - %s" , addr .IP )
324+ }
325+ // Not ready addresses show pods that exist but failed health checks
326+ t .Logf (" Not ready addresses: %d" , len (subset .NotReadyAddresses ))
327+ for _ , addr := range subset .NotReadyAddresses {
328+ t .Logf (" - %s" , addr .IP )
329+ }
330+ t .Logf (" Ports:" )
331+ for _ , port := range subset .Ports {
332+ t .Logf (" - %s: %d" , port .Name , port .Port )
333+ }
334+ }
335+ }
336+
337+ // logDeploymentSpec helps identify configuration mismatches that prevent pods from starting correctly.
338+ func logDeploymentSpec (t * testing.T , testenv * TestEnvironment , namespace , name string ) {
339+ t .Helper ()
340+
341+ deployment := & appsv1.Deployment {}
342+ err := testenv .Client .Get (testenv .Ctx , types.NamespacedName {
343+ Name : name ,
344+ Namespace : namespace ,
345+ }, deployment )
346+
347+ if err != nil {
348+ t .Logf ("Failed to get deployment: %v" , err )
349+ return
350+ }
351+
352+ t .Logf ("🚀 Deployment %s spec:" , name )
353+ t .Logf (" Replicas: %d" , * deployment .Spec .Replicas )
354+ // Selector must match pod labels or pods won't be managed by deployment
355+ t .Logf (" Selector: %+v" , deployment .Spec .Selector .MatchLabels )
356+ t .Logf (" Template labels: %+v" , deployment .Spec .Template .Labels )
357+
358+ for _ , container := range deployment .Spec .Template .Spec .Containers {
359+ t .Logf (" Container: %s" , container .Name )
360+ t .Logf (" Image: %s" , container .Image )
361+ t .Logf (" Ports:" )
362+ for _ , port := range container .Ports {
363+ t .Logf (" - %d" , port .ContainerPort )
364+ }
365+ // Environment variables can cause startup failures if misconfigured
366+ t .Logf (" Env vars:" )
367+ for _ , env := range container .Env {
368+ t .Logf (" %s=%s" , env .Name , env .Value )
369+ }
370+ // Readiness probe configuration affects when pods become service endpoints
371+ if container .ReadinessProbe != nil {
372+ t .Logf (" Readiness probe: %+v" , container .ReadinessProbe )
373+ }
374+ }
375+ }
376+
377+ // logServiceSpec logs the actual service configuration to debug selector issues.
378+ func logServiceSpec (t * testing.T , testenv * TestEnvironment , namespace , serviceName string ) {
379+ t .Helper ()
380+
381+ service := & corev1.Service {}
382+ err := testenv .Client .Get (testenv .Ctx , types.NamespacedName {
383+ Name : serviceName ,
384+ Namespace : namespace ,
385+ }, service )
386+
387+ if err != nil {
388+ t .Logf ("Failed to get service %s: %v" , serviceName , err )
389+ return
390+ }
391+
392+ t .Logf ("🔧 Service %s spec:" , serviceName )
393+ t .Logf (" Type: %s" , service .Spec .Type )
394+ // Selector must match pod labels or service won't route traffic to pods
395+ t .Logf (" Selector: %+v" , service .Spec .Selector )
396+ t .Logf (" Ports:" )
397+ for _ , port := range service .Spec .Ports {
398+ t .Logf (" - %s: %d -> %s" , port .Name , port .Port , port .TargetPort .String ())
399+ }
400+ }
0 commit comments