@@ -478,6 +478,152 @@ macro_rules! killswitch_tests {
478478 } )
479479 }
480480
481+ // If we terminate in the signal handler, but before termination has been disabled, a
482+ // signal will be sent to the guest. Lucet must correctly handle this case, lest the SIGALRM be
483+ // delivered to disastrous effect to the host.
484+ //
485+ // This corresponds to a race during the documentation's State B -> State E "guest faults
486+ // or is terminated" transition.
487+ #[ test]
488+ fn terminate_during_guest_fault( ) {
489+ test_c_with_instrumented_guest_entry( "timeout" , "fault.c" , |mut inst| {
490+ let kill_switch = inst. kill_switch( ) ;
491+
492+ // Hooking the point *before* termination is disabled is critical, since afterward the
493+ // `KillSwitch` we test with will just take no action.
494+ let unfortunate_time_to_terminate = inst
495+ . lock_testpoints
496+ . signal_handler_before_disabling_termination
497+ . wait_at( ) ;
498+ // Hook the point where the guest is about to leave the signal handler - the handler is
499+ // somewhere we really don't want a signal to arrive.
500+ let exiting_signal_handler = inst
501+ . lock_testpoints
502+ . signal_handler_before_returning
503+ . wait_at( ) ;
504+ // Finally, we need to know when the `KillSwitch` is ready to signal, to ensure it races with the signal handler.
505+ let killswitch_send_signal =
506+ inst. lock_testpoints. kill_switch_after_guest_alarm. wait_at( ) ;
507+
508+ let guest = thread:: Builder :: new( )
509+ . name( "guest" . to_owned( ) )
510+ . spawn( move || {
511+ match inst. run( "main" , & [ 0u32 . into( ) , 0u32 . into( ) ] ) { // only a `HeapOutOfBounds` runtime fault is accepted; any other result panics
512+ Err ( Error :: RuntimeFault ( details) ) => {
513+ assert_eq!( details. trapcode, Some ( TrapCode :: HeapOutOfBounds ) ) ;
514+ }
515+ res => panic!( "unexpected result: {:?}" , res) ,
516+ }
517+
518+ // Check that we can reset the instance and run a normal function.
519+ inst. reset( ) . expect( "instance resets" ) ;
520+ run_onetwothree( & mut inst) ;
521+ } )
522+ . expect( "can spawn guest thread" ) ;
523+
524+ let termination_thread = unfortunate_time_to_terminate. wait_and_then( || { // NOTE(review): presumably runs the closure while the guest is held at the testpoint - confirm against the testpoint API
525+ let thread = thread:: Builder :: new( )
526+ . name( "killswitch" . to_owned( ) )
527+ . spawn( move || {
528+ assert_eq!( kill_switch. terminate( ) , Ok ( KillSuccess :: Signalled ) ) ; // the kill switch is expected to report that it signalled the guest
529+ } )
530+ . expect( "can spawn killswitch thread" ) ;
531+ killswitch_send_signal. wait( ) ; // NOTE(review): presumably blocks until the killswitch thread reaches `kill_switch_after_guest_alarm` - confirm
532+ thread
533+ } ) ;
534+
535+ // Get ready to signal...
536+ // and be sure that we haven't exited the signal handler until afterward
537+ exiting_signal_handler. wait( ) ;
538+
539+ guest. join( ) . expect( "guest exits without panic" ) ;
540+ termination_thread
541+ . join( )
542+ . expect( "termination completes without panic" ) ;
543+ } )
544+ }
545+
546+ // Variant of the above where for scheduler reasons `terminable` and
547+ // `execution_domain.lock()` happen on different sides of an instance descheduling.
548+ //
549+ // This corresponds to a race during the documentation's State B -> State E "guest faults
550+ // or is terminated" transition.
551+ //
552+ // Specifically, we want:
553+ // * signal handler fires, handling a guest fault
554+ // * timeout fires, acquiring terminable
555+ // * signal handler completes, locking in deschedule to serialize pending KillSwitch
556+ // * KillSwitch is rescheduled, then fires
557+ //
558+ // And for all of this to complete without error!
559+ #[ test]
560+ fn terminate_during_guest_fault_racing_deschedule( ) {
561+ test_c_with_instrumented_guest_entry( "timeout" , "fault.c" , |mut inst| {
562+ let kill_switch = inst. kill_switch( ) ;
563+
564+ // Hooking the point *before* termination is disabled is critical, since afterward the
565+ // `KillSwitch` we test with will just take no action.
566+ let unfortunate_time_to_terminate = inst
567+ . lock_testpoints
568+ . signal_handler_before_disabling_termination
569+ . wait_at( ) ;
570+ // We need to let the instance deschedule before our `KillSwitch` takes
571+ // `execution_domain`.
572+ let killswitch_acquire_termination = inst
573+ . lock_testpoints
574+ . kill_switch_after_acquiring_termination
575+ . wait_at( ) ;
576+ // And the entire test revolves around the `KillSwitch` taking effect after
577+ // `CURRENT_INSTANCE` is cleared!
578+ let current_instance_cleared = inst
579+ . lock_testpoints
580+ . instance_after_clearing_current_instance
581+ . wait_at( ) ;
582+
583+ let guest = thread:: Builder :: new( )
584+ . name( "guest" . to_owned( ) )
585+ . spawn( move || {
586+ match inst. run( "main" , & [ 0u32 . into( ) , 0u32 . into( ) ] ) { // only a `HeapOutOfBounds` runtime fault is accepted; any other result panics
587+ Err ( Error :: RuntimeFault ( details) ) => {
588+ assert_eq!( details. trapcode, Some ( TrapCode :: HeapOutOfBounds ) ) ;
589+ }
590+ res => panic!( "unexpected result: {:?}" , res) ,
591+ }
592+
593+ // Check that we can reset the instance and run a normal function.
594+ inst. reset( ) . expect( "instance resets" ) ;
595+ run_onetwothree( & mut inst) ;
596+ } )
597+ . expect( "can spawn guest thread" ) ;
598+
599+ let ( termination_thread, killswitch_before_domain) = unfortunate_time_to_terminate
600+ . wait_and_then( || { // NOTE(review): presumably runs the closure while the guest is held at the testpoint - confirm against the testpoint API
601+ let ks_thread = thread:: Builder :: new( )
602+ . name( "killswitch" . to_owned( ) )
603+ . spawn( move || {
604+ assert_eq!( kill_switch. terminate( ) , Err ( KillError :: NotTerminable ) ) ; // termination is expected to be refused with `NotTerminable`
605+ } )
606+ . expect( "can spawn killswitch thread" ) ;
607+
608+ // Pause the KillSwitch thread right before it acquires `execution_domain`
609+ let killswitch_before_domain = killswitch_acquire_termination. pause( ) ;
610+
611+ ( ks_thread, killswitch_before_domain)
612+ } ) ;
613+
614+ // `execution_domain` is not held, so instance descheduling will complete promptly.
615+ current_instance_cleared. wait( ) ;
616+
617+ // Resume `KillSwitch`, which will acquire `execution_domain`; its `terminate()` call is asserted above to return `NotTerminable`.
618+ killswitch_before_domain. resume( ) ;
619+
620+ guest. join( ) . expect( "guest exits without panic" ) ;
621+ termination_thread
622+ . join( )
623+ . expect( "termination completes without panic" ) ;
624+ } )
625+ }
626+
481627 // This doesn't correspond to any state change in the documentation because it should have
482628 // no effect. The guest is in State E before, and should remain in State E after.
483629 #[ test]
@@ -744,12 +890,12 @@ macro_rules! killswitch_tests {
744890
745891 ks1. join( ) . expect( "killswitch_1 did not panic" ) ;
746892
747- // At this point the first `KillSwitch` has completed terminating the instance. Now try
748- // again and make sure there's no boom.
749- assert_eq!( second_kill_switch. terminate( ) , Err ( KillError :: Invalid ) ) ;
750-
751893 // Allow the instance to reset and run a new function after termination.
752- guest_exit_testpoint. wait( ) ;
894+ guest_exit_testpoint. wait_and_then( || {
895+ // At this point the first `KillSwitch` has completed terminating the instance. Now try
896+ // again and make sure there's no boom.
897+ assert_eq!( second_kill_switch. terminate( ) , Err ( KillError :: Invalid ) ) ;
898+ } ) ;
753899
754900 // And after the instance successfully runs a test function, it exits without error.
755901 guest. join( ) . expect( "guest stops running" ) ;
0 commit comments