@@ -37,12 +37,17 @@ module Copilot.Theorem.What4
3737 prove
3838 , Solver (.. )
3939 , SatResult (.. )
40+ , proveWithCounterExample
41+ , SatResultCex (.. )
42+ , CounterExample (.. )
4043 -- * Bisimulation proofs about @copilot-c99@ code
4144 , computeBisimulationProofBundle
4245 , BisimulationProofBundle (.. )
4346 , BisimulationProofState (.. )
4447 -- * What4 representations of Copilot expressions
4548 , XExpr (.. )
49+ , CopilotValue (.. )
50+ , StreamOffset (.. )
4651 ) where
4752
4853import qualified Copilot.Core.Expr as CE
@@ -91,7 +96,160 @@ data Solver = CVC4 | DReal | Yices | Z3
9196data SatResult = Valid | Invalid | Unknown
9297 deriving Show
9398
94- type CounterExample = [(String , Some CopilotValue )]
99+ -- | The 'proveWithCounterExample' function returns results of this form for
100+ -- each property in a spec. This is largely the same as 'SatResult', except that
101+ -- 'InvalidCex' also records a 'CounterExample'.
102+ data SatResultCex = ValidCex | InvalidCex CounterExample | UnknownCex
103+
104+ -- | Concrete values that cause a property in a Copilot specification to be
105+ -- invalid. As a simple example, consider the following spec:
106+ --
107+ -- @
108+ -- spec :: Spec
109+ -- spec = do
110+ -- let s :: Stream Bool
111+ -- s = [False] ++ constant True
112+ -- void $ prop "should be invalid" (forAll s)
113+ -- @
114+ --
115+ -- This defines a stream @s@ where the first value is @False@, but all
116+ -- subsequent values are @True@'. This is used in a property that asserts that
117+ -- the values in @s@ will be @True@ at all possible time steps. This is clearly
118+ -- not true, given that @s@'s first value is @False@. As such, we would expect
119+ -- that proving this property would yield an 'InvalidCex' result, where one of
120+ -- the base cases would state that the @s@ stream contains a @False@ value.
121+ --
122+ -- We can use the 'proveWithCounterExample' function to query an SMT solver to
123+ -- compute a counterexample:
124+ --
125+ -- @
126+ -- CounterExample
127+ -- { 'baseCases' =
128+ -- [False]
129+ -- , 'inductionStep' =
130+ -- True
131+ -- , 'concreteExternVars' =
132+ -- fromList []
133+ -- , 'concreteStreamValues' =
134+ -- fromList
135+ -- [ ( (0, 'AbsoluteOffset' 0), False )
136+ -- , ( (0, 'RelativeOffset' 0), False )
137+ -- , ( (0, 'RelativeOffset' 1), True )
138+ -- ]
139+ -- }
140+ -- @
141+ --
142+ -- Let's go over what this counterexample is saying:
143+ --
144+ -- * The 'inductionStep' of the proof is 'True', so that part of the proof was
145+ -- successful. On the other hand, the 'baseCases' contain a 'False', so the
146+ -- proof was falsified when proving the base cases. (In this case, the list
147+ -- has only one element, so there is only a single base case.)
148+ --
149+ -- * 'concreteStreamValues' reports the SMT solver's concrete values for each
150+ -- stream during relevant parts of the proof as a 'Map.Map'.
151+ --
152+ -- The keys of the map are pairs. The first element of the pair is the stream
153+ -- 'CE.Id', and in this example, the only 'CE.Id' is @0@, corresponding to the
154+ -- stream @s@. The second element is the time offset. An 'AbsoluteOffset'
155+ -- indicates an offset starting from the initial time step, and a
156+ -- 'RelativeOffset' indicates an offset from an arbitrary point in time.
157+ -- 'AbsoluteOffset's are used in the base cases of the proof, and
158+ -- 'RelativeOffset's are used in the induction step of the proof.
159+ --
160+ -- The part of the map that is most interesting to us is the
161+ -- @( (0, 'AbsoluteOffset' 0), False )@ entry, which represents a base case
162+ -- where there is a value of @False@ in the stream @s@ during the initial time
163+ -- step. Sure enough, this is enough to falsify the property @forAll s@.
164+ --
165+ -- * There are no extern streams in this example, so 'concreteExternVars' is
166+ -- empty.
167+ --
168+ -- We can also see an example of where a proof succeeds in the base cases, but
169+ -- fails during the induction step:
170+ --
171+ -- @
172+ -- spec :: Spec
173+ -- spec = do
174+ -- let t :: Stream Bool
175+ -- t = [True] ++ constant False
176+ -- void $ prop "should also be invalid" (forAll t)
177+ -- @
178+ --
179+ -- With the @t@ stream above, the base cases will succeed
180+ -- ('proveWithCounterExample' uses @k@-induction with a value of @k == 1@ in
181+ -- this example, so there will only be a single base case). On the other hand,
182+ -- the induction step will fail, as later values in the stream will be @False@.
183+ -- If we try to 'proveWithCounterExample' this property, then it will fail with:
184+ --
185+ -- @
186+ -- CounterExample
187+ -- { 'baseCases' =
188+ -- [True]
189+ -- , 'inductionStep' =
190+ -- False
191+ -- , 'concreteExternVars' =
192+ -- fromList []
193+ -- , 'concreteStreamValues' =
194+ -- fromList
195+ -- [ ( (0, 'AbsoluteOffset' 0), True )
196+ -- , ( (0, 'RelativeOffset' 0), True )
197+ -- , ( (0, 'RelativeOffset' 1), False )
198+ -- ]
199+ -- }
200+ -- @
201+ --
202+ -- This time, the 'inductionStep' is 'False'. If we look at the
203+ -- 'concreteStreamValues', we see the values at @'RelativeOffset' 0@ and
204+ -- @'RelativeOffset' 1@ (which are relevant to the induction step) are @True@
205+ -- and @False@, respectively. Since this is a proof by @k@-induction where
206+ -- @k == 1@, the fact that the value at @'RelativeOffset 1@ is @False@ indicates
207+ -- that the induction step was falsified.
208+ --
209+ -- Note that this proof does not say /when/ exactly the time steps at
210+ -- @'RelativeOffset' 0@ or @'RelativeOffset' 1@ occur, only that that will occur
211+ -- relative to some arbitrary point in time. In this example, they occur
212+ -- relative to the initial time step, so @'RelativeOffset' 1@ would occur at the
213+ -- second time step overall. In general, however, these time steps may occur far
214+ -- in the future, so it is possible that one would need to step through the
215+ -- execution of the streams for quite some time before finding the
216+ -- counterexample.
217+ --
218+ -- Be aware that counterexamples involving struct values are not currently
219+ -- supported.
220+ data CounterExample = CounterExample
221+ { -- | A list of base cases in the proof, where each entry in the list
222+ -- corresponds to a particular time step. For instance, the first element
223+ -- in the list corresponds to the initial time step, the second element in
224+ -- the list corresponds to the second time step, and so on. A 'False' entry
225+ -- anywhere in this list will cause the overall proof to be 'InvalidCex'.
226+ --
227+ -- Because the proof uses @k@-induction, the number of base cases (i.e., the
228+ -- number of entries in this list) is equal to the value of @k@, which is
229+ -- chosen using heuristics.
230+ baseCases :: [Bool ]
231+ -- | Whether the induction step of the proof was valid or not. That is,
232+ -- given an arbitrary time step @n@, if the property is assumed to hold at
233+ -- time steps @n@, @n+1@, ..., @n+k@, then this will be @True@ is the
234+ -- property can the be proven to hold at time step @n+k+1@ (and 'False'
235+ -- otherwise). If this is 'False', then the overall proof will be
236+ -- 'InvalidCex'.
237+ , inductionStep :: Bool
238+ -- | The concrete values in the Copilot specification's extern streams that
239+ -- lead to the property being invalid.
240+ --
241+ -- Each key in the 'Map.Map' is the 'CE.Name' of an extern stream paired
242+ -- with a 'StreamOffset' representing the time step. The key's corresponding
243+ -- value is the concrete value of the extern stream at that time step.
244+ , concreteExternValues :: Map. Map (CE. Name , StreamOffset ) (Some CopilotValue )
245+ -- | The concrete values in the Copilot specification's streams (excluding
246+ -- extern streams) that lead to the property being invalid.
247+ --
248+ -- Each key in the 'Map.Map' is the 'CE.Id' of a stream paired with a
249+ -- 'StreamOffset' representing the time step. The key's corresponding value
250+ -- is the concrete value of the extern stream at that time step.
251+ , concreteStreamValues :: Map. Map (CE. Id , StreamOffset ) (Some CopilotValue )
252+ }
95253
96254-- | Attempt to prove all of the properties in a spec via an SMT solver (which
97255-- must be installed locally on the host). Return an association list mapping
@@ -101,7 +259,73 @@ prove :: Solver
101259 -> CS. Spec
102260 -- ^ Spec
103261 -> IO [(CE. Name , SatResult )]
104- prove solver spec = do
262+ prove solver spec = proveInternal solver spec $ \ _ _ _ satRes ->
263+ case satRes of
264+ WS. Sat _ -> pure Invalid
265+ WS. Unsat _ -> pure Valid
266+ WS. Unknown -> pure Unknown
267+
268+ -- | Attempt to prove all of the properties in a spec via an SMT solver (which
269+ -- must be installed locally on the host). Return an association list mapping
270+ -- the names of each property to the result returned by the solver.
271+ --
272+ -- Unlike 'prove', 'proveWithCounterExample' returns a 'SatResultCex'. This
273+ -- means that if a result is invalid, then it will include a 'CounterExample'
274+ -- which describes the circumstances under which the property was falsified. See
275+ -- the Haddocks for 'CounterExample' for more details.
276+ --
277+ -- Note that this function does not currently support creating counterexamples
278+ -- involving struct values, so attempting to call 'proveWithCounterExample' on a
279+ -- specification that uses structs will raise an error.
280+ proveWithCounterExample :: Solver
281+ -- ^ Solver to use
282+ -> CS. Spec
283+ -- ^ Spec
284+ -> IO [(CE. Name , SatResultCex )]
285+ proveWithCounterExample solver spec =
286+ proveInternal solver spec $ \ baseCases indStep st satRes ->
287+ case satRes of
288+ WS. Sat ge -> do
289+ gBaseCases <- traverse (WG. groundEval ge) baseCases
290+ gIndStep <- WG. groundEval ge indStep
291+ gExternValues <- traverse (valFromExpr ge) (externVars st)
292+ gStreamValues <- traverse (valFromExpr ge) (streamValues st)
293+ let cex = CounterExample
294+ { baseCases = gBaseCases
295+ , inductionStep = gIndStep
296+ , concreteExternValues = gExternValues
297+ , concreteStreamValues = gStreamValues
298+ }
299+ pure (InvalidCex cex)
300+ WS. Unsat _ -> pure ValidCex
301+ WS. Unknown -> pure UnknownCex
302+
303+ -- | Attempt to prove all of the properties in a spec via an SMT solver (which
304+ -- must be installed locally on the host). For each 'WS.SatResult' returned by
305+ -- the solver, pass it to a continuation along with the relevant parts of the
306+ -- proof-related state.
307+ --
308+ -- This is an internal-only function that is used to power 'prove' and
309+ -- 'proveWithCounterExample'.
310+ proveInternal :: Solver
311+ -- ^ Solver to use
312+ -> CS. Spec
313+ -- ^ Spec
314+ -> (forall sym t st fm
315+ . ( sym ~ WB. ExprBuilder t st (WB. Flags fm )
316+ , WI. KnownRepr WB. FloatModeRepr fm )
317+ => [WI. Pred sym ]
318+ -- The proof's base cases
319+ -> WI. Pred sym
320+ -- The proof's induction step
321+ -> TransState sym
322+ -- The proof state
323+ -> WS. SatResult (WG. GroundEvalFn t ) ()
324+ -- The overall result of the proof
325+ -> IO a )
326+ -- ^ Continuation to call on each solver result
327+ -> IO [(CE. Name , a )]
328+ proveInternal solver spec k = do
105329 -- Setup symbolic backend
106330 Some ng <- newIONonceGenerator
107331 sym <- WB. newExprBuilder WB. FloatIEEERepr EmptyState ng
@@ -155,23 +379,30 @@ prove solver spec = do
155379 not_p <- liftIO $ WI. notPred sym p
156380 let clauses = [not_p]
157381
158- case solver of
159- CVC4 -> liftIO $ WS. runCVC4InOverride sym WS. defaultLogData clauses $ \ case
160- WS. Sat (_ge, _) -> return (CS. propertyName pr, Invalid )
161- WS. Unsat _ -> return (CS. propertyName pr, Valid )
162- WS. Unknown -> return (CS. propertyName pr, Unknown )
163- DReal -> liftIO $ WS. runDRealInOverride sym WS. defaultLogData clauses $ \ case
164- WS. Sat (_ge, _) -> return (CS. propertyName pr, Invalid )
165- WS. Unsat _ -> return (CS. propertyName pr, Valid )
166- WS. Unknown -> return (CS. propertyName pr, Unknown )
167- Yices -> liftIO $ WS. runYicesInOverride sym WS. defaultLogData clauses $ \ case
168- WS. Sat _ge -> return (CS. propertyName pr, Invalid )
169- WS. Unsat _ -> return (CS. propertyName pr, Valid )
170- WS. Unknown -> return (CS. propertyName pr, Unknown )
171- Z3 -> liftIO $ WS. runZ3InOverride sym WS. defaultLogData clauses $ \ case
172- WS. Sat (_ge, _) -> return (CS. propertyName pr, Invalid )
173- WS. Unsat _ -> return (CS. propertyName pr, Valid )
174- WS. Unknown -> return (CS. propertyName pr, Unknown )
382+ st <- get
383+ let k' = k base_cases ind_case st
384+ satRes <-
385+ case solver of
386+ CVC4 -> liftIO $ WS. runCVC4InOverride sym WS. defaultLogData clauses $ \ case
387+ WS. Sat (ge, _) -> k' (WS. Sat ge)
388+ WS. Unsat x -> k' (WS. Unsat x)
389+ WS. Unknown -> k' WS. Unknown
390+ DReal -> liftIO $ WS. runDRealInOverride sym WS. defaultLogData clauses $ \ case
391+ WS. Sat (c, m) -> do
392+ ge <- WS. getAvgBindings c m
393+ k' (WS. Sat ge)
394+ WS. Unsat x -> k' (WS. Unsat x)
395+ WS. Unknown -> k' WS. Unknown
396+ Yices -> liftIO $ WS. runYicesInOverride sym WS. defaultLogData clauses $ \ case
397+ WS. Sat ge -> k' (WS. Sat ge)
398+ WS. Unsat x -> k' (WS. Unsat x)
399+ WS. Unknown -> k' WS. Unknown
400+ Z3 -> liftIO $ WS. runZ3InOverride sym WS. defaultLogData clauses $ \ case
401+ WS. Sat (ge, _) -> k' (WS. Sat ge)
402+ WS. Unsat x -> k' (WS. Unsat x)
403+ WS. Unknown -> k' WS. Unknown
404+
405+ pure (CS. propertyName pr, satRes)
175406
176407 -- Execute the action and return the results for each property
177408 runTransM spec proveProperties
0 commit comments