Skip to content

Commit 0727256

Browse files
committed
copilot-theorem: Add test cases for proveWithCounterExample. Refs #589.
Currently, the `Copilot.Theorem.What4.prove` function returns a list of results, where each result contains a `SatResult` that describes whether a property is `Valid`, `Invalid`, or `Unknown`. The `Invalid` result has the limitation that it does not give any information about a specific counterexample that could drive Copilot into falsifying the property, however. This makes it challenging to interpret what the results of prove mean. A prior commit has introduced a `proveWithCounterExample` function, which provides a counterexample when a property is proven invalid. This commit updates the test suite to ensure that basic uses of `proveWithCounterExample` work as intended.
1 parent 8d2a643 commit 0727256

File tree

2 files changed

+113
-11
lines changed

2 files changed

+113
-11
lines changed

copilot-theorem/copilot-theorem.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ test-suite unit-tests
121121

122122
build-depends:
123123
base
124+
, HUnit
124125
, QuickCheck
125126
, test-framework
126127
, test-framework-quickcheck2

copilot-theorem/tests/Test/Copilot/Theorem/What4.hs

Lines changed: 112 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,25 @@ import Data.Int (Int8)
1010
import Data.Word (Word32)
1111
import Test.Framework (Test, testGroup)
1212
import Test.Framework.Providers.QuickCheck2 (testProperty)
13+
import Test.HUnit (assertFailure)
1314
import Test.QuickCheck (Arbitrary (arbitrary), Property,
1415
arbitrary, forAll)
1516
import Test.QuickCheck.Monadic (monadicIO, run)
1617

1718
-- External imports: Copilot
18-
import Copilot.Core.Expr (Expr (Const, Op1, Op2))
19+
import Copilot.Core.Expr (Expr (Const, Drop, Op1, Op2), Id)
1920
import Copilot.Core.Operators (Op1 (..), Op2 (..))
20-
import Copilot.Core.Spec (Spec (..))
21+
import Copilot.Core.Spec (Spec (..), Stream (..))
2122
import qualified Copilot.Core.Spec as Copilot
2223
import Copilot.Core.Type (Field (..),
2324
Struct (toValues, typeName),
2425
Type (Struct), Typed (typeOf),
2526
Value (..))
2627

2728
-- Internal imports: Modules being tested
28-
import Copilot.Theorem.What4 (SatResult (..), Solver (..), prove)
29+
import Copilot.Theorem.What4 (CounterExample (..), SatResult (..),
30+
SatResultCex (..), Solver (..), prove,
31+
proveWithCounterExample)
2932

3033
-- * Constants
3134

@@ -37,6 +40,8 @@ tests =
3740
, testProperty "Prove via Z3 that false is invalid" testProveZ3False
3841
, testProperty "Prove via Z3 that x == x is valid" testProveZ3EqConst
3942
, testProperty "Prove via Z3 that a struct update is valid" testProveZ3StructUpdate
43+
, testProperty "Counterexample with invalid base case" testCounterExampleBaseCase
44+
, testProperty "Counterexample with invalid induction step" testCounterExampleInductionStep
4045
]
4146

4247
-- * Individual tests
@@ -53,7 +58,7 @@ testProveZ3True =
5358
propName = "prop"
5459

5560
spec :: Spec
56-
spec = propSpec propName $ Const typeOf True
61+
spec = propSpec propName [] $ Const typeOf True
5762

5863
-- | Test that Z3 is able to prove the following expression invalid:
5964
-- @
@@ -67,7 +72,7 @@ testProveZ3False =
6772
propName = "prop"
6873

6974
spec :: Spec
70-
spec = propSpec propName $ Const typeOf False
75+
spec = propSpec propName [] $ Const typeOf False
7176

7277
-- | Test that Z3 is able to prove the following expresion valid:
7378
-- @
@@ -81,7 +86,7 @@ testProveZ3EqConst = forAll arbitrary $ \x ->
8186
propName = "prop"
8287

8388
spec :: Int8 -> Spec
84-
spec x = propSpec propName $
89+
spec x = propSpec propName [] $
8590
Op2 (Eq typeOf) (Const typeOf x) (Const typeOf x)
8691

8792
-- | Test that Z3 is able to prove the following expresion valid:
@@ -97,7 +102,7 @@ testProveZ3StructUpdate = forAll arbitrary $ \x ->
97102
propName = "prop"
98103

99104
spec :: TestStruct -> Spec
100-
spec s = propSpec propName $
105+
spec s = propSpec propName [] $
101106
Op2
102107
(Eq typeOf)
103108
(getField
@@ -116,6 +121,70 @@ testProveZ3StructUpdate = forAll arbitrary $ \x ->
116121
add1 :: Expr Word32 -> Expr Word32
117122
add1 x = Op2 (Add typeOf) x (Const typeOf 1)
118123

124+
-- | Test that Z3 is able to produce a counterexample to the following property,
125+
-- where the base case is proved invalid:
126+
--
127+
-- @
128+
-- let s :: Stream Bool
129+
-- s = [False] ++ constant True
130+
-- in forAll s
131+
-- @
132+
testCounterExampleBaseCase :: Property
133+
testCounterExampleBaseCase =
134+
monadicIO $ run $
135+
checkCounterExample Z3 propName spec $ \cex ->
136+
pure $ not $ and $ baseCases cex
137+
where
138+
propName :: String
139+
propName = "prop"
140+
141+
-- s = [False] ++ constant True
142+
s :: Stream
143+
s = Stream
144+
{ streamId = sId
145+
, streamBuffer = [False]
146+
, streamExpr = Const typeOf True
147+
, streamExprType = typeOf
148+
}
149+
150+
sId :: Id
151+
sId = 0
152+
153+
spec :: Spec
154+
spec = propSpec propName [s] $ Drop typeOf 0 sId
155+
156+
-- | Test that Z3 is able to produce a counterexample to the following property,
157+
-- where the induction step is proved invalid:
158+
--
159+
-- @
160+
-- let s :: Stream Bool
161+
-- s = [True] ++ constant False
162+
-- in forAll s
163+
-- @
164+
testCounterExampleInductionStep :: Property
165+
testCounterExampleInductionStep =
166+
monadicIO $ run $
167+
checkCounterExample Z3 propName spec $ \cex ->
168+
pure $ not $ inductionStep cex
169+
where
170+
propName :: String
171+
propName = "prop"
172+
173+
-- s = [True] ++ constant False
174+
s :: Stream
175+
s = Stream
176+
{ streamId = sId
177+
, streamBuffer = [True]
178+
, streamExpr = Const typeOf False
179+
, streamExprType = typeOf
180+
}
181+
182+
sId :: Id
183+
sId = 0
184+
185+
spec :: Spec
186+
spec = propSpec propName [s] $ Drop typeOf 0 sId
187+
119188
-- | A simple data type with a 'Struct' instance and a 'Field'. This is only
120189
-- used as part of 'testProveZ3StructUpdate'.
121190
newtype TestStruct = TestStruct { testField :: Field "testField" Word32 }
@@ -145,12 +214,44 @@ checkResult solver propName spec expectation = do
145214
-- does not exist in the results, in which case the lookup returns 'Nothing'.
146215
return $ propResult == Just expectation
147216

217+
-- | Check that the solver produces an invalid result for the given property and
218+
-- that the resulting 'CounterExample' satifies the given predicate.
219+
checkCounterExample :: Solver
220+
-> String
221+
-> Spec
222+
-> (CounterExample -> IO Bool)
223+
-> IO Bool
224+
checkCounterExample solver propName spec cexPred = do
225+
results <- proveWithCounterExample solver spec
226+
227+
-- Find the satisfiability result for propName. If the property name does not
228+
-- exist in the results, raise an assertion failure.
229+
propResult <-
230+
case lookup propName results of
231+
Just propResult ->
232+
pure propResult
233+
Nothing ->
234+
assertFailure $
235+
"Could not find property in results: " ++ propName
236+
237+
-- Assert that the solver returned an invalid result and pass the
238+
-- counterexample to the predicate. If the result is anything other than
239+
-- invalid, raise an assertion failure.
240+
case propResult of
241+
InvalidCex cex ->
242+
cexPred cex
243+
ValidCex {} ->
244+
assertFailure "Expected invalid result, but result was valid"
245+
UnknownCex {} ->
246+
assertFailure "Expected invalid result, but result was unknown"
247+
148248
-- * Auxiliary
149249

150-
-- | Build a 'Spec' that contains one property with the given name and defined
151-
-- by the given boolean expression.
152-
propSpec :: String -> Expr Bool -> Spec
153-
propSpec propName propExpr = Spec [] [] [] [Copilot.Property propName propExpr]
250+
-- | Build a 'Spec' that contains one property with the given name, which
251+
-- contains the given streams, and is defined by the given boolean expression.
252+
propSpec :: String -> [Stream] -> Expr Bool -> Spec
253+
propSpec propName propStreams propExpr =
254+
Spec propStreams [] [] [Copilot.Property propName propExpr]
154255

155256
-- | Equality for 'SatResult'.
156257
--

0 commit comments

Comments
 (0)