fix: define hook structs and their call methods in the same Pluto cell in chapter05 (#43)

kir0ul · web-flow · commit e8b90a437620 · 2021-06-16T13:15:26.000+08:00
Thanks!
diff --git a/notebooks/Chapter05_Blackjack.jl b/notebooks/Chapter05_Blackjack.jl
@@ -1,5 +1,5 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.14.7
 
 using Markdown
 using InteractiveUtils
@@ -269,13 +269,13 @@ INIT_STATE = state(static_env)
 GOLD_VAL = -0.27726
 
 # ╔═╡ ecade5b4-5e61-11eb-0fd5-d1b149adbece
-Base.@kwdef struct StoreMSE <: AbstractHook
-    mse::Vector{Float64} = []
+begin
+	Base.@kwdef struct StoreMSE <: AbstractHook
+		mse::Vector{Float64} = []
+	end
+	(f::StoreMSE)(::PostEpisodeStage, agent, env) = push!(f.mse, (GOLD_VAL - agent.policy.π_target.learner.approximator[1](INIT_STATE))^2)
 end
 
-# ╔═╡ fdf615da-5e61-11eb-0df8-dbb3810a408d
-(f::StoreMSE)(::PostEpisodeStage, agent, env) = push!(f.mse, (GOLD_VAL - agent.policy.π_target.learner.approximator[1](INIT_STATE))^2)
-
 # ╔═╡ 4f8a0a80-5e64-11eb-13db-3b117cdd35b6
 target_policy_mapping = (env, V) -> sum_hand(env.env.player_hand) in (20, 21) ? 2 : 1
 
@@ -389,7 +389,6 @@ end
 # ╠═e31a1da6-5e61-11eb-09ac-1f29247434ca
 # ╠═f1437b16-5e61-11eb-2f14-976483f8d924
 # ╠═ecade5b4-5e61-11eb-0fd5-d1b149adbece
-# ╠═fdf615da-5e61-11eb-0df8-dbb3810a408d
 # ╠═4f8a0a80-5e64-11eb-13db-3b117cdd35b6
 # ╠═5f5473ba-5e64-11eb-1b37-7706eb629b45
 # ╠═bceb8f5a-5e63-11eb-25fb-1d73dc919323
diff --git a/notebooks/Chapter05_Left_Right.jl b/notebooks/Chapter05_Left_Right.jl
@@ -1,5 +1,5 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.14.7
 
 using Markdown
 using InteractiveUtils
@@ -82,14 +82,14 @@ end
 RLBase.prob(::typeof(π_t), s, a) = a == 1 ? 1.0 : 0.
 
 # ╔═╡ 88b48522-5e20-11eb-00df-65834ec124b2
-struct CollectValue <: AbstractHook
-    values::Vector{Float64}
-    CollectValue() = new([])
+begin
+	struct CollectValue <: AbstractHook
+		values::Vector{Float64}
+		CollectValue() = new([])
+	end
+	(f::CollectValue)(::PostEpisodeStage, agent, env) = push!(f.values, agent.policy.π_target.learner.approximator[2](1))
 end
 
-# ╔═╡ 8c218bba-5e20-11eb-1840-d5e92d1830da
-(f::CollectValue)(::PostEpisodeStage, agent, env) = push!(f.values, agent.policy.π_target.learner.approximator[2](1))
-
 # ╔═╡ 4e0a26b0-5e21-11eb-01bf-85db236b9bf8
 begin
 	p = plot()
@@ -120,5 +120,4 @@ end
 # ╠═597bbcd2-5e21-11eb-10c9-7db2f46cfffb
 # ╠═9cdef3ba-5e21-11eb-1cfd-fb1abbf1d608
 # ╠═88b48522-5e20-11eb-00df-65834ec124b2
-# ╠═8c218bba-5e20-11eb-1840-d5e92d1830da
 # ╠═4e0a26b0-5e21-11eb-01bf-85db236b9bf8