11# ## A Pluto.jl notebook ###
2- # v0.17.1
2+ # v0.16.4
33
44using Markdown
55using InteractiveUtils
@@ -11,16 +11,16 @@ begin
1111 using ReinforcementLearning
1212end
1313
14- # ╔═╡ 510ef350-00b5-4618-ba55-f891603604d1
14+ # ╔═╡ 109c4fb2-4a5b-11eb-08d5-bd6b1eb0ebe9
1515using Plots
1616
17- # ╔═╡ 0d5a05bf-0fa2-43ff-af77-e2f308c0f4bf
17+ # ╔═╡ 1fcd93f0-4a5c-11eb-252d-9da5bc78b08b
1818using StatsPlots
1919
20- # ╔═╡ 55c903fb-46be-4187-bc64-dc4e716ea101
20+ # ╔═╡ 1fbc2952-4b1b-11eb-3b65-75c1058a9537
2121using Flux
2222
23- # ╔═╡ 5cd818c6-073f-446c-ad92-b6b5e31db860
23+ # ╔═╡ db64341a-4b1b-11eb-3f7b-f11b26f442f4
2424using Statistics
2525
2626# ╔═╡ 538080e4-4a5a-11eb-0570-65614c5797f0
@@ -51,9 +51,6 @@ violin(
5151 leg= false
5252)
5353
54- # ╔═╡ ef5ee496-f2c3-4536-8436-1e2575bbde72
55- violin (repeat ([1 ,2 ,3 ],outer= 100 ),randn (300 ))
56-
5754# ╔═╡ 69bc9e66-4a5c-11eb-0288-1930cdb31d9d
5855md """
5956The above figure shows the reward distribution of each action. (Figure 2.1)
@@ -265,17 +262,16 @@ end
265262# ╔═╡ Cell order:
266263# ╟─538080e4-4a5a-11eb-0570-65614c5797f0
267264# ╠═dab179ae-4a5a-11eb-317c-c7fa9d9ccf8f
268- # ╠═510ef350-00b5-4618-ba55-f891603604d1
269- # ╠═0d5a05bf-0fa2-43ff-af77-e2f308c0f4bf
270- # ╠═55c903fb-46be-4187-bc64-dc4e716ea101
271- # ╠═5cd818c6-073f-446c-ad92-b6b5e31db860
272265# ╠═dfe484d4-4a5a-11eb-0224-573d091b3d08
266+ # ╠═109c4fb2-4a5b-11eb-08d5-bd6b1eb0ebe9
267+ # ╠═1fcd93f0-4a5c-11eb-252d-9da5bc78b08b
273268# ╠═1427b132-4a5b-11eb-3506-e744a9a5595c
274- # ╠═ef5ee496-f2c3-4536-8436-1e2575bbde72
275269# ╟─69bc9e66-4a5c-11eb-0288-1930cdb31d9d
276270# ╟─c0ca4172-4aac-11eb-255d-8b0005441fb0
277271# ╠═4bf0f782-4aad-11eb-291c-afa853f150a3
278272# ╟─1ff8d726-4aad-11eb-0d88-c7f6080c4072
273+ # ╠═1fbc2952-4b1b-11eb-3b65-75c1058a9537
274+ # ╠═db64341a-4b1b-11eb-3f7b-f11b26f442f4
279275# ╠═1dfcd040-4b1a-11eb-248e-990c0e029f43
280276# ╠═15e65f7e-4b1b-11eb-26b7-ef85168b5112
281277# ╠═b302e1e6-4b1c-11eb-1cb8-91be2a24d5a7
0 commit comments