JuliaReinforcementLearning
diff --git a/Manifest.toml b/Manifest.toml
+415 -605
diff --git a/Project.toml b/Project.toml
+3 -2
diff --git a/notebooks/Chapter01_Tic_Tac_Toe.jl b/notebooks/Chapter01_Tic_Tac_Toe.jl
+6 -2
diff --git a/notebooks/Chapter02_Ten_Armed_Testbed.jl b/notebooks/Chapter02_Ten_Armed_Testbed.jl
+8 -4
diff --git a/notebooks/Chapter03_Grid_World.jl b/notebooks/Chapter03_Grid_World.jl
+3 -1
diff --git a/notebooks/Chapter04_Car_Rental.jl b/notebooks/Chapter04_Car_Rental.jl
+3 -1
diff --git a/notebooks/Chapter04_Gambler_Problem.jl b/notebooks/Chapter04_Gambler_Problem.jl
+3 -1
diff --git a/notebooks/Chapter04_Grid_World.jl b/notebooks/Chapter04_Grid_World.jl
+3 -1
@@ -1,6 +1,7 @@
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8"
 ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318"
@@ -10,9 +11,9 @@ StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 
 [compat]
 Distributions = "0.24"
-Flux = "0.11"
+Flux = "0.12"
 Plots = "1.10"
 PlutoUI = "0.7"
-ReinforcementLearning = "0.8"
+ReinforcementLearning = "0.10"
 StatsBase = "0.33"
 StatsPlots = "0.14"
@@ -1,11 +1,15 @@
 ### A Pluto.jl notebook ###
-# v0.14.3
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ 9b8c8d1a-481e-11eb-1b85-91264e100b12
-using ReinforcementLearning
+begin
+	import Pkg
+	Pkg.activate(Base.current_project())
+	using ReinforcementLearning
+end
 
 # ╔═╡ 7441759c-4853-11eb-3d63-2be1f95f59fe
 using Plots
 
@@ -1,11 +1,15 @@
 ### A Pluto.jl notebook ###
-# v0.14.3
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ dab179ae-4a5a-11eb-317c-c7fa9d9ccf8f
-using ReinforcementLearning
+begin
+	import Pkg
+	Pkg.activate(Base.current_project())
+	using ReinforcementLearning
+end
 
 # ╔═╡ 109c4fb2-4a5b-11eb-08d5-bd6b1eb0ebe9
 using Plots
@@ -110,7 +114,7 @@ function bandit_testbed(
 	   trajectory=VectorSARTTrajectory()
 	)
 	h1 = CollectBestActions(;best_action=findmax(env.true_values)[2])
-	h2 = TotalRewardPerEpisode()
+	h2 = TotalRewardPerEpisode(;is_display_on_exit=false)
 	run(agent, env, StopAfterStep(1000), ComposedHook(h1, h2))
     h1.isbest, h2.rewards
 end
@@ -198,7 +202,7 @@ function gb_bandit_testbed(
 	)
 
 	h1 = CollectBestActions(;best_action=findmax(env.true_values)[2])
-	h2 = TotalRewardPerEpisode()
+	h2 = TotalRewardPerEpisode(;is_display_on_exit=false)
 	run(agent, env, StopAfterStep(1000), ComposedHook(h1, h2))
     h1.isbest, h2.rewards
 end
 
@@ -1,11 +1,13 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ bd94028c-5d8e-11eb-22b6-5fa384999fdb
 begin
+	import Pkg
+	Pkg.activate(Base.current_project())
 	using ReinforcementLearning
 	using Flux
 	using Statistics
 
@@ -1,11 +1,13 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ 92081fb8-5d90-11eb-2078-ddbf87421051
 begin
+	import Pkg
+	Pkg.activate(Base.current_project())
 	using ReinforcementLearning
 	using Flux
 	using Statistics
 
@@ -1,11 +1,13 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ f5cc0f04-5d99-11eb-3abe-bf3fccdac9e6
 begin
+	import Pkg
+	Pkg.activate(Base.current_project())
 	using ReinforcementLearning
 	using Flux
 	using Statistics
 
@@ -1,11 +1,13 @@
 ### A Pluto.jl notebook ###
-# v0.12.18
+# v0.16.4
 
 using Markdown
 using InteractiveUtils
 
 # ╔═╡ 604afcfc-5d9d-11eb-0e2d-4971e8c87824
 begin
+	import Pkg
+	Pkg.activate(Base.current_project())
 	using ReinforcementLearning
 	using Flux
 	using Statistics