11using ReinforcementLearningBase: RLBase
22
3- mutable struct DojoRLEnv <: RLBase.AbstractEnv
4- dojoenv
5- action_space
6- observation_space
7- state
8- reward
3+ mutable struct DojoRLEnv{T} <: RLBase.AbstractEnv
4+ dojoenv:: Environment
5+ state:: Vector{T}
6+ reward:: T
97 done:: Bool
108 info:: Dict
119end
1210
13- function DojoRLEnv (dojoenv:: Environment )
14- action_space = convert (RLBase. Space, dojoenv. input_space)
15- observation_space = convert (RLBase. Space, dojoenv. observation_space)
11+ function DojoRLEnv (dojoenv:: Environment{X,T} ) where {X,T}
1612 state = reset (dojoenv)
17- return DojoRLEnv (dojoenv, action_space, observation_space, state, 0.0 , false , Dict ())
13+ return DojoRLEnv {T} (dojoenv, state, convert (T, 0.0 ) , false , Dict ())
1814end
1915
20- RLBase. action_space (env:: DojoRLEnv ) = env. action_space
21- RLBase. state_space (env:: DojoRLEnv ) = env. observation_space
16+ function DojoRLEnv (name:: String ; kwargs... )
17+ DojoRLEnv (Dojo. get_environment (name; kwargs... ))
18+ end
19+
20+ RLBase. action_space (env:: DojoRLEnv ) = env. dojoenv. input_space
21+ RLBase. state_space (env:: DojoRLEnv ) = env. dojoenv. observation_space
2222RLBase. is_terminated (env:: DojoRLEnv ) = env. done
2323
2424RLBase. reset! (env:: DojoRLEnv ) = reset (env. dojoenv)
@@ -28,6 +28,9 @@ RLBase.state(env::DojoRLEnv) = env.state
2828
2929Random. seed! (env:: DojoRLEnv , seed) = Dojo. seed (env. dojoenv, seed)
3030
31+ # TODO :
32+ # RLBase.ChanceStyle(env::DojoRLEnv) = RLBase.DETERMINISTIC
33+
3134function (env:: DojoRLEnv )(a)
3235 s, r, d, i = step (env. dojoenv, a)
3336 env. state = s
0 commit comments