I am new to Julia and JuliaReinforcementLearning and just want to get started with the experiments provided at https://juliareinforcementlearning.org/docs/experiments/.
So I created a file that looks like the following:
using ReinforcementLearning
using StableRNGs
using Flux
using Flux.Losses

function RL.Experiment(
    ::Val{:JuliaRL},
    ::Val{:BasicDQN},
    ::Val{:CartPole},
    ::Nothing;
    seed = 123,
)
    rng = StableRNG(seed)
    env = CartPoleEnv(; T = Float32, rng = rng)
    ns, na = length(state(env)), length(action_space(env))
    policy = Agent(
        policy = QBasedPolicy(
            learner = BasicDQNLearner(
                approximator = NeuralNetworkApproximator(
                    model = Chain(
                        Dense(ns, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, na; init = glorot_uniform(rng)),
                    ) |> gpu,
                    optimizer = ADAM(),
                ),
                batch_size = 32,
                min_replay_history = 100,
                loss_func = huber_loss,
                rng = rng,
            ),
            explorer = EpsilonGreedyExplorer(
                kind = :exp,
                ϵ_stable = 0.01,
                decay_steps = 500,
                rng = rng,
            ),
        ),
        trajectory = CircularArraySARTTrajectory(
            capacity = 1000,
            state = Vector{Float32} => (ns,),
        ),
    )
    stop_condition = StopAfterStep(10_000, is_show_progress = !haskey(ENV, "CI"))
    hook = TotalRewardPerEpisode()
    Experiment(policy, env, stop_condition, hook, "# BasicDQN <-> CartPole")
end
I named this file "JuliaRL_BasicDQN_CartPole.jl". There is also a second file, as follows:
include("JuliaRL_BasicDQN_CartPole.jl")
using Plots
pyplot()
ex = E`JuliaRL_BasicDQN_CartPole`
run(ex)
plot(ex.hook.rewards)
savefig("assets/JuliaRL_BasicDQN_CartPole.png") #hide
I named it "test.jl".
(-> One question: what exactly does the E`...` syntax mean?)
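As far as I can tell, E`...` is the @E_str non-standard string literal exported by ReinforcementLearning.jl: it splits the backtick-quoted name on underscores and dispatches to the matching RL.Experiment method. A rough sketch of the equivalence, assuming that parsing behavior:

# E`JuliaRL_BasicDQN_CartPole` appears to expand to a call of the
# RL.Experiment method defined in the first file, roughly:
ex = RL.Experiment(
    Val(:JuliaRL),    # source
    Val(:BasicDQN),   # algorithm
    Val(:CartPole),   # environment
    nothing,          # extra game parameter (unused here)
)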
The experiment seems to start, printing this header:
BasicDQN <-> CartPole
≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡
But it stops with this error message:
LoadError: UndefVarError: params not defined
Stacktrace:
[1] update!(learner::BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, batch::NamedTuple{(:state, :action, :reward, :terminal, :next_state), Tuple{Matrix{Float32}, Vector{Int64}, Vector{Float32}, Vector{Bool}, Matrix{Float32}}})
@ ReinforcementLearningZoo ~/.julia/packages/ReinforcementLearningZoo/tvfq9/src/algorithms/dqns/basic_dqn.jl:78
[2] update!(learner::BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, traj::CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}})
@ ReinforcementLearningZoo ~/.julia/packages/ReinforcementLearningZoo/tvfq9/src/algorithms/dqns/basic_dqn.jl:65
[3] update!
@ ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/q_based_policies/learners/abstract_learner.jl:35 [inlined]
[4] update!
@ ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/q_based_policies/q_based_policy.jl:67 [inlined]
[5] (::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}})(stage::PreActStage, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, action::Int64)
@ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/agents/agent.jl:78
[6] _run(policy::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}}, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode)
@ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/run.jl:29
[7] run(policy::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}}, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode)
@ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/run.jl:10
[8] run(x::Experiment; describe::Bool)
@ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/experiment.jl:56
[9] run(x::Experiment)
@ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/experiment.jl:54
[10] top-level scope
@ ~/Documents/julia/reinforcement/test.jl:9
[11] include(fname::String)
@ Base.MainInclude ./client.jl:476
[12] top-level scope
@ REPL[6]:1
[13] top-level scope
@ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52
in expression starting at /home/std/Documents/julia/reinforcement/test.jl:9
So which parameters still need to be defined in order to run the experiment?
Thanks!
using Flux: params
should fix the problem. - mcabbott

using Flux is fine, but please add it as a separate line as well. That is, you need to add both using Flux and using Flux: params. - Sundar R
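Putting the two comments together, the import block at the top of "JuliaRL_BasicDQN_CartPole.jl" would presumably become the following (a sketch based on the suggestions above; only the "using Flux: params" line is new):

using ReinforcementLearning
using StableRNGs
using Flux
using Flux: params  # bring `params` into scope; the UndefVarError came from it not being defined
using Flux.Losses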