Everything runs without errors until the last line, trainingStats = train(agent, env, trainOpts); the DDPG agent itself is created successfully, but train then fails.
clear all
clc
addpath('matpower6.0');
define_constants;
% define named indices into bus, gen, branch matrices
[PQ, PV, REF, NONE, BUS_I, BUS_TYPE, PD, QD, GS, BS, BUS_AREA, VM, ...
VA, BASE_KV, ZONE, VMAX, VMIN, LAM_P, LAM_Q, MU_VMAX, MU_VMIN] = idx_bus;
[F_BUS, T_BUS, BR_R, BR_X, BR_B, RATE_A, RATE_B, RATE_C, ...
TAP, SHIFT, BR_STATUS, PF, QF, PT, QT, MU_SF, MU_ST, ...
ANGMIN, ANGMAX, MU_ANGMIN, MU_ANGMAX] = idx_brch;
[GEN_BUS, PG, QG, QMAX, QMIN, VG, MBASE, GEN_STATUS, PMAX, PMIN, ...
MU_PMAX, MU_PMIN, MU_QMAX, MU_QMIN, PC1, PC2, QC1MIN, QC1MAX, ...
QC2MIN, QC2MAX, RAMP_AGC, RAMP_10, RAMP_30, RAMP_Q, APF] = idx_gen;
%% Create the environment
env = PowerSystemEnv;
% Get the environment's observation and action specifications
obsInfo = getObservationInfo(env);
actInfo = getActionInfo(env);
% Fix the random seed for reproducibility
rng(0)
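% Optional check: print the spec shapes declared by PowerSystemEnv. Single
% observations are assumed here to be [nObs 1] column vectors; a mismatch
% between what reset/step later return and this shape can be read as a batch.
disp(obsInfo.Dimension)
disp(actInfo.Dimension)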
%% Create the critic network
% Observation path
obsPath = [
featureInputLayer(obsInfo.Dimension(1), 'Normalization', 'none', 'Name', 'state')
fullyConnectedLayer(200, 'Name', 'fc1')
reluLayer('Name', 'relu1')
fullyConnectedLayer(100, 'Name', 'fc2')
reluLayer('Name', 'relu2')];
% Action path
actPath = [
featureInputLayer(actInfo.Dimension(1), 'Normalization', 'none', 'Name', 'action')
fullyConnectedLayer(100, 'Name', 'fc3')
reluLayer('Name', 'relu3')];
% Common path
commonPath = [
concatenationLayer(1, 2, 'Name', 'concat')
fullyConnectedLayer(50, 'Name', 'fc4')
reluLayer('Name', 'relu4')
fullyConnectedLayer(1, 'Name', 'output')];
% Assemble and connect the network
net = layerGraph(obsPath);
net = addLayers(net, actPath);
net = addLayers(net, commonPath);
net = connectLayers(net, 'relu2', 'concat/in1');
net = connectLayers(net, 'relu3', 'concat/in2');
% Convert the layer graph to a dlnetwork
net = dlnetwork(net);
% Create the Q-value critic for the continuous action space
critic = rlQValueFunction(net, obsInfo, actInfo, ...
'ObservationInputNames', 'state', 'ActionInputNames', 'action');
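% Optional wiring check with placeholder inputs shaped from the specs:
% getValue should return a single scalar Q estimate.
getValue(critic, {rand(obsInfo.Dimension)}, {rand(actInfo.Dimension)})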
%% Create the actor network
layers = [
featureInputLayer(obsInfo.Dimension(1), 'Normalization', 'none', 'Name', 'state')
fullyConnectedLayer(200, 'Name', 'fc1')
reluLayer('Name', 'relu1')
fullyConnectedLayer(100, 'Name', 'fc2')
reluLayer('Name', 'relu2')
fullyConnectedLayer(actInfo.Dimension(1), 'Name', 'output')
tanhLayer('Name', 'tanh')];
net = dlnetwork(layers);
actor = rlContinuousDeterministicActor(net, obsInfo, actInfo);
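% Optional check: a spec-shaped random observation should yield one action
% of size actInfo.Dimension.
getAction(actor, {rand(obsInfo.Dimension)})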
%% Optimizer options
criticOpts = rlOptimizerOptions('LearnRate', 1e-3, 'GradientThreshold', 1);
actorOpts = rlOptimizerOptions('LearnRate', 1e-4, 'GradientThreshold', 1);
%% DDPG agent hyperparameters
agentOpts = rlDDPGAgentOptions( ...
'SampleTime', 0.1, ...
'CriticOptimizerOptions', criticOpts, ...
'ActorOptimizerOptions', actorOpts, ...
'ExperienceBufferLength', 1e4, ...
'MiniBatchSize', 256, ...
'DiscountFactor', 0.99, ...
'TargetSmoothFactor', 1e-3, ...
'TargetUpdateFrequency', 1 );
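% The exploration noise referenced by the error below is the default
% Ornstein-Uhlenbeck model; its parameters (left at their defaults here)
% are exposed through agentOpts.NoiseOptions, e.g.
% agentOpts.NoiseOptions.StandardDeviation.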
% Create the DDPG agent
agent = rlDDPGAgent(actor, critic, agentOpts);
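% Optional check: the agent should return a spec-shaped action for a single
% random observation.
getAction(agent, {rand(obsInfo.Dimension)})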
%% Train the agent
trainOpts = rlTrainingOptions( ...
'MaxEpisodes', 2000, ...
'MaxStepsPerEpisode', 5000, ...
'StopTrainingCriteria', 'EpisodeReward', ...
'StopTrainingValue', 40, ...
'SaveAgentCriteria', 'EpisodeReward', ...
'SaveAgentValue', 40, ...
'Verbose', false, ...
'Plots', 'training-progress');
% Start training
trainingStats = train(agent, env, trainOpts);
% Running this produces the error below:
Error using rl.train.SeriesTrainer/run
There was an error executing the ProcessExperienceFcn.
Caused by:
Error using rl.policy.rlAdditiveNoisePolicy/getAction_
Batch observations not supported for Ornstein-Uhlenbeck noise model.
Error in rl.policy.PolicyInterface/getAction (line 36)
[action,this] = getAction_(this,observation);
Error in rl.agent.AbstractOffPolicyAgent/getExplorationAction_ (line 116)
[action,this.ExplorationPolicy_] = getAction(this.ExplorationPolicy_,...
Error in rl.agent.AbstractAgent/getAction_ (line 90)
[action,this] = getExplorationAction_(this,observation);
Error in rl.policy.PolicyInterface/getAction (line 36)
[action,this] = getAction_(this,observation);
Error in rl.env.internal.PolicyExperienceProcessorInterface/evaluateAction_ (line 32)
[action,this.Policy_] = getAction(this.Policy_,observation);
Error in rl.env.internal.ExperienceProcessorInterface/evaluateAction (line 62)
action = evaluateAction_(this,observation);
Error in rl.env.internal.MATLABSimulator/simInternal_ (line 109)
act = evaluateAction(expProcessor,obs);
Error in rl.env.internal.MATLABSimulator/sim_ (line 67)
out = simInternal_(this,simPkg);
Error in rl.env.internal.AbstractSimulator/sim (line 30)
out = sim_(this,simData,policy,processExpFcn,processExpData);
Error in rl.env.AbstractEnv/runEpisode (line 144)
out = sim(simulator,simData,policy,processExpFcn,processExpData);
Error in rl.train.SeriesTrainer/run (line 64)
out = runEpisode(...
Error in rl.train.TrainingManager/train (line 516)
run(trainer);
Error in rl.train.TrainingManager/run (line 253)
train(this);
Error in rl.agent.AbstractAgent/train (line 187)
trainingResult = run(trainMgr,checkpoint);
Error in main (line 104)
trainingStats = train(agent, env, trainOpts);
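The failure happens inside the exploration policy's getAction, which rejects what it interprets as a batch of observations for the Ornstein-Uhlenbeck noise model. Since PowerSystemEnv is not shown, one assumption worth checking is whether its reset and step methods return observations with exactly the shape declared in obsInfo (e.g. an [nObs 1] column vector rather than a row vector or matrix). Assuming the environment subclasses rl.env.MATLABEnvironment, a manual rollout makes the shapes visible:
obs0 = reset(env);
size(obs0)                     % should equal obsInfo.Dimension exactly
a0 = rand(actInfo.Dimension);  % placeholder action, used only for a shape check
[obs1, ~, ~] = step(env, a0);
size(obs1)                     % a row vector or matrix here is what the noise policy rejects as a batch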