大家好,我是毛毛。
今天是Day 28
要來看Acrobot的結果啦~ ヽ(✿゚▽゚)ノ
# Separator line printed around each weight dump.
rule = "================================================================="

# Create the Acrobot environment and a DQN agent whose input/output sizes
# are taken from the environment's observation and action spaces.
env = gym.make("Acrobot-v1")
dqn_agent = DQN(
    env.observation_space.shape[0],
    env.action_space.n,
    e = 0.01,
)

# Show the agent's weights as they are right after construction.
print(rule)
print("Old weights: \n", dqn_agent.get_weights())
print(rule)

# Resume from a previously saved weight file when one is present.
filepath = "./dqn.Acrobot_weights.h5"
if not os.path.isfile(filepath):
    print("Not exist\n")
else:
    print("Exist\n")
    dqn_agent.load_weights(filepath)
    # Show the weights again so the effect of loading is visible.
    print(rule)
    print("Revised weights: \n", dqn_agent.get_weights())
    print(rule)
這個部分(建立環境、載入已儲存的權重)跟上一個的做法一樣。
# Training configuration.
times = 1000      # number of episodes to run
batch_size = 32   # replay is triggered once memory holds more than this many transitions
steps = []        # number of steps each finished episode took

# Length of one observation vector; every state handed to the agent is
# reshaped to (1, state_dim).
state_dim = env.observation_space.shape[0]

# NOTE(review): this loop uses the classic Gym API (reset() -> obs,
# step() -> 4-tuple). Newer Gym/Gymnasium releases return extra values;
# confirm the installed version.
try:
    for episode in range(times):
        obs = env.reset()
        action = dqn_agent.choose_action(np.reshape(obs, [1, state_dim]))
        step = 0
        while True:
            env.render()
            step += 1
            obs_, reward, terminal, _ = env.step(action)

            # Record the transition (state, action, reward, next_state, done).
            dqn_agent.store_transition(
                np.reshape(obs, [1, state_dim]),
                action,
                reward,
                np.reshape(obs_, [1, state_dim]),
                terminal,
            )

            # Only replay once the memory holds more than one batch.
            if len(dqn_agent.memory) > batch_size:
                dqn_agent.replay_transition(batch_size)

            # Advance to the next state. The next action is chosen before the
            # terminal check, exactly as in the original ordering.
            obs = obs_
            action = dqn_agent.choose_action(np.reshape(obs_, [1, state_dim]))

            if terminal:
                env.render()
                print('Nice!! 。:.゚ヽ(*´∀`)ノ゚.:。')
                # `reward` here is the reward of the final step only.
                print("In {} times: it takes {} steps with {} reward".format(episode, step, reward))
                steps.append(step)
                # End-of-episode bookkeeping on the agent (presumably syncs the
                # target network and checkpoints weights -- confirm in DQN).
                dqn_agent.target_replacement()
                dqn_agent.save_weights()
                break
finally:
    # Always release the environment (and its render window), even when
    # training is interrupted or raises.
    env.close()
上面就是main function的部分啦。
執行結果
Console的執行結果~ d(d'∀')
Acrobot就到這啦~ 0(:3 )~ ('、3_ヽ)_
大家明天見