Commit b91799a9 authored by Eduard Pizur's avatar Eduard Pizur
Browse files

added tensorboard

parent da4837fb
......@@ -75,12 +75,6 @@ class Agent():
experiences = self.memory.sample()
states, actions, next_states, rewards, dones = zip(*experiences)
print("states", states)
import sys
sys.exit(0)
states = T.tensor(states, dtype=T.float32).to(self.device)
rewards = T.tensor(rewards, dtype=T.int32).to(self.device)
actions = T.tensor(actions, dtype=T.int64).to(self.device)
......
......@@ -5,6 +5,7 @@ import time
from torch.utils.tensorboard import SummaryWriter
import datetime
import sys
import os
from dqn_agent import Agent
......@@ -27,23 +28,23 @@ if __name__ == '__main__':
# Init tensorboard
# log_name = "runs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + " " + ENVIRONMENT
# tb = SummaryWriter(log_name)
# tb.add_graph(agent.network)
run_name = "runs/{}/{}".format("DQN", datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
writer = SummaryWriter(run_name)
best_score = 0
learn_steps = 0
scores = []
writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
writer.add_scalar('Best Score', best_score, learn_steps)
writer.add_scalar('Score', best_score, learn_steps)
for episode in range(NUM_OF_EPISODES):
# initialization of each episode
state = env.reset()
done = False
score = 0
# tb.add_scalar('Epsilon', agent.epsilon, learn_steps)
# tb.add_scalar('Best Score', best_score, learn_steps)
# repeat until dies
while not done:
action = agent.choose_action(state)
......@@ -58,6 +59,9 @@ if __name__ == '__main__':
agent.train()
agent.decay_epsilon()
if learn_steps % 2000 == 0:
writer.add_scalar('Training loss', agent.network.loss.item(), learn_steps)
score += reward
learn_steps += 1
......@@ -72,10 +76,14 @@ if __name__ == '__main__':
scores.append(score)
avg_score = np.mean(scores[-100:])
print('episode: ', episode,'score: ', score,
' average score %.1f' % avg_score, 'best score %.2f' % best_score,
'epsilon %.2f' % agent.epsilon, 'steps', learn_steps)
# tb.add_scalar('Score', score, learn_steps)
writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
writer.add_scalar('Best Score', best_score, learn_steps)
writer.add_scalar('Score', score, learn_steps)
tb.close()
writer.close()
env.close()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment