Commit 6aad48b2 authored Apr 25, 2020 by udoedo
Frozen Lake Q-learning and SARSA
parent 1b26a909

Changes 6
README.md
# dp 2020
diploma project
\ No newline at end of file
frozen-lake/.vscode/settings.json
0 → 100644
{
    "python.pythonPath": "C:\\Users\\Eduard Pizur\\AppData\\Local\\Programs\\Python\\Python36\\python.exe"
}
\ No newline at end of file
frozen-lake/__pycache__/frozenagent.cpython-36.pyc
0 → 100644
File added
frozen-lake/__pycache__/frozenagent.cpython-38.pyc
0 → 100644
File added
frozen-lake/frozen_lake.py
0 → 100644
import gym
import random
import numpy as np
import time
import pprint as pp
import matplotlib.pyplot as plt

from frozenagent import Agent

# HYPERPARAMETERS
LEARNING_RATE = 0.01
DISCOUNT_FACTOR = 0.9
EPSILON_START = 1
EPSILON_END = 0.1
EPSILON_DECAY_RATE = 0.999995
NUMBER_OF_GAMES = 400_000


def print_table(table):
    # print the greedy action for every cell of the 4x4 FrozenLake grid,
    # masking the holes and the goal (states 5, 7, 11, 12, 15)
    possible_actions = {0: 'LEFT', 1: 'DOWN', 2: 'RIGHT', 3: 'UP'}
    k = 0
    for i in range(4):
        for j in range(4):
            action = np.argmax(table[k])
            if k not in [5, 7, 11, 12, 15]:
                print("{} ".format(possible_actions[action]), end='')
            else:
                print("----", end='')
            k += 1
        print()


if __name__ == "__main__":
    env = gym.make("FrozenLake-v0")
    states = env.observation_space.n

    agent = Agent(lr=LEARNING_RATE, gamma=DISCOUNT_FACTOR,
                  eps_start=EPSILON_START, eps_end=EPSILON_END,
                  eps_dec=EPSILON_DECAY_RATE, n_states=states)

    # Q-learning
    q_scores = []
    q_avg_score_list = []

    for i in range(NUMBER_OF_GAMES):
        done = False
        obs = env.reset()
        score = 0

        while not done:
            action = agent.choose_action(obs)
            next_obs, reward, done, info = env.step(action)
            agent.q_learning(obs, action, reward, next_obs)
            score += reward
            obs = next_obs

        q_scores.append(score)

        if i % 100 == 0:
            # rolling average over the last 100 episodes
            avg_score = np.mean(q_scores[-100:])
            q_avg_score_list.append(avg_score)

        if i % 10_000 == 0:
            print(i)

    plt.plot(q_avg_score_list, "r")

    agent_sarsa = Agent(lr=LEARNING_RATE, gamma=DISCOUNT_FACTOR,
                        eps_start=EPSILON_START, eps_end=EPSILON_END,
                        eps_dec=EPSILON_DECAY_RATE, n_states=states)

    # SARSA
    sarsa_scores = []
    sarsa_avg_score_list = []

    for i in range(NUMBER_OF_GAMES):
        done = False
        obs = env.reset()
        action = agent_sarsa.choose_action(obs)
        scores = 0

        while not done:
            next_obs, reward, done, info = env.step(action)
            next_action = agent_sarsa.choose_action(next_obs)
            agent_sarsa.sarsa_learning(obs, action, reward, next_obs, next_action)
            scores += reward
            obs = next_obs
            action = next_action

        sarsa_scores.append(scores)

        if i % 100 == 0:
            avg_score = np.average(sarsa_scores[-100:])
            sarsa_avg_score_list.append(avg_score)

        if i % 10_000 == 0:
            print(i)

    plt.plot(sarsa_avg_score_list, "g")
    plt.xlabel('episodes (x100)')
    plt.ylabel('average score')

    print("Q LEARNING", "-" * 30)
    print_table(agent.q_table)
    print("SARSA", "-" * 30)
    print_table(agent_sarsa.q_table)

    plt.show()
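The script above trains both agents but never evaluates the learned policies without exploration. A minimal sketch of such an evaluation loop is shown below; it is not part of this commit, the helper name evaluate and its parameters are assumptions, and it relies on the same pre-0.26 gym step API (four return values) and the np and env names already used in the script.

# Hypothetical helper, not part of this commit: run the greedy policy from a
# trained Q-table and report the success rate over a number of episodes.
def evaluate(env, q_table, episodes=1000):
    wins = 0
    for _ in range(episodes):
        obs = env.reset()
        done = False
        while not done:
            action = np.argmax(q_table[obs])            # always exploit
            obs, reward, done, info = env.step(action)
        wins += reward                                  # reward is 1 only when the goal is reached
    return wins / episodes

# example usage inside the __main__ block, after training:
# print("Q-learning success rate:", evaluate(env, agent.q_table))
# print("SARSA success rate:", evaluate(env, agent_sarsa.q_table))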
frozen-lake/frozenagent.py
0 → 100644
import numpy as np


class Agent:
    def __init__(self, lr, gamma, n_states, eps_start, eps_end, eps_dec):
        self.lr = lr
        self.gamma = gamma
        self.n_states = n_states
        self.epsilon = eps_start
        self.eps_min = eps_end
        self.eps_dec = eps_dec

        self.q_table = None
        self.initialize_q_table()

    def initialize_q_table(self):
        # one list of four action values (LEFT, DOWN, RIGHT, UP) per state
        self.q_table = {state: [0, 0, 0, 0] for state in range(self.n_states)}

    def choose_action(self, state):
        # epsilon-greedy: exploit with probability (1 - epsilon), otherwise pick a random action
        if np.random.uniform(0, 1) < (1 - self.epsilon):
            action = np.argmax(self.q_table[state])
        else:
            action = np.random.choice([0, 1, 2, 3])
        return action

    def decaying_epsilon(self):
        # multiplicative decay, clamped at eps_min
        self.epsilon = self.epsilon * self.eps_dec if self.epsilon > self.eps_min else self.eps_min

    def q_learning(self, state, action, reward, next_state):
        # off-policy TD update toward the greedy action in the next state
        next_max_action = np.argmax(self.q_table[next_state])
        self.q_table[state][action] += self.lr * (
            reward
            + self.gamma * self.q_table[next_state][next_max_action]
            - self.q_table[state][action])
        self.decaying_epsilon()

    def sarsa_learning(self, state, action, reward, next_state, next_action):
        # on-policy TD update toward the action actually taken in the next state
        self.q_table[state][action] += self.lr * (
            reward
            + self.gamma * self.q_table[next_state][next_action]
            - self.q_table[state][action])
        self.decaying_epsilon()
# class Agent:
# def __init__(self, env):
# self.env = env
# self.state = None
# self.q_table = None
# self.epsilon = EPSILON
# self.lr_rate = LR_RATE
# def initialize_q_table(self):
# self.q_table = {state: [0.1, 0.1, 0.1, 0.1] for state in range(self.env.observation_space.n)}
# def choose_action(self, state):
# if np.random.uniform(0,1) < (1-self.epsilon):
# action = np.argmax(self.q_table[state])
# else:
# action = np.random.choice([0,1,2,3])
# return action
# def decaying_epsilon(self):
# # if self.epsilon > 0.005:
# self.epsilon *= DECAY_RATE_EPSILON
# def resetAgent(self):
# self.state = self.env.reset()
# def learn_Qlearning(self):
# count = 0
# self.initialize_q_table()
# rewards = []
# avg_rewards = []
# for episode in range(1,NUM_OF_EPISODES):
# self.resetAgent()
# done = False
# while not done:
# action = self.choose_action(self.state)
# next_state, reward, done, info = self.env.step(action)
# if done == True:
# if reward == 0:
# reward = -1
# else:
# reward = STEP_PENALTY
# current_q_value = self.q_table[self.state][action]
# max_next_q_value = np.max(self.q_table[next_state])
# self.q_table[self.state][action] = current_q_value + self.lr_rate*(reward + DISCOUNT * max_next_q_value - current_q_value)
# self.state = next_state
# if reward == 1:
# count += 1
# self.decaying_epsilon()
# rewards.append(reward)
# avg_rewards.append(sum(rewards)/episode)
# print(count)
# print(self.epsilon)
# return avg_rewards, self.q_table
# def learn_SARSA(self):
# self.initialize_q_table()
# done = False
# count = 0
# rewards = []
# avg_reward = []
# for episode in range(1,NUM_OF_EPISODES):
# self.resetAgent()
# action = self.choose_action(self.state)
# done = False
# while not done:
# next_state, reward, done, info = self.env.step(action)
# next_action = self.choose_action(next_state)
# if done == True:
# if reward == 0:
# reward = -1
# else:
# reward = STEP_PENALTY
# current_q_value = self.q_table[self.state][action]
# next_q_value = self.q_table[next_state][next_action]
# self.q_table[self.state][action] = current_q_value + LR_RATE*(reward + DISCOUNT * next_q_value - current_q_value)
# self.state = next_state
# action = next_action
# if reward == 1:
# count += 1
# self.decaying_epsilon()
# rewards.append(reward)
# avg_reward.append(sum(rewards)/episode)
# print(count)
# return avg_reward, self.q_table
\ No newline at end of file
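For reference, the q_learning and sarsa_learning methods of the active Agent class implement the standard tabular TD updates. Written out in conventional notation (a sketch; the mapping alpha = lr, gamma = gamma, s' = next_state, a' = next action is assumed):

% Q-learning (off-policy): bootstrap from the greedy action in the next state
$Q(s,a) \leftarrow Q(s,a) + \alpha \left[ r + \gamma \max_{a'} Q(s',a') - Q(s,a) \right]$

% SARSA (on-policy): bootstrap from the action actually selected in the next state
$Q(s,a) \leftarrow Q(s,a) + \alpha \left[ r + \gamma \, Q(s',a') - Q(s,a) \right]$

The only difference is the bootstrap target: Q-learning uses the maximum action value in the next state, SARSA uses the value of the action the epsilon-greedy policy actually chose.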