-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
88 lines (73 loc) · 2.85 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import tensorflow as tf
import numpy as np
import random
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
def set_global_seeds(i=1):
    """Seed TensorFlow, NumPy and the stdlib ``random`` module with ``i``.

    Args:
        i: Seed value passed unchanged to all three generators.
    """
    # ``tf.set_random_seed`` is the TF 1.x spelling; it is not exposed in the
    # top-level namespace of TF 2.x, where the compat alias must be used.
    seed_tensorflow = getattr(tf, 'set_random_seed', None)
    if seed_tensorflow is None:
        seed_tensorflow = tf.compat.v1.set_random_seed
    seed_tensorflow(i)
    np.random.seed(i)
    random.seed(i)
def plot(mean_rewards,
         std_dev_rewards,
         episode_lengths,
         summary_every,
         dir_path):
    """Save one PNG curve per training metric inside ``dir_path``.

    Every metric is reduced along axis 0: the mean is drawn as a line and
    the 5th-95th percentile range as a shaded band around it.

    Args:
        mean_rewards: Array-like reduced along axis 0 (presumably one row
            per run and one column per summary point -- confirm with the
            caller).
        std_dev_rewards: Same layout as ``mean_rewards``.
        episode_lengths: Same layout as ``mean_rewards``.
        summary_every: Spacing, in episodes, between consecutive points on
            the x axis.
        dir_path: Directory that receives ``mean_reward.png``,
            ``standard_deviation_rewards.png`` and ``episode_lengths.png``.
            A trailing path separator is optional. The directory is not
            created here.
    """
    # Local import so this function does not rely on a file-level import.
    import os

    def plot_fig(series, name):
        """Draw and save the mean / percentile-band figure for one metric."""
        mean = np.mean(series, axis=0)
        lower = np.percentile(series, 5, axis=0)
        upper = np.percentile(series, 95, axis=0)

        # Exactly one x position per summary point. Scaling an integer range
        # avoids the off-by-one length np.arange can produce with a
        # non-integer step.
        x = np.arange(len(mean)) * summary_every

        plt.plot(x, mean)
        plt.fill_between(x, lower, upper, color='b', alpha=0.2)
        plt.xlabel("Episode")
        plt.ylabel(name)
        plt.title(name + ' vs. Episode Number')

        # Join with the directory instead of concatenating strings so the
        # file lands inside dir_path even without a trailing separator.
        file_name = name.lower().replace(' ', '_') + '.png'
        plt.savefig(os.path.join(dir_path, file_name), dpi=300)

        # Reset the shared pyplot figure so the next metric starts clean.
        plt.clf()

    plot_fig(mean_rewards, 'Mean Reward')
    plot_fig(std_dev_rewards, 'Standard Deviation Rewards')
    plot_fig(episode_lengths, 'Episode Lengths')
def plot_value_func(estimator, episode, ob_space):
    """Plot the critic and actor outputs over a grid of 2-D states.

    The first two observation dimensions are sampled on a 250 x 250 grid.
    For each quantity ("value" from ``estimator.critic``, "action" from
    ``estimator.actor``) two images are written to the current working
    directory: ``<name>_surface_<episode>.png`` and
    ``<name>_contour_<episode>.png``.

    Args:
        estimator: Object exposing ``critic(state)`` and ``actor(state)``.
            Each is called with an array of shape ``(1, 2)``; the result is
            squeezed and assumed to be a scalar (the final reshape to
            ``(num_points, num_points)`` fails otherwise).
        episode: Value formatted into the output file names.
        ob_space: Object exposing indexable ``low`` and ``high`` bounds;
            only indices 0 and 1 are read.
    """
    def plot_surface_and_contour(vals, x1, x2, name):
        """Save a 3-D surface and a filled contour of ``vals`` over (x1, x2)."""
        fig = plt.figure()
        # add_subplot(..., projection='3d') replaces fig.gca(projection=...),
        # which newer Matplotlib releases no longer accept.
        ax = fig.add_subplot(111, projection='3d')
        surf = ax.plot_surface(x1, x2, vals,
                               cmap=cm.rainbow, antialiased=True,
                               linewidth=0.001)

        # Customize the z axis.
        ax.set_zlim(np.amin(vals), np.amax(vals))
        ax.zaxis.set_major_locator(LinearLocator(10))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

        # Add a color bar which maps values to colors.
        fig.colorbar(surf, ax=ax, shrink=0.5, aspect=5)
        fig.savefig("{0}_surface_{1}.png".format(name, episode), dpi=300)

        # Reuse the same figure for the 2-D contour view.
        fig.clf()
        contour_ax = fig.add_subplot(111)
        contour = contour_ax.contourf(x1, x2, vals)
        fig.colorbar(contour, ax=contour_ax, shrink=0.5)
        fig.savefig("{0}_contour_{1}.png".format(name, episode), dpi=300)

        # Close exactly the figure created above, leaving any other open
        # figure untouched.
        plt.close(fig)

    num_points = 250
    x_support = np.linspace(ob_space.low[0],
                            ob_space.high[0],
                            num=num_points)
    x_dot_support = np.linspace(ob_space.low[1],
                                ob_space.high[1],
                                num=num_points)

    # 'ij' indexing makes grid[i, j] correspond to (x_support[i],
    # x_dot_support[j]), which is the order the loops below fill the value
    # arrays in. The default 'xy' indexing would transpose the surface.
    x_grid, x_dot_grid = np.meshgrid(x_support, x_dot_support,
                                     indexing='ij')

    predicted_vals = []
    predicted_acs = []
    for x in x_support:
        for x_dot in x_dot_support:
            state = np.array([x, x_dot]).reshape((-1, 2))
            val = estimator.critic(state)
            act = estimator.actor(state)
            predicted_vals.append(np.squeeze(val))
            predicted_acs.append(np.squeeze(act))

    predicted_vals = np.array(predicted_vals).reshape((num_points, num_points))
    predicted_acs = np.array(predicted_acs).reshape((num_points, num_points))

    # Each call plots the array it is given; the action images are no longer
    # copies of the value images.
    plot_surface_and_contour(predicted_vals, x_grid, x_dot_grid, 'value')
    plot_surface_and_contour(predicted_acs, x_grid, x_dot_grid, 'action')