-
Notifications
You must be signed in to change notification settings - Fork 1
/
agent.py
66 lines (43 loc) · 1.57 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
import Drone
from panda3d.core import Vec3
class Agent:
    """Point-to-target agent driving a single ``Drone.uav`` physics body.

    ``pos``, ``lastPos`` and ``target`` are 3-vectors kept in scaled units:
    ``reset`` samples them uniformly from [-0.5, 0.5) per axis, multiplies the
    start position by ``factor`` when placing the body, and ``getSubState``
    divides the body position by ``POS_SCALE`` when reading it back.
    """

    # Divisor applied to the body position when it is read back.
    # NOTE(review): reset() places the body at ``factor * pos`` while reads
    # divide by this constant, so the two only round-trip when factor == 40 —
    # confirm with callers before unifying them.
    POS_SCALE = 40

    # Multiplier applied to the action before it is used as a force.
    FORCE_SCALE = 10

    # Constant +z force added to every action.
    # NOTE(review): presumably gravity compensation for a unit-mass body —
    # confirm against the mass configured in Drone.uav.
    BASE_FORCE = (0.0, 0.0, 9.81)

    def __init__(self, factor, visualize):
        """Create the agent and its underlying drone.

        Args:
            factor: Multiplier applied to the sampled start position when the
                body is placed in ``reset``.
            visualize: Forwarded unchanged to ``Drone.uav``.
        """
        self.visualize = visualize
        self.factor = factor
        self.pos = np.zeros(3)
        self.lastPos = np.zeros(3)
        self.target = np.zeros(3)
        self.ep_rew = 0
        self.done = False
        self.drone = Drone.uav(visualize)

    def getReward(self):
        """Return how much closer ``pos`` is to ``target`` than ``lastPos`` was.

        Positive when the distance to the target shrank, negative when it grew.
        """
        dist_now = np.linalg.norm(self.pos - self.target)
        dist_before = np.linalg.norm(self.lastPos - self.target)
        return dist_before - dist_now

    def getSubState(self):
        """Refresh ``pos`` from the drone body and return the observation.

        Returns:
            A float32 array of shape (6,): the scaled position followed by the
            target.
        """
        scaled = self.drone.drone.transform.pos / self.POS_SCALE
        # Store a real ndarray so the arithmetic in getReward() does not depend
        # on whatever vector type the physics node hands back.
        self.pos = np.asarray(scaled, dtype=np.float64)
        return np.array([self.pos, self.target], dtype=np.float32).reshape(6,)

    def reset(self):
        """Start a new episode with a random start position and target.

        The body is moved to the scaled start position with zero orientation
        and zero linear/angular velocity.

        Returns:
            The initial observation, as produced by ``getSubState``.
        """
        self.ep_rew = 0
        self.done = False
        # Sampling order (start, then target) is kept so seeded runs reproduce.
        start = np.random.rand(3) - 0.5
        self.pos = start
        self.target = np.random.rand(3) - 0.5

        body = self.drone.body
        body.setPos(
            self.factor * start[0],
            self.factor * start[1],
            self.factor * start[2],
        )
        body.setHpr(0, 0, 0)
        self.drone.drone.set_linear_velocity(Vec3(0, 0, 0))
        self.drone.drone.setAngularVelocity(Vec3(0, 0, 0))

        state = self.getSubState()
        # Take the baseline from the position that was actually read back, as
        # an independent copy, so the first reward compares like with like.
        self.lastPos = np.copy(self.pos)
        return state

    def step(self, a):
        """Apply action ``a`` as a central force and report the transition.

        No physics step is triggered here; presumably the caller advances the
        simulation between calls.

        Args:
            a: Sequence of three numbers; scaled by ``FORCE_SCALE`` and offset
                by ``BASE_FORCE`` to form the applied force.

        Returns:
            ``(state, reward, False, {})`` — the done flag is always False.
        """
        # asarray keeps plain lists from being repeated by the scalar multiply.
        action = np.asarray(a, dtype=np.float64)
        thrust = self.FORCE_SCALE * action + np.asarray(self.BASE_FORCE, dtype=np.float64)
        self.drone.drone.applyCentralForce(Vec3(thrust[0], thrust[1], thrust[2]))

        # Refresh the position before scoring: scoring first would compare
        # pos with an identical lastPos and yield a reward of zero.
        state = self.getSubState()
        reward = self.getReward()
        self.lastPos = np.copy(self.pos)
        self.ep_rew += reward
        return state, reward, False, {}