-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapproximate_q_learning.cpp
More file actions
113 lines (100 loc) · 3.71 KB
/
Copy pathapproximate_q_learning.cpp
File metadata and controls
113 lines (100 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
Copyright (C) 2017 Meritxell Jordana
Copyright (C) 2017 Marc Sanchez
*/
#include <cstdlib>
#include <math.h>
#include "approximate_q_learning.h"
/**
 * Constructs the approximate Q-learning agent.
 *
 * Forwards map, epsilon, alpha and discount to the ReinforcementAgent base
 * constructor together with the literal 50, then calls
 * registerInitialState(map).
 *
 * NOTE(review): the meaning of the literal 50 is defined by
 * ReinforcementAgent, which is not visible in this file -- confirm what it
 * configures before changing it.
 */
ApproximateQLearning::ApproximateQLearning(Map *map, const double epsilon,
const double alpha, const double discount) :
ReinforcementAgent(map, 50, epsilon, alpha, discount) {
registerInitialState(map);
}
/**
 * Chooses the enemy's next move on the current map, applies it, and returns it.
 *
 * The current map is first passed to observationFunction(). If the enemy has
 * at least one legal move, flipCoin(epsilon) decides between a random legal
 * move (true) and the move returned by getPolicy() (false). With no legal
 * moves the action stays None. The chosen action is always forwarded to
 * doAction() before being returned.
 */
Direction ApproximateQLearning::getAction() {
    observationFunction(*map);

    list<Direction> candidates = map->getLegalMoves(Enemy);
    Direction chosen = None;
    if (!candidates.empty()) {
        // flipCoin() is only consulted when there is something to choose from.
        const bool explore = flipCoin(epsilon);
        if (explore) {
            chosen = getRandomDirection(candidates);
        } else {
            chosen = getPolicy(*map);
        }
    }

    doAction(*map, chosen);
    return chosen;
}
/**
 * Returns the approximate Q-value of a (state, action) key.
 *
 * The value is the sum over all features returned by getFeatures(key) of
 * weight * feature value. Weights are read with operator[], so a feature
 * name that has no weight yet is inserted into `weights` on first use.
 */
double ApproximateQLearning::getQValue(QValuesKey key) {
    std::map<string, double> features = getFeatures(key);

    double total = 0.0;
    std::map<string, double>::iterator feature = features.begin();
    while (feature != features.end()) {
        const string &name = feature->first;
        total += weights[name] * feature->second;
        ++feature;
    }
    return total;
}
/**
 * Stores newValue under key in the qvalues table.
 *
 * Note that getQValue() in this file derives its result from `weights` and
 * getFeatures() and does not read `qvalues`.
 */
void ApproximateQLearning::setQValue(QValuesKey &key, double newValue) {
qvalues[key] = newValue;
}
/**
 * Returns the value of a state: the largest Q-value over the enemy's legal
 * moves in `map`, or 0.0 when there are no legal moves.
 *
 * The running maximum is seeded with the first action's Q-value rather than
 * with 0.0. Seeding with 0.0 would clamp the result at zero and hide the true
 * maximum whenever every legal action has a negative Q-value.
 *
 * @param map state to evaluate
 * @return max over legal actions of getQValue(state, action); 0.0 if none
 */
double ApproximateQLearning::computeValueFromQValues(Map &map) {
    list<Direction> legalMoves = map.getLegalMoves(Enemy);
    if (legalMoves.empty()) return 0.0;

    list<Direction>::iterator a = legalMoves.begin();
    double best = getQValue(QValuesKey(map, *a));
    for (++a; a != legalMoves.end(); ++a) {
        const double qValue = getQValue(QValuesKey(map, *a));
        if (qValue > best) best = qValue;
    }
    return best;
}
/**
 * Returns a greedy action for the enemy in `map`.
 *
 * Every legal move is scored once with getQValue(). All moves whose Q-value
 * lies within 1e-7 of the maximum are collected, and one of them is picked
 * with getRandomDirection().
 *
 * The maximum is computed here from the actual Q-values, so the candidate
 * list can never be empty when at least one legal move exists, including
 * when all Q-values are negative. With no legal moves, None is returned
 * instead of sampling from an empty list.
 *
 * @param map state to act in
 * @return one of the best-valued legal moves, or None if there are none
 */
Direction ApproximateQLearning::computeActionFromQValues(Map &map) {
    list<Direction> legalMoves = map.getLegalMoves(Enemy);
    if (legalMoves.empty()) return None;

    // Score each move exactly once; the scores are reused for tie collection.
    list<double> scores;
    double bestValue = 0.0;
    bool haveBest = false;
    for (list<Direction>::iterator a = legalMoves.begin(); a != legalMoves.end(); ++a) {
        const double qValue = getQValue(QValuesKey(map, *a));
        scores.push_back(qValue);
        if (!haveBest || qValue > bestValue) {
            bestValue = qValue;
            haveBest = true;
        }
    }

    // Collect every move that ties with the best score (within tolerance).
    list<Direction> bestActions;
    list<double>::iterator score = scores.begin();
    for (list<Direction>::iterator a = legalMoves.begin(); a != legalMoves.end(); ++a, ++score) {
        if (fabs(bestValue - *score) < 0.0000001) {
            bestActions.push_back(*a);
        }
    }
    return getRandomDirection(bestActions);
}
/**
 * Applies one approximate Q-learning weight update for an observed transition.
 *
 * Computes difference = (reward + discount * getValue(nextState)) - getQValue(key)
 * and then, for every feature returned by getFeatures(key), adds
 * alpha * difference * featureValue to that feature's weight.
 *
 * @param key       (state, action) pair that was taken
 * @param nextState state reached after the action
 * @param reward    reward observed for the transition
 */
void ApproximateQLearning::update(QValuesKey &key, Map &nextState, double reward) {
    std::map<string, double> features = getFeatures(key);

    const double target = reward + discount * getValue(nextState);
    const double difference = target - getQValue(key);
    const double step = alpha * difference;

    std::map<string, double>::iterator feature = features.begin();
    while (feature != features.end()) {
        weights[feature->first] += step * feature->second;
        ++feature;
    }
}
/**
 * Returns the action selected for the given state; delegates directly to
 * computeActionFromQValues(map).
 */
Direction ApproximateQLearning::getPolicy(Map &map) {
return computeActionFromQValues(map);
}
/**
 * Returns the value of the given state; delegates directly to
 * computeValueFromQValues(map).
 */
double ApproximateQLearning::getValue(Map &map) {
return computeValueFromQValues(map);
}
/**
 * Returns true with probability p.
 *
 * rand() yields an integer in [0, RAND_MAX]; dividing by RAND_MAX + 1.0 maps
 * it to a real number in [0, 1), which is then compared against p. Comparing
 * the raw rand() value against p would be true only when rand() returned 0,
 * so the coin would practically never come up true.
 *
 * @param p probability of returning true; p <= 0 never succeeds,
 *          p >= 1 always succeeds
 */
bool ApproximateQLearning::flipCoin(double p) {
    const double sample = rand() / (RAND_MAX + 1.0);
    return sample < p;
}
/**
 * Builds the feature vector for a (state, action) key.
 *
 * Features produced (all divided by 10 via divideMapBy before returning):
 *  - "bias": always 1.0.
 *  - "#-of-ghosts-1-step-away": 1.0 when the Manhattan distance between the
 *    player position and the enemy position is exactly 1, otherwise 0.0.
 *  - "eats-food": 1.0, only set when the previous feature is 0.0 and the
 *    cell at the enemy's next position holds Food.
 *  - "closest-food": closest food distance from the enemy's next position
 *    divided by the map dimension, only set when that distance is > 0.
 */
std::map<string, double> ApproximateQLearning::getFeatures(QValuesKey &key) {
    std::map<string, double> result;
    result["bias"] = 1.0;

    Position target = key.state.getNextEnemyPosition(key.action);

    // NOTE(review): adjacency is measured from the enemy's current position,
    // not from `target`, so this feature does not depend on the action --
    // confirm that this is intended.
    const bool adjacent = Strategy::manhattanDistance(key.state.getPlayerPosition(),
                                                      key.state.getEnemyPosition()) == 1;
    result["#-of-ghosts-1-step-away"] = adjacent ? 1.0 : 0.0;
    if (!adjacent && key.state.getPositionCellType(target) == Food) {
        result["eats-food"] = 1.0;
    }

    const int foodDistance = key.state.getClosestFoodDistance(target);
    if (foodDistance > 0) {
        result["closest-food"] = static_cast<double>(foodDistance) / key.state.getDimension();
    }

    divideMapBy(result, 10.0);
    return result;
}
void ApproximateQLearning::divideMapBy(std::map<string, double> &features,
double divisor) {
for (std::map<string, double>::iterator a = features.begin(); a != features.end(); ++a) {
features[a->first] = a->second / divisor;
}
}