summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Fabian Oehlmann <oehlmann@in.tum.de> 2013-11-05 16:43:36 +0000
committer: Fabian Oehlmann <oehlmann@in.tum.de> 2013-11-05 16:43:36 +0000
commit: 5ff9d6c06021db8efad154660843ed4f3617fd98 (patch)
tree: 3ac71ea8d66f4c93cf4c0fe7d5c36f4f7361e00c /src
parent: 119d6128a594bbc64442b9435dc7f1dbebaa7300 (diff)
download: gnunet-5ff9d6c06021db8efad154660843ed4f3617fd98.tar.gz
download: gnunet-5ff9d6c06021db8efad154660843ed4f3617fd98.zip
- corrected discount for continuous smdp
Diffstat (limited to 'src')
-rwxr-xr-x src/ats/libgnunet_plugin_ats_ril.c 121
-rwxr-xr-x src/ats/libgnunet_plugin_ats_ril.h 3
2 files changed, 82 insertions, 42 deletions
diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c
index f7ae95b20..3d8ab36e0 100755
--- a/src/ats/libgnunet_plugin_ats_ril.c
+++ b/src/ats/libgnunet_plugin_ats_ril.c
@@ -32,9 +32,10 @@
32#define RIL_FEATURES_ADDRESS_COUNT (3 + GNUNET_ATS_QualityPropertiesCount) 32#define RIL_FEATURES_ADDRESS_COUNT (3 + GNUNET_ATS_QualityPropertiesCount)
33#define RIL_FEATURES_NETWORK_COUNT 4 33#define RIL_FEATURES_NETWORK_COUNT 4
34 34
35#define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000) 35#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500)
36#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 10000)
36#define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q 37#define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q
37#define RIL_DEFAULT_DISCOUNT_FACTOR 0.5 38#define RIL_DEFAULT_DISCOUNT_BETA 0.7
38#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4 39#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
39#define RIL_DEFAULT_TRACE_DECAY 0.6 40#define RIL_DEFAULT_TRACE_DECAY 0.6
40#define RIL_EXPLORE_RATIO 0.1 41#define RIL_EXPLORE_RATIO 0.1
@@ -92,7 +93,7 @@ struct RIL_Learning_Parameters
92 /** 93 /**
93 * Learning discount factor in the TD-update 94 * Learning discount factor in the TD-update
94 */ 95 */
95 float gamma; 96 float beta;
96 97
97 /** 98 /**
98 * Gradient-descent step-size 99 * Gradient-descent step-size
@@ -103,6 +104,16 @@ struct RIL_Learning_Parameters
103 * Trace-decay factor for eligibility traces 104 * Trace-decay factor for eligibility traces
104 */ 105 */
105 float lambda; 106 float lambda;
107
108 /**
109 * Minimal interval time between steps in milliseconds
110 */
111 struct GNUNET_TIME_Relative step_time_min;
112
113 /**
114 * Maximum interval time between steps in milliseconds
115 */
116 struct GNUNET_TIME_Relative step_time_max;
106}; 117};
107 118
108/** 119/**
@@ -248,7 +259,7 @@ struct RIL_Network
248struct GAS_RIL_Handle 259struct GAS_RIL_Handle
249{ 260{
250 /** 261 /**
251 * 262 * The solver-plugin environment of the solver-plugin API
252 */ 263 */
253 struct GNUNET_ATS_PluginEnvironment *plugin_envi; 264 struct GNUNET_ATS_PluginEnvironment *plugin_envi;
254 265
@@ -258,24 +269,29 @@ struct GAS_RIL_Handle
258 struct GNUNET_STATISTICS_Handle *stats; 269 struct GNUNET_STATISTICS_Handle *stats;
259 270
260 /** 271 /**
261 * Number of performed epochs
262 */
263 unsigned long long epoch_count;
264
265 /**
266 * Number of performed steps 272 * Number of performed steps
267 */ 273 */
268 unsigned long long step_count; 274 unsigned long long step_count;
269 275
270 /** 276 /**
271 * Interval time between steps in milliseconds //TODO? Future Work: Heterogeneous stepping among agents 277 * Timestamp for the last time-step
272 */ 278 */
273 struct GNUNET_TIME_Relative step_time; 279 struct GNUNET_TIME_Absolute step_time_last;
274 280
275 /** 281 /**
276 * Task identifier of the next time-step to be executed 282 * Task identifier of the next time-step to be executed
277 */ 283 */
278 GNUNET_SCHEDULER_TaskIdentifier next_step; 284 GNUNET_SCHEDULER_TaskIdentifier step_next_task;
285
286 /**
287 * Variable discount factor, dependent on time between steps
288 */
289 double discount_variable;
290
291 /**
292 * Integrated variable discount factor, dependent on time between steps
293 */
294 double discount_integrated;
279 295
280 /** 296 /**
281 * Lock for bulk operations 297 * Lock for bulk operations
@@ -469,11 +485,12 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
469 double delta; 485 double delta;
470 double *theta = agent->W[agent->a_old]; 486 double *theta = agent->W[agent->a_old];
471 487
472 delta = reward + agent_estimate_q (agent, s_next, a_prime) 488 delta = agent->envi->discount_integrated * reward; //reward
473 - agent_estimate_q (agent, agent->s_old, agent->a_old); 489 delta += agent->envi->discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
490 delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
474 for (i = 0; i < agent->m; i++) 491 for (i = 0; i < agent->m; i++)
475 { 492 {
476 theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i]; 493 theta[i] += agent->envi->parameters.alpha * delta * agent->e[i];
477 } 494 }
478} 495}
479 496
@@ -481,7 +498,7 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
481 * Changes the eligibility trace vector e in various manners: 498 * Changes the eligibility trace vector e in various manners:
482 * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces 499 * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
483 * RIL_E_REPLACE - resets each component to 1 as in replacing traces 500 * RIL_E_REPLACE - resets each component to 1 as in replacing traces
484 * RIL_E_SET - multiplies e with gamma and lambda as in the update rule 501 * RIL_E_SET - multiplies e with discount factor and lambda as in the update rule
485 * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing 502 * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
486 * 503 *
487 * @param agent the agent handle 504 * @param agent the agent handle
@@ -492,8 +509,6 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification
492{ 509{
493 int i; 510 int i;
494 double *e = agent->e; 511 double *e = agent->e;
495 double gamma = agent->envi->parameters.gamma;
496 double lambda = agent->envi->parameters.lambda;
497 512
498 for (i = 0; i < agent->m; i++) 513 for (i = 0; i < agent->m; i++)
499 { 514 {
@@ -506,7 +521,7 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification
506 e[i] = 1; 521 e[i] = 1;
507 break; 522 break;
508 case RIL_E_SET: 523 case RIL_E_SET:
509 e[i] = gamma * lambda; 524 e[i] = agent->envi->discount_variable * agent->envi->parameters.lambda;
510 break; 525 break;
511 case RIL_E_ZERO: 526 case RIL_E_ZERO:
512 e[i] = 0; 527 e[i] = 0;
@@ -1068,14 +1083,16 @@ agent_step (struct RIL_Peer_Agent *agent)
1068} 1083}
1069 1084
1070/** 1085/**
1071 * Triggers one epoch of agent decisions 1086 * Triggers one step per agent
1072 * @param solver 1087 * @param solver
1073 */ 1088 */
1074static int 1089static int
1075ril_epoch (struct GAS_RIL_Handle *solver) 1090ril_step (struct GAS_RIL_Handle *solver)
1076{ 1091{
1077 //TODO! add multiple steps per epoch
1078 struct RIL_Peer_Agent *cur; 1092 struct RIL_Peer_Agent *cur;
1093 struct GNUNET_TIME_Absolute time_now;
1094 struct GNUNET_TIME_Relative time_delta;
1095 double tau;
1079 1096
1080 if (GNUNET_YES == solver->bulk_lock) 1097 if (GNUNET_YES == solver->bulk_lock)
1081 { 1098 {
@@ -1084,6 +1101,22 @@ ril_epoch (struct GAS_RIL_Handle *solver)
1084 } 1101 }
1085 1102
1086 ril_inform(solver, GAS_OP_SOLVE_START, GAS_STAT_SUCCESS); 1103 ril_inform(solver, GAS_OP_SOLVE_START, GAS_STAT_SUCCESS);
1104
1105 if (0 == solver->step_count) {
1106 solver->step_time_last = GNUNET_TIME_absolute_get ();
1107 }
1108
1109 //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step
1110 time_now = GNUNET_TIME_absolute_get ();
1111 time_delta = GNUNET_TIME_absolute_get_difference(solver->step_time_last, time_now);
1112 tau = ((double) time_delta.rel_value_us) / ((double) solver->parameters.step_time_min.rel_value_us);
1113 memcpy(&solver->step_time_last, &time_now, sizeof(struct GNUNET_TIME_Absolute));
1114
1115 //calculate reward discounts (once per step for all agents)
1116 solver->discount_variable = pow(M_E, ((-1.) * ((double) solver->parameters.beta) * tau));
1117 solver->discount_integrated = (1 - solver->discount_variable) / ((double) solver->parameters.beta);
1118
1119 //trigger one step per active agent
1087 for (cur = solver->agents_head; NULL != cur; cur = cur->next) 1120 for (cur = solver->agents_head; NULL != cur; cur = cur->next)
1088 { 1121 {
1089 if (cur->is_active && cur->address_inuse) 1122 if (cur->is_active && cur->address_inuse)
@@ -1097,7 +1130,7 @@ ril_epoch (struct GAS_RIL_Handle *solver)
1097} 1130}
1098 1131
1099/** 1132/**
1100 * Cycles through all agents and lets the active ones do a step. Schedules the next step. 1133 * Triggers one multi-agent step and schedules the next one.
1101 * 1134 *
1102 * @param cls the solver handle 1135 * @param cls the solver handle
1103 * @param tc the task context for the scheduler 1136 * @param tc the task context for the scheduler
@@ -1109,10 +1142,11 @@ ril_periodic_step (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
1109 1142
1110 LOG(GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count); 1143 LOG(GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count);
1111 1144
1112 ril_epoch(solver); 1145 ril_step(solver);
1146 solver->step_count += 1;
1113 1147
1114 solver->epoch_count += 1; 1148 //TODO! next step scheduling depending on how many resources are left
1115 solver->next_step = GNUNET_SCHEDULER_add_delayed (solver->step_time, &ril_periodic_step, solver); 1149 solver->step_next_task = GNUNET_SCHEDULER_add_delayed (solver->parameters.step_time_max, &ril_periodic_step, solver);
1116} 1150}
1117 1151
1118/** 1152/**
@@ -1315,7 +1349,7 @@ GAS_ril_address_change_preference (void *solver,
1315 "API_address_change_preference() Preference '%s' for peer '%s' changed to %.2f \n", 1349 "API_address_change_preference() Preference '%s' for peer '%s' changed to %.2f \n",
1316 GNUNET_ATS_print_preference_type (kind), GNUNET_i2s (peer), pref_rel); 1350 GNUNET_ATS_print_preference_type (kind), GNUNET_i2s (peer), pref_rel);
1317 1351
1318 ril_epoch(solver); 1352 ril_step(solver);
1319} 1353}
1320 1354
1321/** 1355/**
@@ -1343,9 +1377,14 @@ libgnunet_plugin_ats_ril_init (void *cls)
1343 GNUNET_assert(NULL != env->get_property); 1377 GNUNET_assert(NULL != env->get_property);
1344 1378
1345 if (GNUNET_OK 1379 if (GNUNET_OK
1346 != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME", &solver->step_time)) 1380 != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME_MIN", &solver->parameters.step_time_min))
1381 {
1382 solver->parameters.step_time_min = RIL_DEFAULT_STEP_TIME_MIN;
1383 }
1384 if (GNUNET_OK
1385 != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME_MAX", &solver->parameters.step_time_max))
1347 { 1386 {
1348 solver->step_time = RIL_DEFAULT_STEP_TIME; 1387 solver->parameters.step_time_max = RIL_DEFAULT_STEP_TIME_MAX;
1349 } 1388 }
1350 if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_ALGORITHM", &string) 1389 if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_ALGORITHM", &string)
1351 && NULL != string && 0 == strcmp (string, "SARSA")) 1390 && NULL != string && 0 == strcmp (string, "SARSA"))
@@ -1357,13 +1396,13 @@ libgnunet_plugin_ats_ril_init (void *cls)
1357 solver->parameters.algorithm = RIL_DEFAULT_ALGORITHM; 1396 solver->parameters.algorithm = RIL_DEFAULT_ALGORITHM;
1358 } 1397 }
1359 if (GNUNET_OK 1398 if (GNUNET_OK
1360 == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_DISCOUNT_FACTOR", &tmp)) 1399 == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_DISCOUNT_BETA", &tmp))
1361 { 1400 {
1362 solver->parameters.gamma = (double) tmp / 100; 1401 solver->parameters.beta = (double) tmp / 100;
1363 } 1402 }
1364 else 1403 else
1365 { 1404 {
1366 solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR; 1405 solver->parameters.beta = RIL_DEFAULT_DISCOUNT_BETA;
1367 } 1406 }
1368 if (GNUNET_OK 1407 if (GNUNET_OK
1369 == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &tmp)) 1408 == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &tmp))
@@ -1411,7 +1450,7 @@ libgnunet_plugin_ats_ril_init (void *cls)
1411 cur->bw_out_assigned = 0; 1450 cur->bw_out_assigned = 0;
1412 } 1451 }
1413 1452
1414 solver->next_step = GNUNET_SCHEDULER_add_delayed ( 1453 solver->step_next_task = GNUNET_SCHEDULER_add_delayed (
1415 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000), 1454 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
1416 &ril_periodic_step, solver); 1455 &ril_periodic_step, solver);
1417 1456
@@ -1441,7 +1480,7 @@ libgnunet_plugin_ats_ril_done (void *cls)
1441 cur_agent = next_agent; 1480 cur_agent = next_agent;
1442 } 1481 }
1443 1482
1444 GNUNET_SCHEDULER_cancel (s->next_step); 1483 GNUNET_SCHEDULER_cancel (s->step_next_task);
1445 GNUNET_free(s->network_entries); 1484 GNUNET_free(s->network_entries);
1446 GNUNET_free(s); 1485 GNUNET_free(s);
1447 1486
@@ -1525,7 +1564,7 @@ GAS_ril_address_add (void *solver, struct ATS_Address *address, uint32_t network
1525 envi_set_active_suggestion (s, agent, address, min_bw, min_bw, GNUNET_NO); 1564 envi_set_active_suggestion (s, agent, address, min_bw, min_bw, GNUNET_NO);
1526 } 1565 }
1527 1566
1528 ril_epoch(s); 1567 ril_step(s);
1529 1568
1530 LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_add() Added %s %s address %p for peer '%s'\n", 1569 LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_add() Added %s %s address %p for peer '%s'\n",
1531 address->active ? "active" : "inactive", address->plugin, address->addr, 1570 address->active ? "active" : "inactive", address->plugin, address->addr,
@@ -1643,7 +1682,7 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
1643 } 1682 }
1644 } 1683 }
1645 1684
1646 ril_epoch(solver); 1685 ril_step(solver);
1647 1686
1648 LOG(GNUNET_ERROR_TYPE_DEBUG, "Address deleted\n"); 1687 LOG(GNUNET_ERROR_TYPE_DEBUG, "Address deleted\n");
1649} 1688}
@@ -1669,7 +1708,7 @@ GAS_ril_address_property_changed (void *solver,
1669 "to %.2f \n", GNUNET_ATS_print_property_type (type), GNUNET_i2s (&address->peer), 1708 "to %.2f \n", GNUNET_ATS_print_property_type (type), GNUNET_i2s (&address->peer),
1670 address->addr, rel_value); 1709 address->addr, rel_value);
1671 1710
1672 ril_epoch(solver); 1711 ril_step(solver);
1673} 1712}
1674 1713
1675/** 1714/**
@@ -1708,7 +1747,7 @@ void
1708GAS_ril_address_inuse_changed (void *solver, struct ATS_Address *address, int in_use) 1747GAS_ril_address_inuse_changed (void *solver, struct ATS_Address *address, int in_use)
1709{ 1748{
1710 /* Nothing to do here. 1749 /* Nothing to do here.
1711 * Possible TODO? Future Work: Use usage as state vector 1750 * Possible TODO? Future Work: Potentially add usage variable to state vector
1712 */ 1751 */
1713 LOG(GNUNET_ERROR_TYPE_DEBUG, 1752 LOG(GNUNET_ERROR_TYPE_DEBUG,
1714 "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n", 1753 "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n",
@@ -1829,7 +1868,7 @@ GAS_ril_bulk_stop (void *solver)
1829 1868
1830 if (0 < s->bulk_changes) 1869 if (0 < s->bulk_changes)
1831 { 1870 {
1832 ril_epoch (solver); 1871 ril_step (solver);
1833 s->bulk_changes = 0; 1872 s->bulk_changes = 0;
1834 } 1873 }
1835} 1874}
@@ -1880,7 +1919,7 @@ GAS_ril_get_preferred_address (void *solver, const struct GNUNET_PeerIdentity *p
1880 GNUNET_i2s (peer)); 1919 GNUNET_i2s (peer));
1881 } 1920 }
1882 1921
1883 ril_epoch(s); 1922 ril_step(s);
1884 1923
1885 return agent->address_inuse; 1924 return agent->address_inuse;
1886} 1925}
@@ -1924,7 +1963,7 @@ GAS_ril_stop_get_preferred_address (void *solver, const struct GNUNET_PeerIdenti
1924 envi_set_active_suggestion (s, agent, agent->address_inuse, agent->bw_in, agent->bw_out, 1963 envi_set_active_suggestion (s, agent, agent->address_inuse, agent->bw_in, agent->bw_out,
1925 GNUNET_YES); 1964 GNUNET_YES);
1926 1965
1927 ril_epoch(s); 1966 ril_step(s);
1928 1967
1929 LOG(GNUNET_ERROR_TYPE_DEBUG, 1968 LOG(GNUNET_ERROR_TYPE_DEBUG,
1930 "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n", 1969 "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n",
diff --git a/src/ats/libgnunet_plugin_ats_ril.h b/src/ats/libgnunet_plugin_ats_ril.h
index 6f60af52b..fd787cf4c 100755
--- a/src/ats/libgnunet_plugin_ats_ril.h
+++ b/src/ats/libgnunet_plugin_ats_ril.h
@@ -25,7 +25,8 @@
25 * @author Matthias Wachs 25 * @author Matthias Wachs
26 */ 26 */
27#include "platform.h" 27#include "platform.h"
28#include "float.h" 28#include <float.h>
29#include <math.h>
29#include "gnunet_ats_plugin.h" 30#include "gnunet_ats_plugin.h"
30#include "gnunet-service-ats_addresses.h" 31#include "gnunet-service-ats_addresses.h"
31 32