about | summary | refs | log | tree | commit | diff
path: root/src/ats
diff options
context:
space:
mode:
author: Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-04 19:21:05 +0000
committer: Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-04 19:21:05 +0000
commit: a77de892778f22d687edb8ff9009dc67b2660691 (patch)
tree: 45699ff658bb7336dea42c3c5624501b8e252621 /src/ats
parent: 239310367ce5a29fa2ab641694270f6c9b90ef61 (diff)
download: gnunet-a77de892778f22d687edb8ff9009dc67b2660691.tar.gz
download: gnunet-a77de892778f22d687edb8ff9009dc67b2660691.zip
added simulation
Diffstat (limited to 'src/ats')
-rwxr-xr-x src/ats/libgnunet_plugin_ats_ril.c 90
1 files changed, 52 insertions, 38 deletions
diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c
index 7cc09950a..603e93a4d 100755
--- a/src/ats/libgnunet_plugin_ats_ril.c
+++ b/src/ats/libgnunet_plugin_ats_ril.c
@@ -45,6 +45,8 @@
45#define RIL_DEFAULT_EXPLORE_RATIO 0.1 45#define RIL_DEFAULT_EXPLORE_RATIO 0.1
46#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5 46#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
47 47
48#define RIL_INC_DEC_STEP_SIZE 1
49
48/** 50/**
49 * ATS reinforcement learning solver 51 * ATS reinforcement learning solver
50 * 52 *
@@ -68,7 +70,7 @@ enum RIL_Action_Type
68 RIL_ACTION_BW_OUT_HLV = -5, 70 RIL_ACTION_BW_OUT_HLV = -5,
69 RIL_ACTION_BW_OUT_INC = -6, 71 RIL_ACTION_BW_OUT_INC = -6,
70 RIL_ACTION_BW_OUT_DEC = -7, 72 RIL_ACTION_BW_OUT_DEC = -7,
71 RIL_ACTION_TYPE_NUM = 2 73 RIL_ACTION_TYPE_NUM = 1
72}; 74};
73 75
74enum RIL_Algorithm 76enum RIL_Algorithm
@@ -363,6 +365,11 @@ struct GAS_RIL_Handle
363 * Shutdown 365 * Shutdown
364 */ 366 */
365 int done; 367 int done;
368
369 /**
370 * Simulate steps, i.e. schedule steps immediately
371 */
372 unsigned long long simulate;
366}; 373};
367 374
368/* 375/*
@@ -396,8 +403,7 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
396 403
397 if (isinf(result)) 404 if (isinf(result))
398 { 405 {
399 GNUNET_assert(GNUNET_NO); 406 return isinf(result) * UINT32_MAX; //TODO! fix
400 return isinf(result) * (DBL_MAX / 2); //TODO! fix
401 } 407 }
402 408
403 return result; 409 return result;
@@ -537,13 +543,14 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
537 delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value 543 delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
538 delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step 544 delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
539 545
540// LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f y*Q(s+1,a+1) = %f Q(s,a) = %f\n, y = %f\n", 546 LOG(GNUNET_ERROR_TYPE_INFO, "update() Step# %llu Q(s,a): %f a: %f r: %f y: %f Q(s+1,a+1) = %f delta: %f\n",
541// agent->envi->global_discount_integrated * reward, 547 agent->step_count,
542// agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime), 548 agent_estimate_q (agent, agent->s_old, agent->a_old),
543// agent_estimate_q (agent, agent->s_old, agent->a_old), 549 agent->envi->parameters.alpha,
544// agent->envi->global_discount_variable); 550 reward,
545// 551 agent->envi->global_discount_variable,
546// LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta); 552 agent_estimate_q (agent, s_next, a_prime),
553 delta);
547 554
548 for (i = 0; i < agent->m; i++) 555 for (i = 0; i < agent->m; i++)
549 { 556 {
@@ -764,17 +771,17 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
764 state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available; 771 state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
765 if (net->bw_in_assigned > net->bw_in_available) 772 if (net->bw_in_assigned > net->bw_in_available)
766 { 773 {
767 state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available; 774 state[1] = 1;// net->bw_in_available;
768 } 775 }
769 else 776 else
770 { 777 {
771 state[1] = 0; 778 state[1] = 0;
772 } 779 }
773 LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]); 780 LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[0] = %f\n", state[0]);
774 LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]); 781 LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[1] = %f\n", state[1]);
775 782
776 LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]); 783 LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
777 LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]); 784 LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
778 785
779 786
780 //get peer features 787 //get peer features
@@ -1072,7 +1079,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1072 1079
1073 if (direction_in) 1080 if (direction_in)
1074 { 1081 {
1075 new_bw = agent->bw_in + (1 * MIN_BW); 1082 new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1076 if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth) 1083 if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
1077 new_bw = GNUNET_ATS_MaxBandwidth; 1084 new_bw = GNUNET_ATS_MaxBandwidth;
1078 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, 1085 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
@@ -1080,7 +1087,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1080 } 1087 }
1081 else 1088 else
1082 { 1089 {
1083 new_bw = agent->bw_out + (1 * MIN_BW); 1090 new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1084 if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth) 1091 if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
1085 new_bw = GNUNET_ATS_MaxBandwidth; 1092 new_bw = GNUNET_ATS_MaxBandwidth;
1086 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, 1093 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
@@ -1104,7 +1111,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1104 1111
1105 if (direction_in) 1112 if (direction_in)
1106 { 1113 {
1107 new_bw = agent->bw_in - (1 * MIN_BW); 1114 new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1108 if (new_bw < MIN_BW || new_bw > agent->bw_in) 1115 if (new_bw < MIN_BW || new_bw > agent->bw_in)
1109 new_bw = MIN_BW; 1116 new_bw = MIN_BW;
1110 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out, 1117 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
@@ -1112,7 +1119,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1112 } 1119 }
1113 else 1120 else
1114 { 1121 {
1115 new_bw = agent->bw_out - (1 * MIN_BW); 1122 new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1116 if (new_bw < MIN_BW || new_bw > agent->bw_out) 1123 if (new_bw < MIN_BW || new_bw > agent->bw_out)
1117 new_bw = MIN_BW; 1124 new_bw = MIN_BW;
1118 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw, 1125 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
@@ -1220,6 +1227,7 @@ static void
1220agent_step (struct RIL_Peer_Agent *agent) 1227agent_step (struct RIL_Peer_Agent *agent)
1221{ 1228{
1222 int a_next = RIL_ACTION_INVALID; 1229 int a_next = RIL_ACTION_INVALID;
1230 int explore;
1223 double *s_next; 1231 double *s_next;
1224 double reward; 1232 double reward;
1225 1233
@@ -1229,22 +1237,12 @@ agent_step (struct RIL_Peer_Agent *agent)
1229 1237
1230 s_next = envi_get_state (agent->envi, agent); 1238 s_next = envi_get_state (agent->envi, agent);
1231 reward = envi_get_reward (agent->envi, agent); 1239 reward = envi_get_reward (agent->envi, agent);
1232 1240 explore = agent_decide_exploration (agent);
1233 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: A: %d R: %f IN %llu OUT %llu\n",
1234 agent->step_count,
1235 agent->a_old,
1236 reward,
1237 agent->bw_in/1024,
1238 agent->bw_out/1024);
1239 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Best A: %d Q(s,a): %f \n",
1240 agent->step_count,
1241 agent_get_action_best (agent, s_next),
1242 agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
1243 1241
1244 switch (agent->envi->parameters.algorithm) 1242 switch (agent->envi->parameters.algorithm)
1245 { 1243 {
1246 case RIL_ALGO_SARSA: 1244 case RIL_ALGO_SARSA:
1247 if (agent_decide_exploration (agent)) 1245 if (explore)
1248 { 1246 {
1249 a_next = agent_get_action_explore (agent, s_next); 1247 a_next = agent_get_action_explore (agent, s_next);
1250 } 1248 }
@@ -1267,7 +1265,7 @@ agent_step (struct RIL_Peer_Agent *agent)
1267 //updates weights with best action, disregarding actually selected action (off-policy), if not first step 1265 //updates weights with best action, disregarding actually selected action (off-policy), if not first step
1268 agent_update_weights (agent, reward, s_next, a_next); 1266 agent_update_weights (agent, reward, s_next, a_next);
1269 } 1267 }
1270 if (agent_decide_exploration (agent)) 1268 if (explore)
1271 { 1269 {
1272 a_next = agent_get_action_explore (agent, s_next); 1270 a_next = agent_get_action_explore (agent, s_next);
1273 agent_modify_eligibility (agent, RIL_E_ZERO, NULL); 1271 agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
@@ -1284,6 +1282,13 @@ agent_step (struct RIL_Peer_Agent *agent)
1284 1282
1285 agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next); 1283 agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
1286 1284
1285 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n",
1286 agent->step_count,
1287 reward,
1288 agent->bw_in/1024,
1289 agent->bw_out/1024,
1290 a_next);
1291
1287 envi_do_action (agent->envi, agent, a_next); 1292 envi_do_action (agent->envi, agent, a_next);
1288 1293
1289 GNUNET_free(agent->s_old); 1294 GNUNET_free(agent->s_old);
@@ -1416,7 +1421,7 @@ ril_calculate_discount (struct GAS_RIL_Handle *solver)
1416 double tau; 1421 double tau;
1417 1422
1418 // MDP case - remove when debugged 1423 // MDP case - remove when debugged
1419 if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us) 1424 if (solver->simulate)
1420 { 1425 {
1421 solver->global_discount_variable = solver->parameters.gamma; 1426 solver->global_discount_variable = solver->parameters.gamma;
1422 solver->global_discount_integrated = 1; 1427 solver->global_discount_integrated = 1;
@@ -1484,6 +1489,11 @@ ril_step_schedule_next (struct GAS_RIL_Handle *solver)
1484 1489
1485 time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y); 1490 time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
1486 1491
1492 if (solver->simulate)
1493 {
1494 time_next = GNUNET_TIME_UNIT_ZERO;
1495 }
1496
1487 if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done)) 1497 if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
1488 { 1498 {
1489 solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task, 1499 solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
@@ -1583,7 +1593,7 @@ agent_w_start (struct RIL_Peer_Agent *agent)
1583 for (k = 0; k < agent->m; k++) 1593 for (k = 0; k < agent->m; k++)
1584 { 1594 {
1585 if (0 == count) { 1595 if (0 == count) {
1586 agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX); 1596 agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
1587 } 1597 }
1588 else { 1598 else {
1589 for (other = agent->envi->agents_head; NULL != other; other = other->next) 1599 for (other = agent->envi->agents_head; NULL != other; other = other->next)
@@ -1878,6 +1888,10 @@ libgnunet_plugin_ats_ril_init (void *cls)
1878 { 1888 {
1879 solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE; 1889 solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
1880 } 1890 }
1891 if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate))
1892 {
1893 solver->simulate = GNUNET_NO;
1894 }
1881 1895
1882 env->sf.s_add = &GAS_ril_address_add; 1896 env->sf.s_add = &GAS_ril_address_add;
1883 env->sf.s_address_update_property = &GAS_ril_address_property_changed; 1897 env->sf.s_address_update_property = &GAS_ril_address_property_changed;
@@ -1905,16 +1919,16 @@ libgnunet_plugin_ats_ril_init (void *cls)
1905 cur->type = env->networks[c]; 1919 cur->type = env->networks[c];
1906 cur->bw_in_available = env->in_quota[c]; 1920 cur->bw_in_available = env->in_quota[c];
1907 cur->bw_out_available = env->out_quota[c]; 1921 cur->bw_out_available = env->out_quota[c];
1908 LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024); 1922 LOG(GNUNET_ERROR_TYPE_INFO, "init() Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
1909 } 1923 }
1910 1924
1911 LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n"); 1925 LOG(GNUNET_ERROR_TYPE_INFO, "init() Parameters:\n");
1912 LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n", 1926 LOG(GNUNET_ERROR_TYPE_INFO, "init() Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
1913 solver->parameters.algorithm ? "Q" : "SARSA", 1927 solver->parameters.algorithm ? "Q" : "SARSA",
1914 solver->parameters.alpha, 1928 solver->parameters.alpha,
1915 solver->parameters.beta, 1929 solver->parameters.beta,
1916 solver->parameters.lambda); 1930 solver->parameters.lambda);
1917 LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n", 1931 LOG(GNUNET_ERROR_TYPE_INFO, "init() explore = %f, global_share = %f\n",
1918 solver->parameters.explore_ratio, 1932 solver->parameters.explore_ratio,
1919 solver->parameters.reward_global_share); 1933 solver->parameters.reward_global_share);
1920 1934