diff options
author | Fabian Oehlmann <oehlmann@in.tum.de> | 2013-12-04 19:21:05 +0000 |
---|---|---|
committer | Fabian Oehlmann <oehlmann@in.tum.de> | 2013-12-04 19:21:05 +0000 |
commit | a77de892778f22d687edb8ff9009dc67b2660691 (patch) | |
tree | 45699ff658bb7336dea42c3c5624501b8e252621 /src/ats | |
parent | 239310367ce5a29fa2ab641694270f6c9b90ef61 (diff) | |
download | gnunet-a77de892778f22d687edb8ff9009dc67b2660691.tar.gz gnunet-a77de892778f22d687edb8ff9009dc67b2660691.zip |
added simulation
Diffstat (limited to 'src/ats')
-rwxr-xr-x | src/ats/libgnunet_plugin_ats_ril.c | 90 |
1 file changed, 52 insertions, 38 deletions
diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c index 7cc09950a..603e93a4d 100755 --- a/src/ats/libgnunet_plugin_ats_ril.c +++ b/src/ats/libgnunet_plugin_ats_ril.c | |||
@@ -45,6 +45,8 @@ | |||
45 | #define RIL_DEFAULT_EXPLORE_RATIO 0.1 | 45 | #define RIL_DEFAULT_EXPLORE_RATIO 0.1 |
46 | #define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5 | 46 | #define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5 |
47 | 47 | ||
48 | #define RIL_INC_DEC_STEP_SIZE 1 | ||
49 | |||
48 | /** | 50 | /** |
49 | * ATS reinforcement learning solver | 51 | * ATS reinforcement learning solver |
50 | * | 52 | * |
@@ -68,7 +70,7 @@ enum RIL_Action_Type | |||
68 | RIL_ACTION_BW_OUT_HLV = -5, | 70 | RIL_ACTION_BW_OUT_HLV = -5, |
69 | RIL_ACTION_BW_OUT_INC = -6, | 71 | RIL_ACTION_BW_OUT_INC = -6, |
70 | RIL_ACTION_BW_OUT_DEC = -7, | 72 | RIL_ACTION_BW_OUT_DEC = -7, |
71 | RIL_ACTION_TYPE_NUM = 2 | 73 | RIL_ACTION_TYPE_NUM = 1 |
72 | }; | 74 | }; |
73 | 75 | ||
74 | enum RIL_Algorithm | 76 | enum RIL_Algorithm |
@@ -363,6 +365,11 @@ struct GAS_RIL_Handle | |||
363 | * Shutdown | 365 | * Shutdown |
364 | */ | 366 | */ |
365 | int done; | 367 | int done; |
368 | |||
369 | /** | ||
370 | * Simulate steps, i.e. schedule steps immediately | ||
371 | */ | ||
372 | unsigned long long simulate; | ||
366 | }; | 373 | }; |
367 | 374 | ||
368 | /* | 375 | /* |
@@ -396,8 +403,7 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action) | |||
396 | 403 | ||
397 | if (isinf(result)) | 404 | if (isinf(result)) |
398 | { | 405 | { |
399 | GNUNET_assert(GNUNET_NO); | 406 | return isinf(result) * UINT32_MAX; //TODO! fix |
400 | return isinf(result) * (DBL_MAX / 2); //TODO! fix | ||
401 | } | 407 | } |
402 | 408 | ||
403 | return result; | 409 | return result; |
@@ -537,13 +543,14 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex | |||
537 | delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value | 543 | delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value |
538 | delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step | 544 | delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step |
539 | 545 | ||
540 | // LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f y*Q(s+1,a+1) = %f Q(s,a) = %f\n, y = %f\n", | 546 | LOG(GNUNET_ERROR_TYPE_INFO, "update() Step# %llu Q(s,a): %f a: %f r: %f y: %f Q(s+1,a+1) = %f delta: %f\n", |
541 | // agent->envi->global_discount_integrated * reward, | 547 | agent->step_count, |
542 | // agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime), | 548 | agent_estimate_q (agent, agent->s_old, agent->a_old), |
543 | // agent_estimate_q (agent, agent->s_old, agent->a_old), | 549 | agent->envi->parameters.alpha, |
544 | // agent->envi->global_discount_variable); | 550 | reward, |
545 | // | 551 | agent->envi->global_discount_variable, |
546 | // LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta); | 552 | agent_estimate_q (agent, s_next, a_prime), |
553 | delta); | ||
547 | 554 | ||
548 | for (i = 0; i < agent->m; i++) | 555 | for (i = 0; i < agent->m; i++) |
549 | { | 556 | { |
@@ -764,17 +771,17 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
764 | state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available; | 771 | state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available; |
765 | if (net->bw_in_assigned > net->bw_in_available) | 772 | if (net->bw_in_assigned > net->bw_in_available) |
766 | { | 773 | { |
767 | state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available; | 774 | state[1] = 1;// net->bw_in_available; |
768 | } | 775 | } |
769 | else | 776 | else |
770 | { | 777 | { |
771 | state[1] = 0; | 778 | state[1] = 0; |
772 | } | 779 | } |
773 | LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]); | 780 | LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[0] = %f\n", state[0]); |
774 | LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]); | 781 | LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[1] = %f\n", state[1]); |
775 | 782 | ||
776 | LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]); | 783 | LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]); |
777 | LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]); | 784 | LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]); |
778 | 785 | ||
779 | 786 | ||
780 | //get peer features | 787 | //get peer features |
@@ -1072,7 +1079,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, | |||
1072 | 1079 | ||
1073 | if (direction_in) | 1080 | if (direction_in) |
1074 | { | 1081 | { |
1075 | new_bw = agent->bw_in + (1 * MIN_BW); | 1082 | new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW); |
1076 | if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth) | 1083 | if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth) |
1077 | new_bw = GNUNET_ATS_MaxBandwidth; | 1084 | new_bw = GNUNET_ATS_MaxBandwidth; |
1078 | envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, | 1085 | envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, |
@@ -1080,7 +1087,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, | |||
1080 | } | 1087 | } |
1081 | else | 1088 | else |
1082 | { | 1089 | { |
1083 | new_bw = agent->bw_out + (1 * MIN_BW); | 1090 | new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW); |
1084 | if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth) | 1091 | if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth) |
1085 | new_bw = GNUNET_ATS_MaxBandwidth; | 1092 | new_bw = GNUNET_ATS_MaxBandwidth; |
1086 | envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, | 1093 | envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, |
@@ -1104,7 +1111,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, | |||
1104 | 1111 | ||
1105 | if (direction_in) | 1112 | if (direction_in) |
1106 | { | 1113 | { |
1107 | new_bw = agent->bw_in - (1 * MIN_BW); | 1114 | new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW); |
1108 | if (new_bw < MIN_BW || new_bw > agent->bw_in) | 1115 | if (new_bw < MIN_BW || new_bw > agent->bw_in) |
1109 | new_bw = MIN_BW; | 1116 | new_bw = MIN_BW; |
1110 | envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out, | 1117 | envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out, |
@@ -1112,7 +1119,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, | |||
1112 | } | 1119 | } |
1113 | else | 1120 | else |
1114 | { | 1121 | { |
1115 | new_bw = agent->bw_out - (1 * MIN_BW); | 1122 | new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW); |
1116 | if (new_bw < MIN_BW || new_bw > agent->bw_out) | 1123 | if (new_bw < MIN_BW || new_bw > agent->bw_out) |
1117 | new_bw = MIN_BW; | 1124 | new_bw = MIN_BW; |
1118 | envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw, | 1125 | envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw, |
@@ -1220,6 +1227,7 @@ static void | |||
1220 | agent_step (struct RIL_Peer_Agent *agent) | 1227 | agent_step (struct RIL_Peer_Agent *agent) |
1221 | { | 1228 | { |
1222 | int a_next = RIL_ACTION_INVALID; | 1229 | int a_next = RIL_ACTION_INVALID; |
1230 | int explore; | ||
1223 | double *s_next; | 1231 | double *s_next; |
1224 | double reward; | 1232 | double reward; |
1225 | 1233 | ||
@@ -1229,22 +1237,12 @@ agent_step (struct RIL_Peer_Agent *agent) | |||
1229 | 1237 | ||
1230 | s_next = envi_get_state (agent->envi, agent); | 1238 | s_next = envi_get_state (agent->envi, agent); |
1231 | reward = envi_get_reward (agent->envi, agent); | 1239 | reward = envi_get_reward (agent->envi, agent); |
1232 | 1240 | explore = agent_decide_exploration (agent); | |
1233 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: A: %d R: %f IN %llu OUT %llu\n", | ||
1234 | agent->step_count, | ||
1235 | agent->a_old, | ||
1236 | reward, | ||
1237 | agent->bw_in/1024, | ||
1238 | agent->bw_out/1024); | ||
1239 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Best A: %d Q(s,a): %f \n", | ||
1240 | agent->step_count, | ||
1241 | agent_get_action_best (agent, s_next), | ||
1242 | agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next))); | ||
1243 | 1241 | ||
1244 | switch (agent->envi->parameters.algorithm) | 1242 | switch (agent->envi->parameters.algorithm) |
1245 | { | 1243 | { |
1246 | case RIL_ALGO_SARSA: | 1244 | case RIL_ALGO_SARSA: |
1247 | if (agent_decide_exploration (agent)) | 1245 | if (explore) |
1248 | { | 1246 | { |
1249 | a_next = agent_get_action_explore (agent, s_next); | 1247 | a_next = agent_get_action_explore (agent, s_next); |
1250 | } | 1248 | } |
@@ -1267,7 +1265,7 @@ agent_step (struct RIL_Peer_Agent *agent) | |||
1267 | //updates weights with best action, disregarding actually selected action (off-policy), if not first step | 1265 | //updates weights with best action, disregarding actually selected action (off-policy), if not first step |
1268 | agent_update_weights (agent, reward, s_next, a_next); | 1266 | agent_update_weights (agent, reward, s_next, a_next); |
1269 | } | 1267 | } |
1270 | if (agent_decide_exploration (agent)) | 1268 | if (explore) |
1271 | { | 1269 | { |
1272 | a_next = agent_get_action_explore (agent, s_next); | 1270 | a_next = agent_get_action_explore (agent, s_next); |
1273 | agent_modify_eligibility (agent, RIL_E_ZERO, NULL); | 1271 | agent_modify_eligibility (agent, RIL_E_ZERO, NULL); |
@@ -1284,6 +1282,13 @@ agent_step (struct RIL_Peer_Agent *agent) | |||
1284 | 1282 | ||
1285 | agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next); | 1283 | agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next); |
1286 | 1284 | ||
1285 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n", | ||
1286 | agent->step_count, | ||
1287 | reward, | ||
1288 | agent->bw_in/1024, | ||
1289 | agent->bw_out/1024, | ||
1290 | a_next); | ||
1291 | |||
1287 | envi_do_action (agent->envi, agent, a_next); | 1292 | envi_do_action (agent->envi, agent, a_next); |
1288 | 1293 | ||
1289 | GNUNET_free(agent->s_old); | 1294 | GNUNET_free(agent->s_old); |
@@ -1416,7 +1421,7 @@ ril_calculate_discount (struct GAS_RIL_Handle *solver) | |||
1416 | double tau; | 1421 | double tau; |
1417 | 1422 | ||
1418 | // MDP case - remove when debugged | 1423 | // MDP case - remove when debugged |
1419 | if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us) | 1424 | if (solver->simulate) |
1420 | { | 1425 | { |
1421 | solver->global_discount_variable = solver->parameters.gamma; | 1426 | solver->global_discount_variable = solver->parameters.gamma; |
1422 | solver->global_discount_integrated = 1; | 1427 | solver->global_discount_integrated = 1; |
@@ -1484,6 +1489,11 @@ ril_step_schedule_next (struct GAS_RIL_Handle *solver) | |||
1484 | 1489 | ||
1485 | time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y); | 1490 | time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y); |
1486 | 1491 | ||
1492 | if (solver->simulate) | ||
1493 | { | ||
1494 | time_next = GNUNET_TIME_UNIT_ZERO; | ||
1495 | } | ||
1496 | |||
1487 | if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done)) | 1497 | if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done)) |
1488 | { | 1498 | { |
1489 | solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task, | 1499 | solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task, |
@@ -1583,7 +1593,7 @@ agent_w_start (struct RIL_Peer_Agent *agent) | |||
1583 | for (k = 0; k < agent->m; k++) | 1593 | for (k = 0; k < agent->m; k++) |
1584 | { | 1594 | { |
1585 | if (0 == count) { | 1595 | if (0 == count) { |
1586 | agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX); | 1596 | agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX); |
1587 | } | 1597 | } |
1588 | else { | 1598 | else { |
1589 | for (other = agent->envi->agents_head; NULL != other; other = other->next) | 1599 | for (other = agent->envi->agents_head; NULL != other; other = other->next) |
@@ -1878,6 +1888,10 @@ libgnunet_plugin_ats_ril_init (void *cls) | |||
1878 | { | 1888 | { |
1879 | solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE; | 1889 | solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE; |
1880 | } | 1890 | } |
1891 | if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate)) | ||
1892 | { | ||
1893 | solver->simulate = GNUNET_NO; | ||
1894 | } | ||
1881 | 1895 | ||
1882 | env->sf.s_add = &GAS_ril_address_add; | 1896 | env->sf.s_add = &GAS_ril_address_add; |
1883 | env->sf.s_address_update_property = &GAS_ril_address_property_changed; | 1897 | env->sf.s_address_update_property = &GAS_ril_address_property_changed; |
@@ -1905,16 +1919,16 @@ libgnunet_plugin_ats_ril_init (void *cls) | |||
1905 | cur->type = env->networks[c]; | 1919 | cur->type = env->networks[c]; |
1906 | cur->bw_in_available = env->in_quota[c]; | 1920 | cur->bw_in_available = env->in_quota[c]; |
1907 | cur->bw_out_available = env->out_quota[c]; | 1921 | cur->bw_out_available = env->out_quota[c]; |
1908 | LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024); | 1922 | LOG(GNUNET_ERROR_TYPE_INFO, "init() Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024); |
1909 | } | 1923 | } |
1910 | 1924 | ||
1911 | LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n"); | 1925 | LOG(GNUNET_ERROR_TYPE_INFO, "init() Parameters:\n"); |
1912 | LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n", | 1926 | LOG(GNUNET_ERROR_TYPE_INFO, "init() Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n", |
1913 | solver->parameters.algorithm ? "Q" : "SARSA", | 1927 | solver->parameters.algorithm ? "Q" : "SARSA", |
1914 | solver->parameters.alpha, | 1928 | solver->parameters.alpha, |
1915 | solver->parameters.beta, | 1929 | solver->parameters.beta, |
1916 | solver->parameters.lambda); | 1930 | solver->parameters.lambda); |
1917 | LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n", | 1931 | LOG(GNUNET_ERROR_TYPE_INFO, "init() explore = %f, global_share = %f\n", |
1918 | solver->parameters.explore_ratio, | 1932 | solver->parameters.explore_ratio, |
1919 | solver->parameters.reward_global_share); | 1933 | solver->parameters.reward_global_share); |
1920 | 1934 | ||