diff options
author | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-16 19:02:54 +0000 |
---|---|---|
committer | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-16 19:02:54 +0000 |
commit | a53b100e3e326970708e62c7660f09d40aae58d7 (patch) | |
tree | 6f4fcb57971be629425a5922b6d0a02833936f5d /src | |
parent | 5650ff38f1263a52c29511673aee1c849ae1fd8e (diff) | |
download | gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.tar.gz gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.zip |
minor action-selection improvement
Diffstat (limited to 'src')
-rwxr-xr-x | src/ats/plugin_ats_ril.c | 148 |
1 files changed, 125 insertions, 23 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c index be42c9dc7..d29767a49 100755 --- a/src/ats/plugin_ats_ril.c +++ b/src/ats/plugin_ats_ril.c | |||
@@ -507,6 +507,66 @@ agent_address_get (struct RIL_Peer_Agent *agent, struct ATS_Address *address) | |||
507 | } | 507 | } |
508 | 508 | ||
509 | 509 | ||
510 | static int | ||
511 | agent_action_is_possible (struct RIL_Peer_Agent *agent, int action) | ||
512 | { | ||
513 | int address_index; | ||
514 | |||
515 | switch (action) | ||
516 | { | ||
517 | case RIL_ACTION_NOTHING: | ||
518 | return GNUNET_YES; | ||
519 | break; | ||
520 | case RIL_ACTION_BW_IN_INC: | ||
521 | case RIL_ACTION_BW_IN_DBL: | ||
522 | if (agent->bw_in >= RIL_MAX_BW) | ||
523 | return GNUNET_NO; | ||
524 | else | ||
525 | return GNUNET_YES; | ||
526 | break; | ||
527 | case RIL_ACTION_BW_IN_DEC: | ||
528 | case RIL_ACTION_BW_IN_HLV: | ||
529 | if (agent->bw_in <= RIL_MIN_BW) | ||
530 | return GNUNET_NO; | ||
531 | else | ||
532 | return GNUNET_YES; | ||
533 | break; | ||
534 | case RIL_ACTION_BW_OUT_INC: | ||
535 | case RIL_ACTION_BW_OUT_DBL: | ||
536 | if (agent->bw_out >= RIL_MAX_BW) | ||
537 | return GNUNET_NO; | ||
538 | else | ||
539 | return GNUNET_YES; | ||
540 | break; | ||
541 | case RIL_ACTION_BW_OUT_DEC: | ||
542 | case RIL_ACTION_BW_OUT_HLV: | ||
543 | if (agent->bw_out <= RIL_MIN_BW) | ||
544 | return GNUNET_NO; | ||
545 | else | ||
546 | return GNUNET_YES; | ||
547 | break; | ||
548 | default: | ||
549 | if ((action >= RIL_ACTION_TYPE_NUM) && (action < agent->n)) //switch address action | ||
550 | { | ||
551 | address_index = action - RIL_ACTION_TYPE_NUM; | ||
552 | |||
553 | GNUNET_assert(address_index >= 0); | ||
554 | GNUNET_assert( | ||
555 | address_index <= agent_address_get_index (agent, agent->addresses_tail->address_naked)); | ||
556 | |||
557 | if ((agent_address_get_index(agent, agent->address_inuse) == address_index) || | ||
558 | agent->address_inuse->active) | ||
559 | return GNUNET_NO; | ||
560 | else | ||
561 | return GNUNET_YES; | ||
562 | break; | ||
563 | } | ||
564 | // error - action does not exist | ||
565 | GNUNET_assert(GNUNET_NO); | ||
566 | } | ||
567 | } | ||
568 | |||
569 | |||
510 | /** | 570 | /** |
511 | * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the | 571 | * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the |
512 | * most reward in the future) | 572 | * most reward in the future) |
@@ -519,20 +579,20 @@ static int | |||
519 | agent_get_action_max (struct RIL_Peer_Agent *agent, double *state) | 579 | agent_get_action_max (struct RIL_Peer_Agent *agent, double *state) |
520 | { | 580 | { |
521 | int i; | 581 | int i; |
522 | int num_actions; | ||
523 | int max_i = RIL_ACTION_INVALID; | 582 | int max_i = RIL_ACTION_INVALID; |
524 | double cur_q; | 583 | double cur_q; |
525 | double max_q = -DBL_MAX; | 584 | double max_q = -DBL_MAX; |
526 | 585 | ||
527 | num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n; | 586 | for (i = 0; i < agent->n; i++) |
528 | |||
529 | for (i = 0; i < num_actions; i++) | ||
530 | { | 587 | { |
531 | cur_q = agent_estimate_q (agent, state, i); | 588 | if (agent_action_is_possible(agent, i)) |
532 | if (cur_q > max_q) | ||
533 | { | 589 | { |
534 | max_q = cur_q; | 590 | cur_q = agent_estimate_q (agent, state, i); |
535 | max_i = i; | 591 | if (cur_q > max_q) |
592 | { | ||
593 | max_q = cur_q; | ||
594 | max_i = i; | ||
595 | } | ||
536 | } | 596 | } |
537 | } | 597 | } |
538 | 598 | ||
@@ -542,6 +602,44 @@ agent_get_action_max (struct RIL_Peer_Agent *agent, double *state) | |||
542 | } | 602 | } |
543 | 603 | ||
544 | 604 | ||
605 | static int | ||
606 | agent_get_action_random (struct RIL_Peer_Agent *agent) | ||
607 | { | ||
608 | int i; | ||
609 | int is_possible[agent->n]; | ||
610 | int sum = 0; | ||
611 | int r; | ||
612 | |||
613 | for (i = 0; i<agent->n; i++) | ||
614 | { | ||
615 | if (agent_action_is_possible(agent, i)) | ||
616 | { | ||
617 | is_possible[i] = GNUNET_YES; | ||
618 | sum++; | ||
619 | } | ||
620 | else | ||
621 | { | ||
622 | is_possible[i] = GNUNET_NO; | ||
623 | } | ||
624 | } | ||
625 | |||
626 | r = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, sum); | ||
627 | |||
628 | sum = -1; | ||
629 | for (i = 0; i<agent->n; i++) | ||
630 | { | ||
631 | if (is_possible[i]) | ||
632 | { | ||
633 | sum++; | ||
634 | if (sum == r) | ||
635 | return i; | ||
636 | } | ||
637 | } | ||
638 | |||
639 | GNUNET_assert(GNUNET_NO); | ||
640 | } | ||
641 | |||
642 | |||
545 | /** | 643 | /** |
546 | * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a | 644 | * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a |
547 | * | 645 | * |
@@ -787,7 +885,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
787 | x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor; | 885 | x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor; |
788 | d[0] = x[0]-y[0]; | 886 | d[0] = x[0]-y[0]; |
789 | d[1] = x[1]-y[1]; | 887 | d[1] = x[1]-y[1]; |
790 | sigma = (((double) max_bw / 2) * M_SQRT2) / (double) solver->parameters.rbf_divisor; | 888 | sigma = (((double) max_bw / (double) solver->parameters.rbf_divisor) / 2.0) * M_SQRT2; |
791 | f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma))); | 889 | f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma))); |
792 | state[m++] = f; | 890 | state[m++] = f; |
793 | } | 891 | } |
@@ -978,7 +1076,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
978 | 1076 | ||
979 | if (delta != 0) | 1077 | if (delta != 0) |
980 | { | 1078 | { |
981 | agent->nop_bonus = abs(delta) * 0; | 1079 | agent->nop_bonus = 0; |
982 | } | 1080 | } |
983 | 1081 | ||
984 | LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization); | 1082 | LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization); |
@@ -1216,15 +1314,12 @@ static int | |||
1216 | agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) | 1314 | agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) |
1217 | { | 1315 | { |
1218 | int action; | 1316 | int action; |
1219 | int num_actions; | ||
1220 | double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | 1317 | double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, |
1221 | UINT32_MAX) / (double) UINT32_MAX; | 1318 | UINT32_MAX) / (double) UINT32_MAX; |
1222 | 1319 | ||
1223 | num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n; | ||
1224 | |||
1225 | if (r < agent->envi->parameters.explore_ratio) //explore | 1320 | if (r < agent->envi->parameters.explore_ratio) //explore |
1226 | { | 1321 | { |
1227 | action = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, num_actions); | 1322 | action = agent_get_action_random(agent); |
1228 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) | 1323 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) |
1229 | { | 1324 | { |
1230 | agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action); | 1325 | agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action); |
@@ -1257,29 +1352,36 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state) | |||
1257 | { | 1352 | { |
1258 | int i; | 1353 | int i; |
1259 | int a_max; | 1354 | int a_max; |
1260 | int num_actions; | ||
1261 | double eqt[agent->n]; | 1355 | double eqt[agent->n]; |
1262 | double p[agent->n]; | 1356 | double p[agent->n]; |
1263 | double sum = 0; | 1357 | double sum = 0; |
1264 | double r; | 1358 | double r; |
1265 | 1359 | ||
1266 | num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n; | ||
1267 | |||
1268 | a_max = agent_get_action_max(agent, state); | 1360 | a_max = agent_get_action_max(agent, state); |
1269 | 1361 | ||
1270 | for (i=0; i<num_actions; i++) | 1362 | for (i=0; i<agent->n; i++) |
1271 | { | 1363 | { |
1272 | eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); | 1364 | if (agent_action_is_possible(agent, i)) |
1273 | sum += eqt[i]; | 1365 | { |
1366 | eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); | ||
1367 | sum += eqt[i]; | ||
1368 | } | ||
1274 | } | 1369 | } |
1275 | for (i=0; i<num_actions; i++) | 1370 | for (i=0; i<agent->n; i++) |
1276 | { | 1371 | { |
1277 | p[i] = eqt[i]/sum; | 1372 | if (agent_action_is_possible(agent, i)) |
1373 | { | ||
1374 | p[i] = eqt[i]/sum; | ||
1375 | } | ||
1376 | else | ||
1377 | { | ||
1378 | p[i] = 0; | ||
1379 | } | ||
1278 | } | 1380 | } |
1279 | r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | 1381 | r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, |
1280 | UINT32_MAX) / (double) UINT32_MAX; | 1382 | UINT32_MAX) / (double) UINT32_MAX; |
1281 | sum = 0; | 1383 | sum = 0; |
1282 | for (i=0; i<num_actions; i++) | 1384 | for (i=0; i<agent->n; i++) |
1283 | { | 1385 | { |
1284 | if (sum + p[i] > r) | 1386 | if (sum + p[i] > r) |
1285 | { | 1387 | { |