aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-16 19:02:54 +0000
committer Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-16 19:02:54 +0000
commit a53b100e3e326970708e62c7660f09d40aae58d7 (patch)
tree 6f4fcb57971be629425a5922b6d0a02833936f5d /src
parent 5650ff38f1263a52c29511673aee1c849ae1fd8e (diff)
download gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.tar.gz
gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.zip
minor action-selection improvement
Diffstat (limited to 'src')
-rwxr-xr-x src/ats/plugin_ats_ril.c 148
1 file changed, 125 insertions, 23 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c
index be42c9dc7..d29767a49 100755
--- a/src/ats/plugin_ats_ril.c
+++ b/src/ats/plugin_ats_ril.c
@@ -507,6 +507,66 @@ agent_address_get (struct RIL_Peer_Agent *agent, struct ATS_Address *address)
507} 507}
508 508
509 509
/**
 * Tests whether the given action may currently be taken by the agent.
 *
 * RIL_ACTION_NOTHING is always possible. The BW_IN_INC / BW_IN_DBL actions
 * are refused once agent->bw_in has reached RIL_MAX_BW, and the
 * BW_IN_DEC / BW_IN_HLV actions once it is at or below RIL_MIN_BW; the
 * BW_OUT_* actions are checked the same way against agent->bw_out.
 *
 * Every other action number in [RIL_ACTION_TYPE_NUM, agent->n) is handled
 * as a "switch address" action whose target address index is
 * (action - RIL_ACTION_TYPE_NUM). Any action number outside the known
 * range triggers GNUNET_assert (GNUNET_NO).
 *
 * NOTE(review): agent->addresses_tail and agent->address_inuse are
 * dereferenced without a NULL check in the switch-address branch --
 * confirm that callers guarantee both are set.
 *
 * @param agent the agent whose bandwidth values and addresses are inspected
 * @param action the action number to test
 * @return GNUNET_YES if the action is possible, GNUNET_NO if not
 */
510static int
511agent_action_is_possible (struct RIL_Peer_Agent *agent, int action)
512{
513 int address_index;
514
515 switch (action)
516 {
517 case RIL_ACTION_NOTHING:
518 return GNUNET_YES;
519 break;
520 case RIL_ACTION_BW_IN_INC:
521 case RIL_ACTION_BW_IN_DBL:
522 if (agent->bw_in >= RIL_MAX_BW)
523 return GNUNET_NO;
524 else
525 return GNUNET_YES;
526 break;
527 case RIL_ACTION_BW_IN_DEC:
528 case RIL_ACTION_BW_IN_HLV:
529 if (agent->bw_in <= RIL_MIN_BW)
530 return GNUNET_NO;
531 else
532 return GNUNET_YES;
533 break;
534 case RIL_ACTION_BW_OUT_INC:
535 case RIL_ACTION_BW_OUT_DBL:
536 if (agent->bw_out >= RIL_MAX_BW)
537 return GNUNET_NO;
538 else
539 return GNUNET_YES;
540 break;
541 case RIL_ACTION_BW_OUT_DEC:
542 case RIL_ACTION_BW_OUT_HLV:
543 if (agent->bw_out <= RIL_MIN_BW)
544 return GNUNET_NO;
545 else
546 return GNUNET_YES;
547 break;
548 default:
549 if ((action >= RIL_ACTION_TYPE_NUM) && (action < agent->n)) //switch address action
550 {
551 address_index = action - RIL_ACTION_TYPE_NUM;
552
/* Sanity checks: the index must not be negative and must not exceed the
 * index reported for the address at the tail of the agent's list. */
553 GNUNET_assert(address_index >= 0);
554 GNUNET_assert(
555 address_index <= agent_address_get_index (agent, agent->addresses_tail->address_naked));
556
/* The switch is refused when the target is the address already in use,
 * and also whenever agent->address_inuse->active is non-zero. */
557 if ((agent_address_get_index(agent, agent->address_inuse) == address_index) ||
558 agent->address_inuse->active)
559 return GNUNET_NO;
560 else
561 return GNUNET_YES;
562 break;
563 }
564 // error - action does not exist
/* NOTE(review): no return statement follows; this presumably relies on
 * GNUNET_assert not returning when its condition is false -- confirm. */
565 GNUNET_assert(GNUNET_NO);
566 }
567}
568
569
510/** 570/**
511 * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the 571 * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
512 * most reward in the future) 572 * most reward in the future)
@@ -519,20 +579,20 @@ static int
519agent_get_action_max (struct RIL_Peer_Agent *agent, double *state) 579agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
520{ 580{
521 int i; 581 int i;
522 int num_actions;
523 int max_i = RIL_ACTION_INVALID; 582 int max_i = RIL_ACTION_INVALID;
524 double cur_q; 583 double cur_q;
525 double max_q = -DBL_MAX; 584 double max_q = -DBL_MAX;
526 585
527 num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n; 586 for (i = 0; i < agent->n; i++)
528
529 for (i = 0; i < num_actions; i++)
530 { 587 {
531 cur_q = agent_estimate_q (agent, state, i); 588 if (agent_action_is_possible(agent, i))
532 if (cur_q > max_q)
533 { 589 {
534 max_q = cur_q; 590 cur_q = agent_estimate_q (agent, state, i);
535 max_i = i; 591 if (cur_q > max_q)
592 {
593 max_q = cur_q;
594 max_i = i;
595 }
536 } 596 }
537 } 597 }
538 598
@@ -542,6 +602,44 @@ agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
542} 602}
543 603
544 604
/**
 * Selects a random action among those that agent_action_is_possible()
 * currently reports as possible for the agent.
 *
 * A first pass over all agent->n actions records which ones are possible
 * and counts them; a random number r is then drawn with that count as the
 * bound, and a second pass returns the r-th possible action (counting
 * from 0).
 *
 * NOTE(review): presumably GNUNET_CRYPTO_random_u32 returns a value in
 * [0, sum) and requires sum > 0 -- confirm against its documentation.
 *
 * @param agent the agent for which an action is chosen
 * @return the number of the chosen action
 */
605static int
606agent_get_action_random (struct RIL_Peer_Agent *agent)
607{
608 int i;
/* Variable-length array with one flag per action number. */
609 int is_possible[agent->n];
610 int sum = 0;
611 int r;
612
/* Pass 1: flag every possible action and count them in sum. */
613 for (i = 0; i<agent->n; i++)
614 {
615 if (agent_action_is_possible(agent, i))
616 {
617 is_possible[i] = GNUNET_YES;
618 sum++;
619 }
620 else
621 {
622 is_possible[i] = GNUNET_NO;
623 }
624 }
625
626 r = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, sum);
627
/* Pass 2: sum is reused as a running 0-based rank of the possible actions
 * (starting at -1 so that the first possible action gets rank 0). */
628 sum = -1;
629 for (i = 0; i<agent->n; i++)
630 {
631 if (is_possible[i])
632 {
633 sum++;
634 if (sum == r)
635 return i;
636 }
637 }
638
/* Reached only if no possible action had rank r; treated as a fatal
 * error. NOTE(review): no return follows -- presumably GNUNET_assert does
 * not return when its condition is false; confirm. */
639 GNUNET_assert(GNUNET_NO);
640}
641
642
545/** 643/**
546 * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a 644 * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
547 * 645 *
@@ -787,7 +885,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
787 x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor; 885 x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor;
788 d[0] = x[0]-y[0]; 886 d[0] = x[0]-y[0];
789 d[1] = x[1]-y[1]; 887 d[1] = x[1]-y[1];
790 sigma = (((double) max_bw / 2) * M_SQRT2) / (double) solver->parameters.rbf_divisor; 888 sigma = (((double) max_bw / (double) solver->parameters.rbf_divisor) / 2.0) * M_SQRT2;
791 f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma))); 889 f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma)));
792 state[m++] = f; 890 state[m++] = f;
793 } 891 }
@@ -978,7 +1076,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
978 1076
979 if (delta != 0) 1077 if (delta != 0)
980 { 1078 {
981 agent->nop_bonus = abs(delta) * 0; 1079 agent->nop_bonus = 0;
982 } 1080 }
983 1081
984 LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization); 1082 LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization);
@@ -1216,15 +1314,12 @@ static int
1216agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) 1314agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)
1217{ 1315{
1218 int action; 1316 int action;
1219 int num_actions;
1220 double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 1317 double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
1221 UINT32_MAX) / (double) UINT32_MAX; 1318 UINT32_MAX) / (double) UINT32_MAX;
1222 1319
1223 num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
1224
1225 if (r < agent->envi->parameters.explore_ratio) //explore 1320 if (r < agent->envi->parameters.explore_ratio) //explore
1226 { 1321 {
1227 action = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, num_actions); 1322 action = agent_get_action_random(agent);
1228 if (RIL_ALGO_Q == agent->envi->parameters.algorithm) 1323 if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1229 { 1324 {
1230 agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action); 1325 agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);
@@ -1257,29 +1352,36 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state)
1257{ 1352{
1258 int i; 1353 int i;
1259 int a_max; 1354 int a_max;
1260 int num_actions;
1261 double eqt[agent->n]; 1355 double eqt[agent->n];
1262 double p[agent->n]; 1356 double p[agent->n];
1263 double sum = 0; 1357 double sum = 0;
1264 double r; 1358 double r;
1265 1359
1266 num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
1267
1268 a_max = agent_get_action_max(agent, state); 1360 a_max = agent_get_action_max(agent, state);
1269 1361
1270 for (i=0; i<num_actions; i++) 1362 for (i=0; i<agent->n; i++)
1271 { 1363 {
1272 eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); 1364 if (agent_action_is_possible(agent, i))
1273 sum += eqt[i]; 1365 {
1366 eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
1367 sum += eqt[i];
1368 }
1274 } 1369 }
1275 for (i=0; i<num_actions; i++) 1370 for (i=0; i<agent->n; i++)
1276 { 1371 {
1277 p[i] = eqt[i]/sum; 1372 if (agent_action_is_possible(agent, i))
1373 {
1374 p[i] = eqt[i]/sum;
1375 }
1376 else
1377 {
1378 p[i] = 0;
1379 }
1278 } 1380 }
1279 r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 1381 r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
1280 UINT32_MAX) / (double) UINT32_MAX; 1382 UINT32_MAX) / (double) UINT32_MAX;
1281 sum = 0; 1383 sum = 0;
1282 for (i=0; i<num_actions; i++) 1384 for (i=0; i<agent->n; i++)
1283 { 1385 {
1284 if (sum + p[i] > r) 1386 if (sum + p[i] > r)
1285 { 1387 {