diff options
author | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-25 13:51:27 +0000 |
---|---|---|
committer | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-25 13:51:27 +0000 |
commit | 9bd3837cec8372bb8fc012548629ac543932f240 (patch) | |
tree | 682679c520cfe7c96a11aef411910e639dd60624 /src/ats/plugin_ats_ril.c | |
parent | 8a741e7ef0f3eefbcb8eb990b47df699132b3873 (diff) | |
download | gnunet-9bd3837cec8372bb8fc012548629ac543932f240.tar.gz gnunet-9bd3837cec8372bb8fc012548629ac543932f240.zip |
-eligibility trace fixes
Diffstat (limited to 'src/ats/plugin_ats_ril.c')
-rwxr-xr-x | src/ats/plugin_ats_ril.c | 60 |
1 files changed, 32 insertions, 28 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c index 944e4a5fe..22fd1e365 100755 --- a/src/ats/plugin_ats_ril.c +++ b/src/ats/plugin_ats_ril.c | |||
@@ -683,8 +683,9 @@ static void | |||
683 | agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime) | 683 | agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime) |
684 | { | 684 | { |
685 | int i; | 685 | int i; |
686 | int k; | ||
686 | double delta; | 687 | double delta; |
687 | double *theta = agent->W[agent->a_old]; | 688 | double **theta = agent->W; |
688 | 689 | ||
689 | delta = agent->envi->global_discount_integrated * reward; //reward | 690 | delta = agent->envi->global_discount_integrated * reward; //reward |
690 | delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value | 691 | delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value |
@@ -699,14 +700,17 @@ agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a | |||
699 | // agent_q (agent, s_next, a_prime), | 700 | // agent_q (agent, s_next, a_prime), |
700 | // delta); | 701 | // delta); |
701 | 702 | ||
702 | for (i = 0; i < agent->m; i++) | 703 | for (k = 0; k < agent->n; k++) |
703 | { | 704 | { |
704 | // LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n", | 705 | for (i = 0; i < agent->m; i++) |
705 | // agent->envi->parameters.alpha, | 706 | { |
706 | // delta, | 707 | // LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n", |
707 | // i, | 708 | // agent->envi->parameters.alpha, |
708 | // agent->e[i]); | 709 | // delta, |
709 | theta[i] += agent->envi->parameters.alpha * delta * agent->E[agent->a_old][i]; | 710 | // i, |
711 | // agent->e[i]); | ||
712 | theta[k][i] += agent->envi->parameters.alpha * delta * agent->E[k][i]; | ||
713 | } | ||
710 | } | 714 | } |
711 | } | 715 | } |
712 | 716 | ||
@@ -739,7 +743,7 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent, | |||
739 | agent->E[action][i] += feature[i]; | 743 | agent->E[action][i] += feature[i]; |
740 | break; | 744 | break; |
741 | case RIL_E_REPLACE: | 745 | case RIL_E_REPLACE: |
742 | agent->E[action][i] = (agent->envi->global_discount_variable * agent->envi->parameters.lambda * agent->E[action][i]) > feature[i] ? agent->E[action][i] : feature[i]; | 746 | agent->E[action][i] = agent->E[action][i] > feature[i] ? agent->E[action][i] : feature[i]; |
743 | break; | 747 | break; |
744 | case RIL_E_DISCOUNT: | 748 | case RIL_E_DISCOUNT: |
745 | for (k = 0; k < agent->n; k++) | 749 | for (k = 0; k < agent->n; k++) |
@@ -1004,23 +1008,23 @@ ril_network_get_social_welfare (struct GAS_RIL_Handle *solver, struct RIL_Scope | |||
1004 | return 1; | 1008 | return 1; |
1005 | } | 1009 | } |
1006 | 1010 | ||
1007 | static double | 1011 | //static double |
1008 | envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | 1012 | //envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) |
1009 | { | 1013 | //{ |
1010 | struct RIL_Scope *net; | 1014 | // struct RIL_Scope *net; |
1011 | double util_ratio_in; | 1015 | // double util_ratio_in; |
1012 | double util_ratio_out; | 1016 | // double util_ratio_out; |
1013 | double util_ratio_max; | 1017 | // double util_ratio_max; |
1014 | double sigmoid_x; | 1018 | // double sigmoid_x; |
1015 | 1019 | // | |
1016 | net = agent->address_inuse->solver_information; | 1020 | // net = agent->address_inuse->solver_information; |
1017 | 1021 | // | |
1018 | util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available; | 1022 | // util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available; |
1019 | util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available; | 1023 | // util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available; |
1020 | util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out); | 1024 | // util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out); |
1021 | sigmoid_x = util_ratio_max - 1; | 1025 | // sigmoid_x = util_ratio_max - 1; |
1022 | return 1 - (1 / (1 + exp(5 * sigmoid_x))); | 1026 | // return 1 - (1 / (1 + exp(5 * sigmoid_x))); |
1023 | } | 1027 | //} |
1024 | 1028 | ||
1025 | static double | 1029 | static double |
1026 | envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | 1030 | envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) |
@@ -1032,7 +1036,6 @@ envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
1032 | 1036 | ||
1033 | net = agent->address_inuse->solver_information; | 1037 | net = agent->address_inuse->solver_information; |
1034 | 1038 | ||
1035 | //TODO make sure in tests to have utilization property updated | ||
1036 | if (net->bw_in_utilized > net->bw_in_available) | 1039 | if (net->bw_in_utilized > net->bw_in_available) |
1037 | { | 1040 | { |
1038 | over_in = net->bw_in_utilized - net->bw_in_available; | 1041 | over_in = net->bw_in_utilized - net->bw_in_available; |
@@ -1087,7 +1090,8 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
1087 | 1090 | ||
1088 | steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0; | 1091 | steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0; |
1089 | 1092 | ||
1090 | pen_share = envi_penalty_share(solver, agent); | 1093 | //pen_share = envi_penalty_share(solver, agent); TODO revert |
1094 | pen_share = 0.5; | ||
1091 | penalty = envi_get_penalty(solver, agent); | 1095 | penalty = envi_get_penalty(solver, agent); |
1092 | 1096 | ||
1093 | reward = delta + steady; | 1097 | reward = delta + steady; |