-eligibility trace fixes

author: Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-25 13:51:27 +0000
committer: Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-25 13:51:27 +0000
commit: 9bd3837cec8372bb8fc012548629ac543932f240 (patch)
tree: 682679c520cfe7c96a11aef411910e639dd60624 /src/ats/plugin_ats_ril.c
parent: 8a741e7ef0f3eefbcb8eb990b47df699132b3873 (diff)
download: gnunet-9bd3837cec8372bb8fc012548629ac543932f240.tar.gz
gnunet-9bd3837cec8372bb8fc012548629ac543932f240.zip
1 files changed, 32 insertions, 28 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c
index 944e4a5fe..22fd1e365 100755
--- a/src/ats/plugin_ats_ril.c
+++ b/src/ats/plugin_ats_ril.c
@@ -683,8 +683,9 @@ static void
 agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime)
 {
  int i;
+  int k;
  double delta;
-  double *theta = agent->W[agent->a_old];
+  double **theta = agent->W;
  delta = agent->envi->global_discount_integrated * reward; //reward
  delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value
@@ -699,14 +700,17 @@ agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a
 //      agent_q (agent, s_next, a_prime),
 //      delta);
-  for (i = 0; i < agent->m; i++)
+  for (k = 0; k < agent->n; k++)
  {
-//    LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f   delta = %f   e[%d] = %f\n",
+    for (i = 0; i < agent->m; i++)
-//        agent->envi->parameters.alpha,
+    {
-//        delta,
+  //    LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f   delta = %f   e[%d] = %f\n",
-//        i,
+  //        agent->envi->parameters.alpha,
-//        agent->e[i]);
+  //        delta,
-    theta[i] += agent->envi->parameters.alpha * delta * agent->E[agent->a_old][i];
+  //        i,
+  //        agent->e[i]);
+      theta[k][i] += agent->envi->parameters.alpha * delta * agent->E[k][i];
+    }
  }
 }
@@ -739,7 +743,7 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent,
      agent->E[action][i] += feature[i];
      break;
    case RIL_E_REPLACE:
-      agent->E[action][i] =  (agent->envi->global_discount_variable * agent->envi->parameters.lambda * agent->E[action][i]) > feature[i] ? agent->E[action][i] : feature[i];
+      agent->E[action][i] = agent->E[action][i] > feature[i] ? agent->E[action][i] : feature[i];
      break;
    case RIL_E_DISCOUNT:
      for (k = 0; k < agent->n; k++)
@@ -1004,23 +1008,23 @@ ril_network_get_social_welfare (struct GAS_RIL_Handle *solver, struct RIL_Scope
  return 1;
 }
-static double
+//static double
-envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
+//envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
-{
+//{
-  struct RIL_Scope *net;
+//  struct RIL_Scope *net;
-  double util_ratio_in;
+//  double util_ratio_in;
-  double util_ratio_out;
+//  double util_ratio_out;
-  double util_ratio_max;
+//  double util_ratio_max;
-  double sigmoid_x;
+//  double sigmoid_x;
+//
-  net = agent->address_inuse->solver_information;
+//  net = agent->address_inuse->solver_information;
+//
-  util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;
+//  util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;
-  util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;
+//  util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;
-  util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);
+//  util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);
-  sigmoid_x = util_ratio_max - 1;
+//  sigmoid_x = util_ratio_max - 1;
-  return 1 - (1 / (1 + exp(5 * sigmoid_x)));
+//  return 1 - (1 / (1 + exp(5 * sigmoid_x)));
-}
+//}
 static double
 envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
@@ -1032,7 +1036,6 @@ envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
  net = agent->address_inuse->solver_information;
-  //TODO make sure in tests to have utilization property updated
  if (net->bw_in_utilized > net->bw_in_available)
  {
    over_in = net->bw_in_utilized - net->bw_in_available;
@@ -1087,7 +1090,8 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
  steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;
-  pen_share = envi_penalty_share(solver, agent);
+  //pen_share = envi_penalty_share(solver, agent); TODO revert
+  pen_share = 0.5;
  penalty = envi_get_penalty(solver, agent);
  reward = delta + steady;
author	Fabian Oehlmann <oehlmann@in.tum.de>	2014-01-25 13:51:27 +0000
committer	Fabian Oehlmann <oehlmann@in.tum.de>	2014-01-25 13:51:27 +0000
commit	9bd3837cec8372bb8fc012548629ac543932f240 (patch)
tree	682679c520cfe7c96a11aef411910e639dd60624 /src/ats/plugin_ats_ril.c
parent	8a741e7ef0f3eefbcb8eb990b47df699132b3873 (diff)
download	gnunet-9bd3837cec8372bb8fc012548629ac543932f240.tar.gz gnunet-9bd3837cec8372bb8fc012548629ac543932f240.zip

diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c
index 944e4a5fe..22fd1e365 100755
--- a/src/ats/plugin_ats_ril.c
+++ b/src/ats/plugin_ats_ril.c
@@ -683,8 +683,9 @@ static void
683	agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime)	683	agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime)
684	{	684	{
685	int i;	685	int i;
		686	int k;
686	double delta;	687	double delta;
687	double *theta = agent->W[agent->a_old];	688	double **theta = agent->W;
688		689
689	delta = agent->envi->global_discount_integrated * reward; //reward	690	delta = agent->envi->global_discount_integrated * reward; //reward
690	delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value	691	delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value
@@ -699,14 +700,17 @@ agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a
699	// agent_q (agent, s_next, a_prime),	700	// agent_q (agent, s_next, a_prime),
700	// delta);	701	// delta);
701		702
702	for (i = 0; i < agent->m; i++)	703	for (k = 0; k < agent->n; k++)
703	{	704	{
704	// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",	705	for (i = 0; i < agent->m; i++)
705	// agent->envi->parameters.alpha,	706	{
706	// delta,	707	// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
707	// i,	708	// agent->envi->parameters.alpha,
708	// agent->e[i]);	709	// delta,
709	theta[i] += agent->envi->parameters.alpha * delta * agent->E[agent->a_old][i];	710	// i,
		711	// agent->e[i]);
		712	theta[k][i] += agent->envi->parameters.alpha * delta * agent->E[k][i];
		713	}
710	}	714	}
711	}	715	}
712		716
@@ -739,7 +743,7 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent,
739	agent->E[action][i] += feature[i];	743	agent->E[action][i] += feature[i];
740	break;	744	break;
741	case RIL_E_REPLACE:	745	case RIL_E_REPLACE:
742	agent->E[action][i] = (agent->envi->global_discount_variable * agent->envi->parameters.lambda * agent->E[action][i]) > feature[i] ? agent->E[action][i] : feature[i];	746	agent->E[action][i] = agent->E[action][i] > feature[i] ? agent->E[action][i] : feature[i];
743	break;	747	break;
744	case RIL_E_DISCOUNT:	748	case RIL_E_DISCOUNT:
745	for (k = 0; k < agent->n; k++)	749	for (k = 0; k < agent->n; k++)
@@ -1004,23 +1008,23 @@ ril_network_get_social_welfare (struct GAS_RIL_Handle *solver, struct RIL_Scope
1004	return 1;	1008	return 1;
1005	}	1009	}
1006		1010
1007	static double	1011	//static double
1008	envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)	1012	//envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
1009	{	1013	//{
1010	struct RIL_Scope *net;	1014	// struct RIL_Scope *net;
1011	double util_ratio_in;	1015	// double util_ratio_in;
1012	double util_ratio_out;	1016	// double util_ratio_out;
1013	double util_ratio_max;	1017	// double util_ratio_max;
1014	double sigmoid_x;	1018	// double sigmoid_x;
1015		1019	//
1016	net = agent->address_inuse->solver_information;	1020	// net = agent->address_inuse->solver_information;
1017		1021	//
1018	util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;	1022	// util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;
1019	util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;	1023	// util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;
1020	util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);	1024	// util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);
1021	sigmoid_x = util_ratio_max - 1;	1025	// sigmoid_x = util_ratio_max - 1;
1022	return 1 - (1 / (1 + exp(5 * sigmoid_x)));	1026	// return 1 - (1 / (1 + exp(5 * sigmoid_x)));
1023	}	1027	//}
1024		1028
1025	static double	1029	static double
1026	envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)	1030	envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
@@ -1032,7 +1036,6 @@ envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
1032		1036
1033	net = agent->address_inuse->solver_information;	1037	net = agent->address_inuse->solver_information;
1034		1038
1035	//TODO make sure in tests to have utilization property updated
1036	if (net->bw_in_utilized > net->bw_in_available)	1039	if (net->bw_in_utilized > net->bw_in_available)
1037	{	1040	{
1038	over_in = net->bw_in_utilized - net->bw_in_available;	1041	over_in = net->bw_in_utilized - net->bw_in_available;
@@ -1087,7 +1090,8 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
1087		1090
1088	steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;	1091	steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;
1089		1092
1090	pen_share = envi_penalty_share(solver, agent);	1093	//pen_share = envi_penalty_share(solver, agent); TODO revert
		1094	pen_share = 0.5;
1091	penalty = envi_get_penalty(solver, agent);	1095	penalty = envi_get_penalty(solver, agent);
1092		1096
1093	reward = delta + steady;	1097	reward = delta + steady;