added simulation

author: Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-04 19:21:05 +0000
committer: Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-04 19:21:05 +0000
commit: a77de892778f22d687edb8ff9009dc67b2660691 (patch)
tree: 45699ff658bb7336dea42c3c5624501b8e252621 /src/ats
parent: 239310367ce5a29fa2ab641694270f6c9b90ef61 (diff)
download: gnunet-a77de892778f22d687edb8ff9009dc67b2660691.tar.gz
gnunet-a77de892778f22d687edb8ff9009dc67b2660691.zip
1 files changed, 52 insertions, 38 deletions
diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c
index 7cc09950a..603e93a4d 100755
--- a/src/ats/libgnunet_plugin_ats_ril.c
+++ b/src/ats/libgnunet_plugin_ats_ril.c
@@ -45,6 +45,8 @@
 #define RIL_DEFAULT_EXPLORE_RATIO 0.1
 #define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
+#define RIL_INC_DEC_STEP_SIZE 1
 /**
 * ATS reinforcement learning solver
 *
@@ -68,7 +70,7 @@ enum RIL_Action_Type
  RIL_ACTION_BW_OUT_HLV = -5,
  RIL_ACTION_BW_OUT_INC = -6,
  RIL_ACTION_BW_OUT_DEC = -7,
-  RIL_ACTION_TYPE_NUM = 2
+  RIL_ACTION_TYPE_NUM = 1
 };
 enum RIL_Algorithm
@@ -363,6 +365,11 @@ struct GAS_RIL_Handle
   * Shutdown
   */
  int done;
+  /**
+   * Simulate steps, i.e. schedule steps immediately
+   */
+  unsigned long long simulate;
 };
 /*
@@ -396,8 +403,7 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
  if (isinf(result))
  {
-    GNUNET_assert(GNUNET_NO);
+    return isinf(result) * UINT32_MAX; //TODO! fix
-    return isinf(result) * (DBL_MAX / 2); //TODO! fix
  }
  return result;
@@ -537,13 +543,14 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
  delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
  delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
-//  LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f   y*Q(s+1,a+1) = %f   Q(s,a) = %f\n, y = %f\n",
+  LOG(GNUNET_ERROR_TYPE_INFO, "update()   Step# %llu  Q(s,a): %f  a: %f  r: %f  y: %f  Q(s+1,a+1) = %f  delta: %f\n",
-//      agent->envi->global_discount_integrated * reward,
+      agent->step_count,
-//      agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime),
+      agent_estimate_q (agent, agent->s_old, agent->a_old),
-//      agent_estimate_q (agent, agent->s_old, agent->a_old),
+      agent->envi->parameters.alpha,
-//      agent->envi->global_discount_variable);
+      reward,
-//
+      agent->envi->global_discount_variable,
-//  LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta);
+      agent_estimate_q (agent, s_next, a_prime),
+      delta);
  for (i = 0; i < agent->m; i++)
  {
@@ -764,17 +771,17 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
  state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
  if (net->bw_in_assigned > net->bw_in_available)
  {
-    state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available;
+    state[1] = 1;// net->bw_in_available;
  }
  else
  {
    state[1] = 0;
  }
-  LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  state[0] = %f\n", state[0]);
-  LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  state[1] = %f\n", state[1]);
-  LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
-  LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
  //get peer features
@@ -1072,7 +1079,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
  if (direction_in)
  {
-    new_bw = agent->bw_in + (1 * MIN_BW);
+    new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
    if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
      new_bw = GNUNET_ATS_MaxBandwidth;
    envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
@@ -1080,7 +1087,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
  }
  else
  {
-    new_bw = agent->bw_out + (1 * MIN_BW);
+    new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
    if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
      new_bw = GNUNET_ATS_MaxBandwidth;
    envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
@@ -1104,7 +1111,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
  if (direction_in)
  {
-    new_bw = agent->bw_in - (1 * MIN_BW);
+    new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
    if (new_bw < MIN_BW || new_bw > agent->bw_in)
      new_bw = MIN_BW;
    envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
@@ -1112,7 +1119,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
  }
  else
  {
-    new_bw = agent->bw_out - (1 * MIN_BW);
+    new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
    if (new_bw < MIN_BW || new_bw > agent->bw_out)
      new_bw = MIN_BW;
    envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
@@ -1220,6 +1227,7 @@ static void
 agent_step (struct RIL_Peer_Agent *agent)
 {
  int a_next = RIL_ACTION_INVALID;
+  int explore;
  double *s_next;
  double reward;
@@ -1229,22 +1237,12 @@ agent_step (struct RIL_Peer_Agent *agent)
  s_next = envi_get_state (agent->envi, agent);
  reward = envi_get_reward (agent->envi, agent);
+  explore = agent_decide_exploration (agent);
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu:   A: %d   R: %f  IN %llu   OUT %llu\n",
-      agent->step_count,
-      agent->a_old,
-      reward,
-      agent->bw_in/1024,
-      agent->bw_out/1024);
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu:   Best A: %d   Q(s,a): %f \n",
-        agent->step_count,
-        agent_get_action_best (agent, s_next),
-        agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
  switch (agent->envi->parameters.algorithm)
  {
  case RIL_ALGO_SARSA:
-    if (agent_decide_exploration (agent))
+    if (explore)
    {
      a_next = agent_get_action_explore (agent, s_next);
    }
@@ -1267,7 +1265,7 @@ agent_step (struct RIL_Peer_Agent *agent)
      //updates weights with best action, disregarding actually selected action (off-policy), if not first step
      agent_update_weights (agent, reward, s_next, a_next);
    }
-    if (agent_decide_exploration (agent))
+    if (explore)
    {
      a_next = agent_get_action_explore (agent, s_next);
      agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
@@ -1284,6 +1282,13 @@ agent_step (struct RIL_Peer_Agent *agent)
  agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step()  Step# %llu  R: %f  IN %llu  OUT %llu  A: %d\n",
+        agent->step_count,
+        reward,
+        agent->bw_in/1024,
+        agent->bw_out/1024,
+        a_next);
  envi_do_action (agent->envi, agent, a_next);
  GNUNET_free(agent->s_old);
@@ -1416,7 +1421,7 @@ ril_calculate_discount (struct GAS_RIL_Handle *solver)
  double tau;
  // MDP case - remove when debugged
-  if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us)
+  if (solver->simulate)
  {
    solver->global_discount_variable = solver->parameters.gamma;
    solver->global_discount_integrated = 1;
@@ -1484,6 +1489,11 @@ ril_step_schedule_next (struct GAS_RIL_Handle *solver)
  time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
+  if (solver->simulate)
+  {
+    time_next = GNUNET_TIME_UNIT_ZERO;
+  }
  if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
  {
    solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
@@ -1583,7 +1593,7 @@ agent_w_start (struct RIL_Peer_Agent *agent)
    for (k = 0; k < agent->m; k++)
    {
      if (0 == count) {
-        agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
+        agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
      }
      else {
        for (other = agent->envi->agents_head; NULL != other; other = other->next)
@@ -1878,6 +1888,10 @@ libgnunet_plugin_ats_ril_init (void *cls)
  {
    solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
  }
+  if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate))
+  {
+    solver->simulate = GNUNET_NO;
+  }
  env->sf.s_add = &GAS_ril_address_add;
  env->sf.s_address_update_property = &GAS_ril_address_property_changed;
@@ -1905,16 +1919,16 @@ libgnunet_plugin_ats_ril_init (void *cls)
    cur->type = env->networks[c];
    cur->bw_in_available = env->in_quota[c];
    cur->bw_out_available = env->out_quota[c];
-    LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network:  IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
+    LOG(GNUNET_ERROR_TYPE_INFO, "init()  Quotas for %s network:  IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
  }
-  LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  Parameters:\n");
-  LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
      solver->parameters.algorithm ? "Q" : "SARSA",
      solver->parameters.alpha,
      solver->parameters.beta,
      solver->parameters.lambda);
-  LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n",
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  explore = %f, global_share = %f\n",
      solver->parameters.explore_ratio,
      solver->parameters.reward_global_share);
author	Fabian Oehlmann <oehlmann@in.tum.de>	2013-12-04 19:21:05 +0000
committer	Fabian Oehlmann <oehlmann@in.tum.de>	2013-12-04 19:21:05 +0000
commit	a77de892778f22d687edb8ff9009dc67b2660691 (patch)
tree	45699ff658bb7336dea42c3c5624501b8e252621 /src/ats
parent	239310367ce5a29fa2ab641694270f6c9b90ef61 (diff)
download	gnunet-a77de892778f22d687edb8ff9009dc67b2660691.tar.gz gnunet-a77de892778f22d687edb8ff9009dc67b2660691.zip

diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c index 7cc09950a..603e93a4d 100755 --- a/src/ats/libgnunet_plugin_ats_ril.c +++ b/src/ats/libgnunet_plugin_ats_ril.c
@@ -45,6 +45,8 @@
45	#define RIL_DEFAULT_EXPLORE_RATIO 0.1	45	#define RIL_DEFAULT_EXPLORE_RATIO 0.1
46	#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5	46	#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
47		47
		48	#define RIL_INC_DEC_STEP_SIZE 1
		49
48	/**	50	/**
49	* ATS reinforcement learning solver	51	* ATS reinforcement learning solver
50	*	52	*
@@ -68,7 +70,7 @@ enum RIL_Action_Type
68	RIL_ACTION_BW_OUT_HLV = -5,	70	RIL_ACTION_BW_OUT_HLV = -5,
69	RIL_ACTION_BW_OUT_INC = -6,	71	RIL_ACTION_BW_OUT_INC = -6,
70	RIL_ACTION_BW_OUT_DEC = -7,	72	RIL_ACTION_BW_OUT_DEC = -7,
71	RIL_ACTION_TYPE_NUM = 2	73	RIL_ACTION_TYPE_NUM = 1
72	};	74	};
73		75
74	enum RIL_Algorithm	76	enum RIL_Algorithm
@@ -363,6 +365,11 @@ struct GAS_RIL_Handle
363	* Shutdown	365	* Shutdown
364	*/	366	*/
365	int done;	367	int done;
		368
		369	/**
		370	* Simulate steps, i.e. schedule steps immediately
		371	*/
		372	unsigned long long simulate;
366	};	373	};
367		374
368	/*	375	/*
@@ -396,8 +403,7 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
396		403
397	if (isinf(result))	404	if (isinf(result))
398	{	405	{
399	GNUNET_assert(GNUNET_NO);	406	return isinf(result) * UINT32_MAX; //TODO! fix
400	return isinf(result) * (DBL_MAX / 2); //TODO! fix
401	}	407	}
402		408
403	return result;	409	return result;
@@ -537,13 +543,14 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
537	delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value	543	delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
538	delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step	544	delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
539		545
540	// LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f y*Q(s+1,a+1) = %f Q(s,a) = %f\n, y = %f\n",	546	LOG(GNUNET_ERROR_TYPE_INFO, "update() Step# %llu Q(s,a): %f a: %f r: %f y: %f Q(s+1,a+1) = %f delta: %f\n",
541	// agent->envi->global_discount_integrated * reward,	547	agent->step_count,
542	// agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime),	548	agent_estimate_q (agent, agent->s_old, agent->a_old),
543	// agent_estimate_q (agent, agent->s_old, agent->a_old),	549	agent->envi->parameters.alpha,
544	// agent->envi->global_discount_variable);	550	reward,
545	//	551	agent->envi->global_discount_variable,
546	// LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta);	552	agent_estimate_q (agent, s_next, a_prime),
		553	delta);
547		554
548	for (i = 0; i < agent->m; i++)	555	for (i = 0; i < agent->m; i++)
549	{	556	{
@@ -764,17 +771,17 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
764	state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;	771	state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
765	if (net->bw_in_assigned > net->bw_in_available)	772	if (net->bw_in_assigned > net->bw_in_available)
766	{	773	{
767	state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available;	774	state[1] = 1;// net->bw_in_available;
768	}	775	}
769	else	776	else
770	{	777	{
771	state[1] = 0;	778	state[1] = 0;
772	}	779	}
773	LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]);	780	LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[0] = %f\n", state[0]);
774	LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]);	781	LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[1] = %f\n", state[1]);
775		782
776	LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);	783	LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
777	LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);	784	LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
778		785
779		786
780	//get peer features	787	//get peer features
@@ -1072,7 +1079,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1072		1079
1073	if (direction_in)	1080	if (direction_in)
1074	{	1081	{
1075	new_bw = agent->bw_in + (1 * MIN_BW);	1082	new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1076	if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)	1083	if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
1077	new_bw = GNUNET_ATS_MaxBandwidth;	1084	new_bw = GNUNET_ATS_MaxBandwidth;
1078	envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,	1085	envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
@@ -1080,7 +1087,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1080	}	1087	}
1081	else	1088	else
1082	{	1089	{
1083	new_bw = agent->bw_out + (1 * MIN_BW);	1090	new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1084	if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)	1091	if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
1085	new_bw = GNUNET_ATS_MaxBandwidth;	1092	new_bw = GNUNET_ATS_MaxBandwidth;
1086	envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,	1093	envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
@@ -1104,7 +1111,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1104		1111
1105	if (direction_in)	1112	if (direction_in)
1106	{	1113	{
1107	new_bw = agent->bw_in - (1 * MIN_BW);	1114	new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1108	if (new_bw < MIN_BW || new_bw > agent->bw_in)	1115	if (new_bw < MIN_BW || new_bw > agent->bw_in)
1109	new_bw = MIN_BW;	1116	new_bw = MIN_BW;
1110	envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,	1117	envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
@@ -1112,7 +1119,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1112	}	1119	}
1113	else	1120	else
1114	{	1121	{
1115	new_bw = agent->bw_out - (1 * MIN_BW);	1122	new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
1116	if (new_bw < MIN_BW || new_bw > agent->bw_out)	1123	if (new_bw < MIN_BW || new_bw > agent->bw_out)
1117	new_bw = MIN_BW;	1124	new_bw = MIN_BW;
1118	envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,	1125	envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
@@ -1220,6 +1227,7 @@ static void
1220	agent_step (struct RIL_Peer_Agent *agent)	1227	agent_step (struct RIL_Peer_Agent *agent)
1221	{	1228	{
1222	int a_next = RIL_ACTION_INVALID;	1229	int a_next = RIL_ACTION_INVALID;
		1230	int explore;
1223	double *s_next;	1231	double *s_next;
1224	double reward;	1232	double reward;
1225		1233
@@ -1229,22 +1237,12 @@ agent_step (struct RIL_Peer_Agent *agent)
1229		1237
1230	s_next = envi_get_state (agent->envi, agent);	1238	s_next = envi_get_state (agent->envi, agent);
1231	reward = envi_get_reward (agent->envi, agent);	1239	reward = envi_get_reward (agent->envi, agent);
1232		1240	explore = agent_decide_exploration (agent);
1233	GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: A: %d R: %f IN %llu OUT %llu\n",
1234	agent->step_count,
1235	agent->a_old,
1236	reward,
1237	agent->bw_in/1024,
1238	agent->bw_out/1024);
1239	GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Best A: %d Q(s,a): %f \n",
1240	agent->step_count,
1241	agent_get_action_best (agent, s_next),
1242	agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
1243		1241
1244	switch (agent->envi->parameters.algorithm)	1242	switch (agent->envi->parameters.algorithm)
1245	{	1243	{
1246	case RIL_ALGO_SARSA:	1244	case RIL_ALGO_SARSA:
1247	if (agent_decide_exploration (agent))	1245	if (explore)
1248	{	1246	{
1249	a_next = agent_get_action_explore (agent, s_next);	1247	a_next = agent_get_action_explore (agent, s_next);
1250	}	1248	}
@@ -1267,7 +1265,7 @@ agent_step (struct RIL_Peer_Agent *agent)
1267	//updates weights with best action, disregarding actually selected action (off-policy), if not first step	1265	//updates weights with best action, disregarding actually selected action (off-policy), if not first step
1268	agent_update_weights (agent, reward, s_next, a_next);	1266	agent_update_weights (agent, reward, s_next, a_next);
1269	}	1267	}
1270	if (agent_decide_exploration (agent))	1268	if (explore)
1271	{	1269	{
1272	a_next = agent_get_action_explore (agent, s_next);	1270	a_next = agent_get_action_explore (agent, s_next);
1273	agent_modify_eligibility (agent, RIL_E_ZERO, NULL);	1271	agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
@@ -1284,6 +1282,13 @@ agent_step (struct RIL_Peer_Agent *agent)
1284		1282
1285	agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);	1283	agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
1286		1284
		1285	GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n",
		1286	agent->step_count,
		1287	reward,
		1288	agent->bw_in/1024,
		1289	agent->bw_out/1024,
		1290	a_next);
		1291
1287	envi_do_action (agent->envi, agent, a_next);	1292	envi_do_action (agent->envi, agent, a_next);
1288		1293
1289	GNUNET_free(agent->s_old);	1294	GNUNET_free(agent->s_old);
@@ -1416,7 +1421,7 @@ ril_calculate_discount (struct GAS_RIL_Handle *solver)
1416	double tau;	1421	double tau;
1417		1422
1418	// MDP case - remove when debugged	1423	// MDP case - remove when debugged
1419	if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us)	1424	if (solver->simulate)
1420	{	1425	{
1421	solver->global_discount_variable = solver->parameters.gamma;	1426	solver->global_discount_variable = solver->parameters.gamma;
1422	solver->global_discount_integrated = 1;	1427	solver->global_discount_integrated = 1;
@@ -1484,6 +1489,11 @@ ril_step_schedule_next (struct GAS_RIL_Handle *solver)
1484		1489
1485	time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);	1490	time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
1486		1491
		1492	if (solver->simulate)
		1493	{
		1494	time_next = GNUNET_TIME_UNIT_ZERO;
		1495	}
		1496
1487	if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))	1497	if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
1488	{	1498	{
1489	solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,	1499	solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
@@ -1583,7 +1593,7 @@ agent_w_start (struct RIL_Peer_Agent *agent)
1583	for (k = 0; k < agent->m; k++)	1593	for (k = 0; k < agent->m; k++)
1584	{	1594	{
1585	if (0 == count) {	1595	if (0 == count) {
1586	agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);	1596	agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
1587	}	1597	}
1588	else {	1598	else {
1589	for (other = agent->envi->agents_head; NULL != other; other = other->next)	1599	for (other = agent->envi->agents_head; NULL != other; other = other->next)
@@ -1878,6 +1888,10 @@ libgnunet_plugin_ats_ril_init (void *cls)
1878	{	1888	{
1879	solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;	1889	solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
1880	}	1890	}
		1891	if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate))
		1892	{
		1893	solver->simulate = GNUNET_NO;
		1894	}
1881		1895
1882	env->sf.s_add = &GAS_ril_address_add;	1896	env->sf.s_add = &GAS_ril_address_add;
1883	env->sf.s_address_update_property = &GAS_ril_address_property_changed;	1897	env->sf.s_address_update_property = &GAS_ril_address_property_changed;
@@ -1905,16 +1919,16 @@ libgnunet_plugin_ats_ril_init (void *cls)
1905	cur->type = env->networks[c];	1919	cur->type = env->networks[c];
1906	cur->bw_in_available = env->in_quota[c];	1920	cur->bw_in_available = env->in_quota[c];
1907	cur->bw_out_available = env->out_quota[c];	1921	cur->bw_out_available = env->out_quota[c];
1908	LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);	1922	LOG(GNUNET_ERROR_TYPE_INFO, "init() Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
1909	}	1923	}
1910		1924
1911	LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");	1925	LOG(GNUNET_ERROR_TYPE_INFO, "init() Parameters:\n");
1912	LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",	1926	LOG(GNUNET_ERROR_TYPE_INFO, "init() Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
1913	solver->parameters.algorithm ? "Q" : "SARSA",	1927	solver->parameters.algorithm ? "Q" : "SARSA",
1914	solver->parameters.alpha,	1928	solver->parameters.alpha,
1915	solver->parameters.beta,	1929	solver->parameters.beta,
1916	solver->parameters.lambda);	1930	solver->parameters.lambda);
1917	LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n",	1931	LOG(GNUNET_ERROR_TYPE_INFO, "init() explore = %f, global_share = %f\n",
1918	solver->parameters.explore_ratio,	1932	solver->parameters.explore_ratio,
1919	solver->parameters.reward_global_share);	1933	solver->parameters.reward_global_share);
1920		1934