minor action-selection improvement

author: Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-16 19:02:54 +0000
committer: Fabian Oehlmann <oehlmann@in.tum.de> 2014-01-16 19:02:54 +0000
commit: a53b100e3e326970708e62c7660f09d40aae58d7 (patch)
tree: 6f4fcb57971be629425a5922b6d0a02833936f5d /src
parent: 5650ff38f1263a52c29511673aee1c849ae1fd8e (diff)
download: gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.tar.gz
gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.zip
1 files changed, 125 insertions, 23 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c
index be42c9dc7..d29767a49 100755
--- a/src/ats/plugin_ats_ril.c
+++ b/src/ats/plugin_ats_ril.c
@@ -507,6 +507,66 @@ agent_address_get (struct RIL_Peer_Agent *agent, struct ATS_Address *address)
 }
+static int
+agent_action_is_possible (struct RIL_Peer_Agent *agent, int action) // Tells whether `action` is currently available to `agent`; every return statement yields GNUNET_YES or GNUNET_NO
+{ // bandwidth actions are checked against RIL_MIN_BW / RIL_MAX_BW, address-switch actions against the address in use
+  int address_index; // only used for address-switch actions (default branch)
+  switch (action)
+  {
+  case RIL_ACTION_NOTHING: // doing nothing is always possible
+    return GNUNET_YES;
+    break;
+  case RIL_ACTION_BW_IN_INC:
+  case RIL_ACTION_BW_IN_DBL: // both inbound-raising actions are refused once bw_in has reached RIL_MAX_BW
+    if (agent->bw_in >= RIL_MAX_BW)
+      return GNUNET_NO;
+    else
+      return GNUNET_YES;
+    break;
+  case RIL_ACTION_BW_IN_DEC:
+  case RIL_ACTION_BW_IN_HLV: // both inbound-lowering actions are refused once bw_in is at or below RIL_MIN_BW
+    if (agent->bw_in <= RIL_MIN_BW)
+      return GNUNET_NO;
+    else
+      return GNUNET_YES;
+    break;
+  case RIL_ACTION_BW_OUT_INC:
+  case RIL_ACTION_BW_OUT_DBL: // same upper-bound check as above, applied to bw_out
+    if (agent->bw_out >= RIL_MAX_BW)
+      return GNUNET_NO;
+    else
+      return GNUNET_YES;
+    break;
+  case RIL_ACTION_BW_OUT_DEC:
+  case RIL_ACTION_BW_OUT_HLV: // same lower-bound check as above, applied to bw_out
+    if (agent->bw_out <= RIL_MIN_BW)
+      return GNUNET_NO;
+    else
+      return GNUNET_YES;
+    break;
+  default:
+    if ((action >= RIL_ACTION_TYPE_NUM) && (action < agent->n)) // address-switch action: values RIL_ACTION_TYPE_NUM .. agent->n - 1
+    {
+      address_index = action - RIL_ACTION_TYPE_NUM; // offset of the action past the fixed action types, compared below with agent_address_get_index() results
+      GNUNET_assert(address_index >= 0);
+      GNUNET_assert(
+          address_index <= agent_address_get_index (agent, agent->addresses_tail->address_naked)); // must not exceed the index of the tail entry's address
+      if ((agent_address_get_index(agent, agent->address_inuse) == address_index) || // refused when the target is the address already in use ...
+          agent->address_inuse->active) // ... or when the address in use has its `active` field set
+        return GNUNET_NO;
+      else
+        return GNUNET_YES;
+      break; // not reached: both branches above return
+    }
+    // error - action does not exist (neither a fixed action type nor an address-switch index below agent->n)
+    GNUNET_assert(GNUNET_NO); // NOTE(review): no return follows; presumably GNUNET_assert aborts on failure - confirm
+  }
+}
 /**
 * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
 * most reward in the future)
@@ -519,20 +579,20 @@ static int
 agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
 {
  int i;
-  int num_actions;
  int max_i = RIL_ACTION_INVALID;
  double cur_q;
  double max_q = -DBL_MAX;
-  num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
+  for (i = 0; i < agent->n; i++)
-  for (i = 0; i < num_actions; i++)
  {
-    cur_q = agent_estimate_q (agent, state, i);
+    if (agent_action_is_possible(agent, i))
-    if (cur_q > max_q)
    {
-      max_q = cur_q;
+      cur_q = agent_estimate_q (agent, state, i);
-      max_i = i;
+      if (cur_q > max_q)
+      {
+        max_q = cur_q;
+        max_i = i;
+      }
    }
  }
@@ -542,6 +602,44 @@ agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
 }
+static int
+agent_get_action_random (struct RIL_Peer_Agent *agent) // Returns the index of one randomly chosen action among those agent_action_is_possible() accepts
+{
+  int i;
+  int is_possible[agent->n]; // per-action flag, filled by the first loop (variable-length array of agent->n entries)
+  int sum = 0; // first loop: count of possible actions; second loop: running position among them
+  int r; // randomly drawn position among the possible actions
+  for (i = 0; i<agent->n; i++) // pass 1: record and count the possible actions
+  {
+    if (agent_action_is_possible(agent, i))
+    {
+      is_possible[i] = GNUNET_YES;
+      sum++;
+    }
+    else
+    {
+      is_possible[i] = GNUNET_NO;
+    }
+  }
+  r = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, sum); // presumably yields a value in [0, sum) - confirm against GNUNET_CRYPTO_random_u32
+  sum = -1; // restart so the first possible action gets position 0
+  for (i = 0; i<agent->n; i++) // pass 2: walk the possible actions until position r is reached
+  {
+    if (is_possible[i])
+    {
+      sum++;
+      if (sum == r)
+        return i;
+    }
+  }
+  GNUNET_assert(GNUNET_NO); // reached only if no possible action had position r; NOTE(review): no return follows - confirm GNUNET_assert aborts
+}
 /**
 * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
 *
@@ -787,7 +885,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
      x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor;
      d[0] = x[0]-y[0];
      d[1] = x[1]-y[1];
-      sigma = (((double) max_bw / 2) * M_SQRT2) / (double) solver->parameters.rbf_divisor;
+      sigma = (((double) max_bw / (double) solver->parameters.rbf_divisor) / 2.0) * M_SQRT2;
      f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma)));
      state[m++] = f;
    }
@@ -978,7 +1076,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
  if (delta != 0)
  {
-    agent->nop_bonus = abs(delta) * 0;
+    agent->nop_bonus = 0;
  }
  LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization);
@@ -1216,15 +1314,12 @@ static int
 agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)
 {
  int action;
-  int num_actions;
  double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
        UINT32_MAX) / (double) UINT32_MAX;
-  num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
  if (r < agent->envi->parameters.explore_ratio) //explore
  {
-    action = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, num_actions);
+    action = agent_get_action_random(agent);
    if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
    {
      agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);
@@ -1257,29 +1352,36 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state)
 {
  int i;
  int a_max;
-  int num_actions;
  double eqt[agent->n];
  double p[agent->n];
  double sum = 0;
  double r;
-  num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
  a_max = agent_get_action_max(agent, state);
-  for (i=0; i<num_actions; i++)
+  for (i=0; i<agent->n; i++)
  {
-    eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
+    if (agent_action_is_possible(agent, i))
-    sum += eqt[i];
+    {
+      eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
+      sum += eqt[i];
+    }
  }
-  for (i=0; i<num_actions; i++)
+  for (i=0; i<agent->n; i++)
  {
-    p[i] = eqt[i]/sum;
+    if (agent_action_is_possible(agent, i))
+    {
+      p[i] = eqt[i]/sum;
+    }
+    else
+    {
+      p[i] = 0;
+    }
  }
  r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
      UINT32_MAX) / (double) UINT32_MAX;
  sum = 0;
-  for (i=0; i<num_actions; i++)
+  for (i=0; i<agent->n; i++)
  {
    if (sum + p[i] > r)
    {
author	Fabian Oehlmann <oehlmann@in.tum.de>	2014-01-16 19:02:54 +0000
committer	Fabian Oehlmann <oehlmann@in.tum.de>	2014-01-16 19:02:54 +0000
commit	a53b100e3e326970708e62c7660f09d40aae58d7 (patch)
tree	6f4fcb57971be629425a5922b6d0a02833936f5d /src
parent	5650ff38f1263a52c29511673aee1c849ae1fd8e (diff)
download	gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.tar.gz gnunet-a53b100e3e326970708e62c7660f09d40aae58d7.zip

diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c index be42c9dc7..d29767a49 100755 --- a/src/ats/plugin_ats_ril.c +++ b/src/ats/plugin_ats_ril.c
@@ -507,6 +507,66 @@ agent_address_get (struct RIL_Peer_Agent *agent, struct ATS_Address *address)
507	}	507	}
508		508
509		509
		510	static int
		511	agent_action_is_possible (struct RIL_Peer_Agent *agent, int action)
		512	{
		513	int address_index;
		514
		515	switch (action)
		516	{
		517	case RIL_ACTION_NOTHING:
		518	return GNUNET_YES;
		519	break;
		520	case RIL_ACTION_BW_IN_INC:
		521	case RIL_ACTION_BW_IN_DBL:
		522	if (agent->bw_in >= RIL_MAX_BW)
		523	return GNUNET_NO;
		524	else
		525	return GNUNET_YES;
		526	break;
		527	case RIL_ACTION_BW_IN_DEC:
		528	case RIL_ACTION_BW_IN_HLV:
		529	if (agent->bw_in <= RIL_MIN_BW)
		530	return GNUNET_NO;
		531	else
		532	return GNUNET_YES;
		533	break;
		534	case RIL_ACTION_BW_OUT_INC:
		535	case RIL_ACTION_BW_OUT_DBL:
		536	if (agent->bw_out >= RIL_MAX_BW)
		537	return GNUNET_NO;
		538	else
		539	return GNUNET_YES;
		540	break;
		541	case RIL_ACTION_BW_OUT_DEC:
		542	case RIL_ACTION_BW_OUT_HLV:
		543	if (agent->bw_out <= RIL_MIN_BW)
		544	return GNUNET_NO;
		545	else
		546	return GNUNET_YES;
		547	break;
		548	default:
		549	if ((action >= RIL_ACTION_TYPE_NUM) && (action < agent->n)) //switch address action
		550	{
		551	address_index = action - RIL_ACTION_TYPE_NUM;
		552
		553	GNUNET_assert(address_index >= 0);
		554	GNUNET_assert(
		555	address_index <= agent_address_get_index (agent, agent->addresses_tail->address_naked));
		556
		557	if ((agent_address_get_index(agent, agent->address_inuse) == address_index) ||
		558	agent->address_inuse->active)
		559	return GNUNET_NO;
		560	else
		561	return GNUNET_YES;
		562	break;
		563	}
		564	// error - action does not exist
		565	GNUNET_assert(GNUNET_NO);
		566	}
		567	}
		568
		569
510	/**	570	/**
511	* Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the	571	* Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
512	* most reward in the future)	572	* most reward in the future)
@@ -519,20 +579,20 @@ static int
519	agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)	579	agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
520	{	580	{
521	int i;	581	int i;
522	int num_actions;
523	int max_i = RIL_ACTION_INVALID;	582	int max_i = RIL_ACTION_INVALID;
524	double cur_q;	583	double cur_q;
525	double max_q = -DBL_MAX;	584	double max_q = -DBL_MAX;
526		585
527	num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;	586	for (i = 0; i < agent->n; i++)
528
529	for (i = 0; i < num_actions; i++)
530	{	587	{
531	cur_q = agent_estimate_q (agent, state, i);	588	if (agent_action_is_possible(agent, i))
532	if (cur_q > max_q)
533	{	589	{
534	max_q = cur_q;	590	cur_q = agent_estimate_q (agent, state, i);
535	max_i = i;	591	if (cur_q > max_q)
		592	{
		593	max_q = cur_q;
		594	max_i = i;
		595	}
536	}	596	}
537	}	597	}
538		598
@@ -542,6 +602,44 @@ agent_get_action_max (struct RIL_Peer_Agent *agent, double *state)
542	}	602	}
543		603
544		604
		605	static int
		606	agent_get_action_random (struct RIL_Peer_Agent *agent)
		607	{
		608	int i;
		609	int is_possible[agent->n];
		610	int sum = 0;
		611	int r;
		612
		613	for (i = 0; i<agent->n; i++)
		614	{
		615	if (agent_action_is_possible(agent, i))
		616	{
		617	is_possible[i] = GNUNET_YES;
		618	sum++;
		619	}
		620	else
		621	{
		622	is_possible[i] = GNUNET_NO;
		623	}
		624	}
		625
		626	r = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, sum);
		627
		628	sum = -1;
		629	for (i = 0; i<agent->n; i++)
		630	{
		631	if (is_possible[i])
		632	{
		633	sum++;
		634	if (sum == r)
		635	return i;
		636	}
		637	}
		638
		639	GNUNET_assert(GNUNET_NO);
		640	}
		641
		642
545	/**	643	/**
546	* Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a	644	* Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
547	*	645	*
@@ -787,7 +885,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
787	x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor;	885	x[1] = (double) k * (double) max_bw / (double) solver->parameters.rbf_divisor;
788	d[0] = x[0]-y[0];	886	d[0] = x[0]-y[0];
789	d[1] = x[1]-y[1];	887	d[1] = x[1]-y[1];
790	sigma = (((double) max_bw / 2) * M_SQRT2) / (double) solver->parameters.rbf_divisor;	888	sigma = (((double) max_bw / (double) solver->parameters.rbf_divisor) / 2.0) * M_SQRT2;
791	f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma)));	889	f = exp(-((d[0]*d[0] + d[1]*d[1]) / (2 * sigma * sigma)));
792	state[m++] = f;	890	state[m++] = f;
793	}	891	}
@@ -978,7 +1076,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
978		1076
979	if (delta != 0)	1077	if (delta != 0)
980	{	1078	{
981	agent->nop_bonus = abs(delta) * 0;	1079	agent->nop_bonus = 0;
982	}	1080	}
983		1081
984	LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization);	1082	LOG(GNUNET_ERROR_TYPE_DEBUG, "utility: %f, welfare: %f, objective, overutilization: %d\n", agent_get_utility (agent), net->social_welfare, objective, overutilization);
@@ -1216,15 +1314,12 @@ static int
1216	agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)	1314	agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)
1217	{	1315	{
1218	int action;	1316	int action;
1219	int num_actions;
1220	double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,	1317	double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
1221	UINT32_MAX) / (double) UINT32_MAX;	1318	UINT32_MAX) / (double) UINT32_MAX;
1222		1319
1223	num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
1224
1225	if (r < agent->envi->parameters.explore_ratio) //explore	1320	if (r < agent->envi->parameters.explore_ratio) //explore
1226	{	1321	{
1227	action = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, num_actions);	1322	action = agent_get_action_random(agent);
1228	if (RIL_ALGO_Q == agent->envi->parameters.algorithm)	1323	if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1229	{	1324	{
1230	agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);	1325	agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);
@@ -1257,29 +1352,36 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state)
1257	{	1352	{
1258	int i;	1353	int i;
1259	int a_max;	1354	int a_max;
1260	int num_actions;
1261	double eqt[agent->n];	1355	double eqt[agent->n];
1262	double p[agent->n];	1356	double p[agent->n];
1263	double sum = 0;	1357	double sum = 0;
1264	double r;	1358	double r;
1265		1359
1266	num_actions = agent->address_inuse->used ? RIL_ACTION_TYPE_NUM : agent->n;
1267
1268	a_max = agent_get_action_max(agent, state);	1360	a_max = agent_get_action_max(agent, state);
1269		1361
1270	for (i=0; i<num_actions; i++)	1362	for (i=0; i<agent->n; i++)
1271	{	1363	{
1272	eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);	1364	if (agent_action_is_possible(agent, i))
1273	sum += eqt[i];	1365	{
		1366	eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
		1367	sum += eqt[i];
		1368	}
1274	}	1369	}
1275	for (i=0; i<num_actions; i++)	1370	for (i=0; i<agent->n; i++)
1276	{	1371	{
1277	p[i] = eqt[i]/sum;	1372	if (agent_action_is_possible(agent, i))
		1373	{
		1374	p[i] = eqt[i]/sum;
		1375	}
		1376	else
		1377	{
		1378	p[i] = 0;
		1379	}
1278	}	1380	}
1279	r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,	1381	r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
1280	UINT32_MAX) / (double) UINT32_MAX;	1382	UINT32_MAX) / (double) UINT32_MAX;
1281	sum = 0;	1383	sum = 0;
1282	for (i=0; i<num_actions; i++)	1384	for (i=0; i<agent->n; i++)
1283	{	1385	{
1284	if (sum + p[i] > r)	1386	if (sum + p[i] > r)
1285	{	1387	{