aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFabian Oehlmann <oehlmann@in.tum.de>2014-01-02 22:23:18 +0000
committerFabian Oehlmann <oehlmann@in.tum.de>2014-01-02 22:23:18 +0000
commit56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1 (patch)
tree3f892860c8bb9757224710495442188137a1b340 /src
parent5f99a37bf86289e37db785cea41bec6d799d54cd (diff)
downloadgnunet-56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1.tar.gz
gnunet-56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1.zip
eligibility traces fix
Diffstat (limited to 'src')
-rwxr-xr-xsrc/ats/plugin_ats_ril.c109
1 files changed, 63 insertions, 46 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c
index 3f15a9918..a57c2068b 100755
--- a/src/ats/plugin_ats_ril.c
+++ b/src/ats/plugin_ats_ril.c
@@ -72,7 +72,7 @@ enum RIL_Action_Type
72 RIL_ACTION_BW_OUT_HLV = -5, 72 RIL_ACTION_BW_OUT_HLV = -5,
73 RIL_ACTION_BW_OUT_INC = -6, 73 RIL_ACTION_BW_OUT_INC = -6,
74 RIL_ACTION_BW_OUT_DEC = -7, 74 RIL_ACTION_BW_OUT_DEC = -7,
75 RIL_ACTION_TYPE_NUM = 1 75 RIL_ACTION_TYPE_NUM = 2
76}; 76};
77 77
78enum RIL_Algorithm 78enum RIL_Algorithm
@@ -89,7 +89,7 @@ enum RIL_Select
89 89
90enum RIL_E_Modification 90enum RIL_E_Modification
91{ 91{
92 RIL_E_SET, 92 RIL_E_UPDATE,
93 RIL_E_ZERO, 93 RIL_E_ZERO,
94 RIL_E_ACCUMULATE, 94 RIL_E_ACCUMULATE,
95 RIL_E_REPLACE 95 RIL_E_REPLACE
@@ -240,9 +240,9 @@ struct RIL_Peer_Agent
240 int a_old; 240 int a_old;
241 241
242 /** 242 /**
243 * Eligibility trace vector 243 * Eligibility traces
244 */ 244 */
245 double * e; 245 double ** E;
246 246
247 /** 247 /**
248 * Address in use 248 * Address in use
@@ -580,45 +580,49 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
580// delta, 580// delta,
581// i, 581// i,
582// agent->e[i]); 582// agent->e[i]);
583 theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->e[i]; 583 theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->E[a_prime][i];
584 } 584 }
585} 585}
586 586
587 587
588/** 588/**
589 * Changes the eligibility trace vector e in various manners: 589 * Changes the eligibility trace vector e in various manners:
590 * #RIL_E_ACCUMULATE - adds @a f to each component as in accumulating eligibility traces 590 * #RIL_E_ACCUMULATE - adds @a feature to each component as in accumulating eligibility traces
591 * #RIL_E_REPLACE - resets each component to @a f as in replacing traces 591 * #RIL_E_REPLACE - resets each component to @a feature as in replacing traces
592 * #RIL_E_SET - multiplies e with discount factor and lambda as in the update rule 592 * #RIL_E_SET - multiplies e with discount factor and lambda as in the update rule
593 * #RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing 593 * #RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
594 * 594 *
595 * @param agent the agent handle 595 * @param agent the agent handle
596 * @param mod the kind of modification 596 * @param mod the kind of modification
597 * @param f how much to change 597 * @param feature the feature vector
598 */ 598 */
599static void 599static void
600agent_modify_eligibility (struct RIL_Peer_Agent *agent, 600agent_modify_eligibility (struct RIL_Peer_Agent *agent,
601 enum RIL_E_Modification mod, 601 enum RIL_E_Modification mod,
602 double *f) 602 double *feature,
603 int action)
603{ 604{
604 int i; 605 int i;
605 double *e = agent->e; 606 int k;
606 607
607 for (i = 0; i < agent->m; i++) 608 for (i = 0; i < agent->m; i++)
608 { 609 {
609 switch (mod) 610 switch (mod)
610 { 611 {
611 case RIL_E_ACCUMULATE: 612 case RIL_E_ACCUMULATE:
612 e[i] += f[i]; 613 agent->E[action][i] += feature[i];
613 break; 614 break;
614 case RIL_E_REPLACE: 615 case RIL_E_REPLACE:
615 e[i] = f[i]; 616 agent->E[action][i] = agent->E[action][i]+feature[i] > 1 ? 1 : agent->E[action][i]+feature[i]; //TODO? Maybe remove as only accumulating traces really apply
616 break; 617 break;
617 case RIL_E_SET: 618 case RIL_E_UPDATE:
618 e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda; 619 agent->E[action][i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda;
619 break; 620 break;
620 case RIL_E_ZERO: 621 case RIL_E_ZERO:
621 e[i] = 0; 622 for (k = 0; k < agent->n; k++)
623 {
624 agent->E[k][i] = 0;
625 }
622 break; 626 break;
623 } 627 }
624 } 628 }
@@ -769,7 +773,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
769 int i; 773 int i;
770 int k; 774 int k;
771 775
772 state = GNUNET_malloc (sizeof(agent->m)); 776 state = GNUNET_malloc (sizeof(double) * agent->m);
773 777
774 y[0] = (double) agent->bw_out; 778 y[0] = (double) agent->bw_out;
775 y[1] = (double) agent->bw_in; 779 y[1] = (double) agent->bw_in;
@@ -964,6 +968,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
964 968
965 unsigned long long objective; 969 unsigned long long objective;
966 970
971 LOG(GNUNET_ERROR_TYPE_INFO, "address: %x\n", agent->address_inuse);
967 net = agent->address_inuse->solver_information; 972 net = agent->address_inuse->solver_information;
968 if (net->bw_in_assigned > net->bw_in_available) 973 if (net->bw_in_assigned > net->bw_in_available)
969 { 974 {
@@ -1197,21 +1202,25 @@ envi_do_action (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int
1197static int 1202static int
1198agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) 1203agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)
1199{ 1204{
1205 int action;
1206
1200 if (agent_decide_exploration(agent)) 1207 if (agent_decide_exploration(agent))
1201 { 1208 {
1209 action = agent_get_action_explore(agent, state);
1202 if (RIL_ALGO_Q == agent->envi->parameters.algorithm) 1210 if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1203 { 1211 {
1204 agent_modify_eligibility(agent, RIL_E_ZERO, NULL); 1212 agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);
1205 } 1213 }
1206 return agent_get_action_explore(agent, state); 1214 return action;
1207 } 1215 }
1208 else 1216 else
1209 { 1217 {
1218 action = agent_get_action_best(agent, state);
1210 if (RIL_ALGO_Q == agent->envi->parameters.algorithm) 1219 if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1211 { 1220 {
1212 agent_modify_eligibility(agent, RIL_E_SET, NULL); 1221 agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, action);
1213 } 1222 }
1214 return agent_get_action_best(agent, state); 1223 return action;
1215 } 1224 }
1216} 1225}
1217 1226
@@ -1234,11 +1243,6 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state)
1234 double sum = 0; 1243 double sum = 0;
1235 double r; 1244 double r;
1236 1245
1237 if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1238 {
1239 agent_modify_eligibility(agent, RIL_E_SET, NULL);
1240 }
1241
1242 for (i=0; i<agent->n; i++) 1246 for (i=0; i<agent->n; i++)
1243 { 1247 {
1244 eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); 1248 eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
@@ -1255,6 +1259,10 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state)
1255 { 1259 {
1256 if (sum + p[i] > r) 1260 if (sum + p[i] > r)
1257 { 1261 {
1262 if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
1263 {
1264 agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, i);
1265 }
1258 return i; 1266 return i;
1259 } 1267 }
1260 sum += p[i]; 1268 sum += p[i];
@@ -1307,7 +1315,7 @@ agent_step (struct RIL_Peer_Agent *agent)
1307 //updates weights with selected action (on-policy), if not first step 1315 //updates weights with selected action (on-policy), if not first step
1308 agent_update_weights (agent, reward, s_next, a_next); 1316 agent_update_weights (agent, reward, s_next, a_next);
1309 } 1317 }
1310 agent_modify_eligibility (agent, RIL_E_SET, s_next); 1318 agent_modify_eligibility (agent, RIL_E_UPDATE, s_next, a_next);
1311 break; 1319 break;
1312 1320
1313 case RIL_ALGO_Q: 1321 case RIL_ALGO_Q:
@@ -1323,7 +1331,7 @@ agent_step (struct RIL_Peer_Agent *agent)
1323 1331
1324 GNUNET_assert(RIL_ACTION_INVALID != a_next); 1332 GNUNET_assert(RIL_ACTION_INVALID != a_next);
1325 1333
1326 agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next); 1334 agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next, a_next);
1327 1335
1328// GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n", 1336// GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n",
1329// agent->step_count, 1337// agent->step_count,
@@ -1674,15 +1682,16 @@ agent_init (void *s, const struct GNUNET_PeerIdentity *peer)
1674 agent->n = RIL_ACTION_TYPE_NUM; 1682 agent->n = RIL_ACTION_TYPE_NUM;
1675 agent->m = 0; 1683 agent->m = 0;
1676 agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n); 1684 agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
1685 agent->E = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
1677 for (i = 0; i < agent->n; i++) 1686 for (i = 0; i < agent->n; i++)
1678 { 1687 {
1679 agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); 1688 agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
1689 agent->E[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
1680 } 1690 }
1681 agent_w_start(agent); 1691 agent_w_start(agent);
1682 agent->a_old = RIL_ACTION_INVALID; 1692 agent->a_old = RIL_ACTION_INVALID;
1683 agent->s_old = GNUNET_malloc (sizeof (double) * agent->m); 1693 agent->s_old = GNUNET_malloc (sizeof (double) * agent->m);
1684 agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m); 1694 agent->address_inuse = NULL;
1685 agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
1686 1695
1687 return agent; 1696 return agent;
1688} 1697}
@@ -1700,11 +1709,12 @@ agent_die (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
1700 1709
1701 for (i = 0; i < agent->n; i++) 1710 for (i = 0; i < agent->n; i++)
1702 { 1711 {
1703 GNUNET_free(agent->W[i]); 1712 GNUNET_free_non_null(agent->W[i]);
1713 GNUNET_free_non_null(agent->E[i]);
1704 } 1714 }
1705 GNUNET_free(agent->W); 1715 GNUNET_free_non_null(agent->W);
1706 GNUNET_free(agent->e); 1716 GNUNET_free_non_null(agent->E);
1707 GNUNET_free(agent->s_old); 1717 GNUNET_free_non_null(agent->s_old);
1708 GNUNET_free(agent); 1718 GNUNET_free(agent);
1709} 1719}
1710 1720
@@ -1780,7 +1790,7 @@ ril_cut_from_vector (void **old,
1780 unsigned int bytes_hole; 1790 unsigned int bytes_hole;
1781 unsigned int bytes_after; 1791 unsigned int bytes_after;
1782 1792
1783 GNUNET_assert(old_length > hole_length); 1793 GNUNET_assert(old_length >= hole_length);
1784 GNUNET_assert(old_length >= (hole_start + hole_length)); 1794 GNUNET_assert(old_length >= (hole_start + hole_length));
1785 1795
1786 size = element_size * (old_length - hole_length); 1796 size = element_size * (old_length - hole_length);
@@ -2119,17 +2129,23 @@ GAS_ril_address_add (void *solver, struct ATS_Address *address, uint32_t network
2119 n_old = agent->n; 2129 n_old = agent->n;
2120 2130
2121 GNUNET_array_grow(agent->W, agent->n, n_new); 2131 GNUNET_array_grow(agent->W, agent->n, n_new);
2132 agent->n = n_old;
2133 GNUNET_array_grow(agent->E, agent->n, n_new);
2122 for (i = 0; i < n_new; i++) 2134 for (i = 0; i < n_new; i++)
2123 { 2135 {
2124 if (i < n_old) 2136 if (i < n_old)
2125 { 2137 {
2126 agent->m = m_old; 2138 agent->m = m_old;
2127 GNUNET_array_grow(agent->W[i], agent->m, m_new); 2139 GNUNET_array_grow(agent->W[i], agent->m, m_new);
2140 agent->m = m_old;
2141 GNUNET_array_grow(agent->E[i], agent->m, m_new);
2128 } 2142 }
2129 else 2143 else
2130 { 2144 {
2131 zero = 0; 2145 zero = 0;
2132 GNUNET_array_grow(agent->W[i], zero, m_new); 2146 GNUNET_array_grow(agent->W[i], zero, m_new);
2147 zero = 0;
2148 GNUNET_array_grow(agent->E[i], zero, m_new);
2133 } 2149 }
2134 } 2150 }
2135 2151
@@ -2137,9 +2153,6 @@ GAS_ril_address_add (void *solver, struct ATS_Address *address, uint32_t network
2137 agent->m = m_old; 2153 agent->m = m_old;
2138 GNUNET_array_grow(agent->s_old, agent->m, m_new); 2154 GNUNET_array_grow(agent->s_old, agent->m, m_new);
2139 2155
2140 agent->m = m_old;
2141 GNUNET_array_grow(agent->e, agent->m, m_new);
2142
2143 ril_try_unblock_agent(s, agent, GNUNET_NO); 2156 ril_try_unblock_agent(s, agent, GNUNET_NO);
2144 2157
2145 ril_step (s); 2158 ril_step (s);
@@ -2204,18 +2217,25 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
2204 m_new = agent->m - ((s->parameters.divisor+1) * (s->parameters.divisor+1)); 2217 m_new = agent->m - ((s->parameters.divisor+1) * (s->parameters.divisor+1));
2205 n_new = agent->n - 1; 2218 n_new = agent->n - 1;
2206 2219
2207 LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n");
2208
2209 for (i = 0; i < agent->n; i++) 2220 for (i = 0; i < agent->n; i++)
2210 { 2221 {
2222 LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n");
2211 ril_cut_from_vector ((void **) &agent->W[i], sizeof(double), 2223 ril_cut_from_vector ((void **) &agent->W[i], sizeof(double),
2212 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), 2224 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
2213 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); 2225 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
2226 LOG(GNUNET_ERROR_TYPE_DEBUG, "sec\n");
2227 ril_cut_from_vector ((void **) &agent->E[i], sizeof(double),
2228 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
2229 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
2214 } 2230 }
2215 GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]); 2231 GNUNET_free_non_null(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
2216 LOG(GNUNET_ERROR_TYPE_DEBUG, "second\n"); 2232 GNUNET_free_non_null(agent->E[RIL_ACTION_TYPE_NUM + address_index]);
2233 LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n");
2217 ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index, 2234 ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
2218 1, agent->n); 2235 1, agent->n);
2236 LOG(GNUNET_ERROR_TYPE_DEBUG, "fourth\n");
2237 ril_cut_from_vector ((void **) &agent->E, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
2238 1, agent->n);
2219 //correct last action 2239 //correct last action
2220 if (agent->a_old > (RIL_ACTION_TYPE_NUM + address_index)) 2240 if (agent->a_old > (RIL_ACTION_TYPE_NUM + address_index))
2221 { 2241 {
@@ -2225,14 +2245,11 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
2225 { 2245 {
2226 agent->a_old = RIL_ACTION_INVALID; 2246 agent->a_old = RIL_ACTION_INVALID;
2227 } 2247 }
2228 //decrease old state vector and eligibility vector 2248 //decrease old state vector
2229 LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n"); 2249 LOG(GNUNET_ERROR_TYPE_DEBUG, "fifth\n");
2230 ril_cut_from_vector ((void **) &agent->s_old, sizeof(double), 2250 ril_cut_from_vector ((void **) &agent->s_old, sizeof(double),
2231 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), 2251 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
2232 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); 2252 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
2233 ril_cut_from_vector ((void **) &agent->e, sizeof(double),
2234 address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
2235 ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
2236 agent->m = m_new; 2253 agent->m = m_new;
2237 agent->n = n_new; 2254 agent->n = n_new;
2238 2255