diff options
author | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-02 22:23:18 +0000 |
---|---|---|
committer | Fabian Oehlmann <oehlmann@in.tum.de> | 2014-01-02 22:23:18 +0000 |
commit | 56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1 (patch) | |
tree | 3f892860c8bb9757224710495442188137a1b340 /src | |
parent | 5f99a37bf86289e37db785cea41bec6d799d54cd (diff) | |
download | gnunet-56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1.tar.gz gnunet-56fd299ee48c317fbe5731cb1ec8c02ef6fb45f1.zip |
eligibility traces fix
Diffstat (limited to 'src')
-rwxr-xr-x | src/ats/plugin_ats_ril.c | 109 |
1 files changed, 63 insertions, 46 deletions
diff --git a/src/ats/plugin_ats_ril.c b/src/ats/plugin_ats_ril.c index 3f15a9918..a57c2068b 100755 --- a/src/ats/plugin_ats_ril.c +++ b/src/ats/plugin_ats_ril.c | |||
@@ -72,7 +72,7 @@ enum RIL_Action_Type | |||
72 | RIL_ACTION_BW_OUT_HLV = -5, | 72 | RIL_ACTION_BW_OUT_HLV = -5, |
73 | RIL_ACTION_BW_OUT_INC = -6, | 73 | RIL_ACTION_BW_OUT_INC = -6, |
74 | RIL_ACTION_BW_OUT_DEC = -7, | 74 | RIL_ACTION_BW_OUT_DEC = -7, |
75 | RIL_ACTION_TYPE_NUM = 1 | 75 | RIL_ACTION_TYPE_NUM = 2 |
76 | }; | 76 | }; |
77 | 77 | ||
78 | enum RIL_Algorithm | 78 | enum RIL_Algorithm |
@@ -89,7 +89,7 @@ enum RIL_Select | |||
89 | 89 | ||
90 | enum RIL_E_Modification | 90 | enum RIL_E_Modification |
91 | { | 91 | { |
92 | RIL_E_SET, | 92 | RIL_E_UPDATE, |
93 | RIL_E_ZERO, | 93 | RIL_E_ZERO, |
94 | RIL_E_ACCUMULATE, | 94 | RIL_E_ACCUMULATE, |
95 | RIL_E_REPLACE | 95 | RIL_E_REPLACE |
@@ -240,9 +240,9 @@ struct RIL_Peer_Agent | |||
240 | int a_old; | 240 | int a_old; |
241 | 241 | ||
242 | /** | 242 | /** |
243 | * Eligibility trace vector | 243 | * Eligibility traces |
244 | */ | 244 | */ |
245 | double * e; | 245 | double ** E; |
246 | 246 | ||
247 | /** | 247 | /** |
248 | * Address in use | 248 | * Address in use |
@@ -580,45 +580,49 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex | |||
580 | // delta, | 580 | // delta, |
581 | // i, | 581 | // i, |
582 | // agent->e[i]); | 582 | // agent->e[i]); |
583 | theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->e[i]; | 583 | theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->E[a_prime][i]; |
584 | } | 584 | } |
585 | } | 585 | } |
586 | 586 | ||
587 | 587 | ||
588 | /** | 588 | /** |
589 | * Changes the eligibility trace vector e in various manners: | 589 | * Changes the eligibility trace vector e in various manners: |
590 | * #RIL_E_ACCUMULATE - adds @a f to each component as in accumulating eligibility traces | 590 | * #RIL_E_ACCUMULATE - adds @a feature to each component as in accumulating eligibility traces |
591 | * #RIL_E_REPLACE - resets each component to @a f as in replacing traces | 591 | * #RIL_E_REPLACE - resets each component to @a feature as in replacing traces |
592 | * #RIL_E_SET - multiplies e with discount factor and lambda as in the update rule | 592 | * #RIL_E_SET - multiplies e with discount factor and lambda as in the update rule |
593 | * #RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing | 593 | * #RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing |
594 | * | 594 | * |
595 | * @param agent the agent handle | 595 | * @param agent the agent handle |
596 | * @param mod the kind of modification | 596 | * @param mod the kind of modification |
597 | * @param f how much to change | 597 | * @param feature the feature vector |
598 | */ | 598 | */ |
599 | static void | 599 | static void |
600 | agent_modify_eligibility (struct RIL_Peer_Agent *agent, | 600 | agent_modify_eligibility (struct RIL_Peer_Agent *agent, |
601 | enum RIL_E_Modification mod, | 601 | enum RIL_E_Modification mod, |
602 | double *f) | 602 | double *feature, |
603 | int action) | ||
603 | { | 604 | { |
604 | int i; | 605 | int i; |
605 | double *e = agent->e; | 606 | int k; |
606 | 607 | ||
607 | for (i = 0; i < agent->m; i++) | 608 | for (i = 0; i < agent->m; i++) |
608 | { | 609 | { |
609 | switch (mod) | 610 | switch (mod) |
610 | { | 611 | { |
611 | case RIL_E_ACCUMULATE: | 612 | case RIL_E_ACCUMULATE: |
612 | e[i] += f[i]; | 613 | agent->E[action][i] += feature[i]; |
613 | break; | 614 | break; |
614 | case RIL_E_REPLACE: | 615 | case RIL_E_REPLACE: |
615 | e[i] = f[i]; | 616 | agent->E[action][i] = agent->E[action][i]+feature[i] > 1 ? 1 : agent->E[action][i]+feature[i]; //TODO? Maybe remove as only accumulating traces really apply |
616 | break; | 617 | break; |
617 | case RIL_E_SET: | 618 | case RIL_E_UPDATE: |
618 | e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda; | 619 | agent->E[action][i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda; |
619 | break; | 620 | break; |
620 | case RIL_E_ZERO: | 621 | case RIL_E_ZERO: |
621 | e[i] = 0; | 622 | for (k = 0; k < agent->n; k++) |
623 | { | ||
624 | agent->E[k][i] = 0; | ||
625 | } | ||
622 | break; | 626 | break; |
623 | } | 627 | } |
624 | } | 628 | } |
@@ -769,7 +773,7 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
769 | int i; | 773 | int i; |
770 | int k; | 774 | int k; |
771 | 775 | ||
772 | state = GNUNET_malloc (sizeof(agent->m)); | 776 | state = GNUNET_malloc (sizeof(double) * agent->m); |
773 | 777 | ||
774 | y[0] = (double) agent->bw_out; | 778 | y[0] = (double) agent->bw_out; |
775 | y[1] = (double) agent->bw_in; | 779 | y[1] = (double) agent->bw_in; |
@@ -964,6 +968,7 @@ envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
964 | 968 | ||
965 | unsigned long long objective; | 969 | unsigned long long objective; |
966 | 970 | ||
971 | LOG(GNUNET_ERROR_TYPE_INFO, "address: %x\n", agent->address_inuse); | ||
967 | net = agent->address_inuse->solver_information; | 972 | net = agent->address_inuse->solver_information; |
968 | if (net->bw_in_assigned > net->bw_in_available) | 973 | if (net->bw_in_assigned > net->bw_in_available) |
969 | { | 974 | { |
@@ -1197,21 +1202,25 @@ envi_do_action (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int | |||
1197 | static int | 1202 | static int |
1198 | agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) | 1203 | agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state) |
1199 | { | 1204 | { |
1205 | int action; | ||
1206 | |||
1200 | if (agent_decide_exploration(agent)) | 1207 | if (agent_decide_exploration(agent)) |
1201 | { | 1208 | { |
1209 | action = agent_get_action_explore(agent, state); | ||
1202 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) | 1210 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) |
1203 | { | 1211 | { |
1204 | agent_modify_eligibility(agent, RIL_E_ZERO, NULL); | 1212 | agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action); |
1205 | } | 1213 | } |
1206 | return agent_get_action_explore(agent, state); | 1214 | return action; |
1207 | } | 1215 | } |
1208 | else | 1216 | else |
1209 | { | 1217 | { |
1218 | action = agent_get_action_best(agent, state); | ||
1210 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) | 1219 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) |
1211 | { | 1220 | { |
1212 | agent_modify_eligibility(agent, RIL_E_SET, NULL); | 1221 | agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, action); |
1213 | } | 1222 | } |
1214 | return agent_get_action_best(agent, state); | 1223 | return action; |
1215 | } | 1224 | } |
1216 | } | 1225 | } |
1217 | 1226 | ||
@@ -1234,11 +1243,6 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state) | |||
1234 | double sum = 0; | 1243 | double sum = 0; |
1235 | double r; | 1244 | double r; |
1236 | 1245 | ||
1237 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) | ||
1238 | { | ||
1239 | agent_modify_eligibility(agent, RIL_E_SET, NULL); | ||
1240 | } | ||
1241 | |||
1242 | for (i=0; i<agent->n; i++) | 1246 | for (i=0; i<agent->n; i++) |
1243 | { | 1247 | { |
1244 | eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); | 1248 | eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature); |
@@ -1255,6 +1259,10 @@ agent_select_softmax (struct RIL_Peer_Agent *agent, double *state) | |||
1255 | { | 1259 | { |
1256 | if (sum + p[i] > r) | 1260 | if (sum + p[i] > r) |
1257 | { | 1261 | { |
1262 | if (RIL_ALGO_Q == agent->envi->parameters.algorithm) | ||
1263 | { | ||
1264 | agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, i); | ||
1265 | } | ||
1258 | return i; | 1266 | return i; |
1259 | } | 1267 | } |
1260 | sum += p[i]; | 1268 | sum += p[i]; |
@@ -1307,7 +1315,7 @@ agent_step (struct RIL_Peer_Agent *agent) | |||
1307 | //updates weights with selected action (on-policy), if not first step | 1315 | //updates weights with selected action (on-policy), if not first step |
1308 | agent_update_weights (agent, reward, s_next, a_next); | 1316 | agent_update_weights (agent, reward, s_next, a_next); |
1309 | } | 1317 | } |
1310 | agent_modify_eligibility (agent, RIL_E_SET, s_next); | 1318 | agent_modify_eligibility (agent, RIL_E_UPDATE, s_next, a_next); |
1311 | break; | 1319 | break; |
1312 | 1320 | ||
1313 | case RIL_ALGO_Q: | 1321 | case RIL_ALGO_Q: |
@@ -1323,7 +1331,7 @@ agent_step (struct RIL_Peer_Agent *agent) | |||
1323 | 1331 | ||
1324 | GNUNET_assert(RIL_ACTION_INVALID != a_next); | 1332 | GNUNET_assert(RIL_ACTION_INVALID != a_next); |
1325 | 1333 | ||
1326 | agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next); | 1334 | agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next, a_next); |
1327 | 1335 | ||
1328 | // GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n", | 1336 | // GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n", |
1329 | // agent->step_count, | 1337 | // agent->step_count, |
@@ -1674,15 +1682,16 @@ agent_init (void *s, const struct GNUNET_PeerIdentity *peer) | |||
1674 | agent->n = RIL_ACTION_TYPE_NUM; | 1682 | agent->n = RIL_ACTION_TYPE_NUM; |
1675 | agent->m = 0; | 1683 | agent->m = 0; |
1676 | agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n); | 1684 | agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n); |
1685 | agent->E = (double **) GNUNET_malloc (sizeof (double *) * agent->n); | ||
1677 | for (i = 0; i < agent->n; i++) | 1686 | for (i = 0; i < agent->n; i++) |
1678 | { | 1687 | { |
1679 | agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); | 1688 | agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); |
1689 | agent->E[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); | ||
1680 | } | 1690 | } |
1681 | agent_w_start(agent); | 1691 | agent_w_start(agent); |
1682 | agent->a_old = RIL_ACTION_INVALID; | 1692 | agent->a_old = RIL_ACTION_INVALID; |
1683 | agent->s_old = GNUNET_malloc (sizeof (double) * agent->m); | 1693 | agent->s_old = GNUNET_malloc (sizeof (double) * agent->m); |
1684 | agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m); | 1694 | agent->address_inuse = NULL; |
1685 | agent_modify_eligibility (agent, RIL_E_ZERO, NULL); | ||
1686 | 1695 | ||
1687 | return agent; | 1696 | return agent; |
1688 | } | 1697 | } |
@@ -1700,11 +1709,12 @@ agent_die (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) | |||
1700 | 1709 | ||
1701 | for (i = 0; i < agent->n; i++) | 1710 | for (i = 0; i < agent->n; i++) |
1702 | { | 1711 | { |
1703 | GNUNET_free(agent->W[i]); | 1712 | GNUNET_free_non_null(agent->W[i]); |
1713 | GNUNET_free_non_null(agent->E[i]); | ||
1704 | } | 1714 | } |
1705 | GNUNET_free(agent->W); | 1715 | GNUNET_free_non_null(agent->W); |
1706 | GNUNET_free(agent->e); | 1716 | GNUNET_free_non_null(agent->E); |
1707 | GNUNET_free(agent->s_old); | 1717 | GNUNET_free_non_null(agent->s_old); |
1708 | GNUNET_free(agent); | 1718 | GNUNET_free(agent); |
1709 | } | 1719 | } |
1710 | 1720 | ||
@@ -1780,7 +1790,7 @@ ril_cut_from_vector (void **old, | |||
1780 | unsigned int bytes_hole; | 1790 | unsigned int bytes_hole; |
1781 | unsigned int bytes_after; | 1791 | unsigned int bytes_after; |
1782 | 1792 | ||
1783 | GNUNET_assert(old_length > hole_length); | 1793 | GNUNET_assert(old_length >= hole_length); |
1784 | GNUNET_assert(old_length >= (hole_start + hole_length)); | 1794 | GNUNET_assert(old_length >= (hole_start + hole_length)); |
1785 | 1795 | ||
1786 | size = element_size * (old_length - hole_length); | 1796 | size = element_size * (old_length - hole_length); |
@@ -2119,17 +2129,23 @@ GAS_ril_address_add (void *solver, struct ATS_Address *address, uint32_t network | |||
2119 | n_old = agent->n; | 2129 | n_old = agent->n; |
2120 | 2130 | ||
2121 | GNUNET_array_grow(agent->W, agent->n, n_new); | 2131 | GNUNET_array_grow(agent->W, agent->n, n_new); |
2132 | agent->n = n_old; | ||
2133 | GNUNET_array_grow(agent->E, agent->n, n_new); | ||
2122 | for (i = 0; i < n_new; i++) | 2134 | for (i = 0; i < n_new; i++) |
2123 | { | 2135 | { |
2124 | if (i < n_old) | 2136 | if (i < n_old) |
2125 | { | 2137 | { |
2126 | agent->m = m_old; | 2138 | agent->m = m_old; |
2127 | GNUNET_array_grow(agent->W[i], agent->m, m_new); | 2139 | GNUNET_array_grow(agent->W[i], agent->m, m_new); |
2140 | agent->m = m_old; | ||
2141 | GNUNET_array_grow(agent->E[i], agent->m, m_new); | ||
2128 | } | 2142 | } |
2129 | else | 2143 | else |
2130 | { | 2144 | { |
2131 | zero = 0; | 2145 | zero = 0; |
2132 | GNUNET_array_grow(agent->W[i], zero, m_new); | 2146 | GNUNET_array_grow(agent->W[i], zero, m_new); |
2147 | zero = 0; | ||
2148 | GNUNET_array_grow(agent->E[i], zero, m_new); | ||
2133 | } | 2149 | } |
2134 | } | 2150 | } |
2135 | 2151 | ||
@@ -2137,9 +2153,6 @@ GAS_ril_address_add (void *solver, struct ATS_Address *address, uint32_t network | |||
2137 | agent->m = m_old; | 2153 | agent->m = m_old; |
2138 | GNUNET_array_grow(agent->s_old, agent->m, m_new); | 2154 | GNUNET_array_grow(agent->s_old, agent->m, m_new); |
2139 | 2155 | ||
2140 | agent->m = m_old; | ||
2141 | GNUNET_array_grow(agent->e, agent->m, m_new); | ||
2142 | |||
2143 | ril_try_unblock_agent(s, agent, GNUNET_NO); | 2156 | ril_try_unblock_agent(s, agent, GNUNET_NO); |
2144 | 2157 | ||
2145 | ril_step (s); | 2158 | ril_step (s); |
@@ -2204,18 +2217,25 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o | |||
2204 | m_new = agent->m - ((s->parameters.divisor+1) * (s->parameters.divisor+1)); | 2217 | m_new = agent->m - ((s->parameters.divisor+1) * (s->parameters.divisor+1)); |
2205 | n_new = agent->n - 1; | 2218 | n_new = agent->n - 1; |
2206 | 2219 | ||
2207 | LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n"); | ||
2208 | |||
2209 | for (i = 0; i < agent->n; i++) | 2220 | for (i = 0; i < agent->n; i++) |
2210 | { | 2221 | { |
2222 | LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n"); | ||
2211 | ril_cut_from_vector ((void **) &agent->W[i], sizeof(double), | 2223 | ril_cut_from_vector ((void **) &agent->W[i], sizeof(double), |
2212 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), | 2224 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), |
2213 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); | 2225 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); |
2226 | LOG(GNUNET_ERROR_TYPE_DEBUG, "sec\n"); | ||
2227 | ril_cut_from_vector ((void **) &agent->E[i], sizeof(double), | ||
2228 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), | ||
2229 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); | ||
2214 | } | 2230 | } |
2215 | GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]); | 2231 | GNUNET_free_non_null(agent->W[RIL_ACTION_TYPE_NUM + address_index]); |
2216 | LOG(GNUNET_ERROR_TYPE_DEBUG, "second\n"); | 2232 | GNUNET_free_non_null(agent->E[RIL_ACTION_TYPE_NUM + address_index]); |
2233 | LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n"); | ||
2217 | ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index, | 2234 | ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index, |
2218 | 1, agent->n); | 2235 | 1, agent->n); |
2236 | LOG(GNUNET_ERROR_TYPE_DEBUG, "fourth\n"); | ||
2237 | ril_cut_from_vector ((void **) &agent->E, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index, | ||
2238 | 1, agent->n); | ||
2219 | //correct last action | 2239 | //correct last action |
2220 | if (agent->a_old > (RIL_ACTION_TYPE_NUM + address_index)) | 2240 | if (agent->a_old > (RIL_ACTION_TYPE_NUM + address_index)) |
2221 | { | 2241 | { |
@@ -2225,14 +2245,11 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o | |||
2225 | { | 2245 | { |
2226 | agent->a_old = RIL_ACTION_INVALID; | 2246 | agent->a_old = RIL_ACTION_INVALID; |
2227 | } | 2247 | } |
2228 | //decrease old state vector and eligibility vector | 2248 | //decrease old state vector |
2229 | LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n"); | 2249 | LOG(GNUNET_ERROR_TYPE_DEBUG, "fifth\n"); |
2230 | ril_cut_from_vector ((void **) &agent->s_old, sizeof(double), | 2250 | ril_cut_from_vector ((void **) &agent->s_old, sizeof(double), |
2231 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), | 2251 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), |
2232 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); | 2252 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); |
2233 | ril_cut_from_vector ((void **) &agent->e, sizeof(double), | ||
2234 | address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)), | ||
2235 | ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m); | ||
2236 | agent->m = m_new; | 2253 | agent->m = m_new; |
2237 | agent->n = n_new; | 2254 | agent->n = n_new; |
2238 | 2255 | ||