diff options
Diffstat (limited to 'src/ats')
-rwxr-xr-x | src/ats/gnunet-service-ats-solver_ril.c | 310 |
1 files changed, 257 insertions, 53 deletions
diff --git a/src/ats/gnunet-service-ats-solver_ril.c b/src/ats/gnunet-service-ats-solver_ril.c index d0524292e..0bf74af17 100755 --- a/src/ats/gnunet-service-ats-solver_ril.c +++ b/src/ats/gnunet-service-ats-solver_ril.c | |||
@@ -25,6 +25,7 @@ | |||
25 | * @author Matthias Wachs | 25 | * @author Matthias Wachs |
26 | */ | 26 | */ |
27 | #include "platform.h" | 27 | #include "platform.h" |
28 | #include "float.h" | ||
28 | #include "gnunet_util_lib.h" | 29 | #include "gnunet_util_lib.h" |
29 | #include "gnunet-service-ats_addresses.h" | 30 | #include "gnunet-service-ats_addresses.h" |
30 | #include "gnunet_statistics_service.h" | 31 | #include "gnunet_statistics_service.h" |
@@ -33,6 +34,7 @@ | |||
33 | #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5 | 34 | #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5 |
34 | #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4 | 35 | #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4 |
35 | #define RIL_DEFAULT_TRACE_DECAY 0.6 | 36 | #define RIL_DEFAULT_TRACE_DECAY 0.6 |
37 | #define RIL_EXPLORE_RATIO 0.1 | ||
36 | 38 | ||
37 | /** | 39 | /** |
38 | * ATS reinforcement learning solver | 40 | * ATS reinforcement learning solver |
@@ -40,6 +42,14 @@ | |||
40 | * General description | 42 | * General description |
41 | */ | 43 | */ |
42 | 44 | ||
45 | enum RIL_Action | ||
46 | { | ||
47 | RIL_BW_DBL = 0, | ||
48 | RIL_BW_HLV = 1, | ||
49 | RIL_NUM_ACTIONS = 2 | ||
50 | }; | ||
51 | //TODO add the rest of the actions | ||
52 | |||
43 | /** | 53 | /** |
44 | * Global learning parameters | 54 | * Global learning parameters |
45 | */ | 55 | */ |
@@ -74,29 +84,59 @@ struct RIL_Peer_Agent | |||
74 | struct RIL_Peer_Agent *prev; | 84 | struct RIL_Peer_Agent *prev; |
75 | 85 | ||
76 | /** | 86 | /** |
87 | * Environment handle | ||
88 | */ | ||
89 | struct GAS_RIL_Handle *envi; | ||
90 | |||
91 | /** | ||
77 | * Peer ID | 92 | * Peer ID |
78 | */ | 93 | */ |
79 | struct GNUNET_PeerIdentity peer; | 94 | struct GNUNET_PeerIdentity peer; |
80 | 95 | ||
81 | /** | 96 | /** |
97 | * Whether the agent is active or not | ||
98 | */ | ||
99 | int active; | ||
100 | |||
101 | /** | ||
102 | * Number of performed time-steps | ||
103 | */ | ||
104 | unsigned long long step_count; | ||
105 | |||
106 | /** | ||
82 | * Experience matrix W | 107 | * Experience matrix W |
83 | */ | 108 | */ |
84 | double ** W; | 109 | double ** W; |
85 | 110 | ||
86 | /** | 111 | /** |
112 | * Number of rows of W / Number of state-vector features | ||
113 | */ | ||
114 | int m; | ||
115 | |||
116 | /** | ||
117 | * Number of columns of W / Number of actions | ||
118 | */ | ||
119 | int n; | ||
120 | |||
121 | /** | ||
87 | * Last perceived state feature vector | 122 | * Last perceived state feature vector |
88 | */ | 123 | */ |
89 | double * s_t; | 124 | double * s_old; |
90 | 125 | ||
91 | /** | 126 | /** |
92 | * Last chosen action | 127 | * Last chosen action |
93 | */ | 128 | */ |
94 | double * a_t; | 129 | int a_old; |
95 | 130 | ||
96 | /** | 131 | /** |
97 | * Last eligibility trace vector | 132 | * Last eligibility trace vector |
98 | */ | 133 | */ |
99 | double * e_t; | 134 | double * e_t; |
135 | |||
136 | /** | ||
137 | * Address in use | ||
138 | */ | ||
139 | struct ATS_Address * address; | ||
100 | }; | 140 | }; |
101 | 141 | ||
102 | struct RIL_Network | 142 | struct RIL_Network |
@@ -228,66 +268,225 @@ struct GAS_RIL_Handle | |||
228 | /** | 268 | /** |
229 | * List of active peer-agents | 269 | * List of active peer-agents |
230 | */ | 270 | */ |
231 | struct RIL_Peer_Agent * agents_active_head; | 271 | struct RIL_Peer_Agent * agents_head; |
232 | struct RIL_Peer_Agent * agents_active_tail; | 272 | struct RIL_Peer_Agent * agents_tail; |
233 | |||
234 | /** | ||
235 | * List of paused peer-agents | ||
236 | */ | ||
237 | struct RIL_Peer_Agent * agents_paused_head; | ||
238 | struct RIL_Peer_Agent * agents_paused_tail; | ||
239 | }; | 273 | }; |
240 | 274 | ||
241 | 275 | ||
242 | enum Actions | 276 | |
243 | { | ||
244 | bw_dbl, | ||
245 | bw_hlv | ||
246 | }; | ||
247 | //TODO add the rest of the actions | ||
248 | 277 | ||
249 | /** | 278 | /** |
250 | * Private functions | 279 | * Private functions |
251 | * --------------------------- | 280 | * --------------------------- |
252 | */ | 281 | */ |
253 | 282 | ||
283 | /** | ||
284 | * Estimate the current action-value for state s and action a | ||
285 | * @param agent agent performing the estimation | ||
286 | * @param state s | ||
287 | * @param action a | ||
288 | * @return estimation value | ||
289 | */ | ||
290 | double | ||
291 | agent_estimate_q (struct RIL_Peer_Agent *agent, | ||
292 | double *state, | ||
293 | int action) | ||
294 | { | ||
295 | int i; | ||
296 | double result = 0; | ||
297 | |||
298 | for (i = 0; i < agent->m; i++) | ||
299 | { | ||
300 | result += state[i] * (agent->W)[agent->m][action]; | ||
301 | } | ||
302 | |||
303 | return result; | ||
304 | } | ||
305 | |||
306 | int | ||
307 | agent_choose_action (struct RIL_Peer_Agent *agent, | ||
308 | double *state) | ||
309 | { | ||
310 | int i; | ||
311 | int max_i = -1; | ||
312 | double r; | ||
313 | double cur_q; | ||
314 | double max_q = DBL_MIN; | ||
315 | |||
316 | r = ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX); | ||
317 | |||
318 | if (r < RIL_EXPLORE_RATIO) | ||
319 | { | ||
320 | return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n); | ||
321 | } | ||
322 | |||
323 | for (i = 0; i < agent->m; i++) | ||
324 | { | ||
325 | cur_q = agent_estimate_q (agent, state, i); | ||
326 | if (cur_q > max_q) | ||
327 | { | ||
328 | max_q = cur_q; | ||
329 | max_i = i; | ||
330 | } | ||
331 | } | ||
332 | |||
333 | GNUNET_assert(-1 != max_i); | ||
334 | |||
335 | return max_i; | ||
336 | } | ||
337 | |||
338 | double * | ||
339 | envi_get_state (void *s) | ||
340 | { | ||
341 | int i; | ||
342 | struct GAS_RIL_Handle *solver = s; | ||
343 | struct RIL_Network *net; | ||
344 | double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4); | ||
345 | |||
346 | for (i = 0; i < solver->networks_count; i += 4) | ||
347 | { | ||
348 | net = (&solver->network_entries)[i]; | ||
349 | state[i] = (double) net->bw_in_assigned; | ||
350 | state[i+1] = (double) net->bw_in_available; | ||
351 | state[i+2] = (double) net->bw_out_assigned; | ||
352 | state[i+3] = (double) net->bw_out_available; | ||
353 | } | ||
354 | |||
355 | return state; | ||
356 | } | ||
357 | |||
358 | double | ||
359 | envi_get_reward () | ||
360 | { | ||
361 | //TODO implement | ||
362 | return (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX; | ||
363 | } | ||
364 | |||
254 | void | 365 | void |
255 | agent_periodic_step (void *solver, | 366 | agent_step (struct RIL_Peer_Agent *agent) |
367 | { | ||
368 | int a_next; | ||
369 | double *s_next; | ||
370 | double reward; | ||
371 | double delta; | ||
372 | double q_next; | ||
373 | |||
374 | |||
375 | s_next = envi_get_state(agent->envi); | ||
376 | reward = envi_get_reward(); | ||
377 | |||
378 | a_next = agent_choose_action (agent, s_next); | ||
379 | q_next = agent_estimate_q(agent, s_next, a_next); | ||
380 | |||
381 | if (NULL != agent->s_old) | ||
382 | { | ||
383 | delta = reward + | ||
384 | (agent->envi->parameters.gamma * q_next) - | ||
385 | agent_estimate_q(agent, agent->s_old, agent->a_old); | ||
386 | } | ||
387 | |||
388 | GNUNET_free(agent->s_old); | ||
389 | agent->s_old = s_next; | ||
390 | agent->a_old = a_next; | ||
391 | |||
392 | agent->step_count += 1; | ||
393 | } | ||
394 | |||
395 | void | ||
396 | ril_periodic_step (void *s, | ||
256 | const struct GNUNET_SCHEDULER_TaskContext *tc) | 397 | const struct GNUNET_SCHEDULER_TaskContext *tc) |
257 | { | 398 | { |
258 | /* | 399 | /* |
259 | * iterate over active agents and do a time step | 400 | * iterate over active agents and do a time step |
260 | */ | 401 | */ |
261 | struct GAS_RIL_Handle *s = solver; | 402 | struct GAS_RIL_Handle *solver = s; |
403 | struct RIL_Peer_Agent *cur; | ||
262 | 404 | ||
263 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", s->step_count); | 405 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count); |
264 | 406 | ||
265 | s->step_count += 1; | 407 | for (cur = solver->agents_head; NULL != cur; cur = cur->next) |
266 | s->next_step = GNUNET_SCHEDULER_add_delayed ( | 408 | { |
267 | s->step_time, | 409 | if (cur->active) |
268 | &agent_periodic_step, | 410 | { |
411 | agent_step (cur); | ||
412 | } | ||
413 | } | ||
414 | |||
415 | solver->step_count += 1; | ||
416 | solver->next_step = GNUNET_SCHEDULER_add_delayed ( | ||
417 | solver->step_time, | ||
418 | &ril_periodic_step, | ||
269 | solver); | 419 | solver); |
270 | } | 420 | } |
271 | 421 | ||
272 | /** | 422 | /** |
273 | * Whether a peer already has an agent in a list | 423 | * Initialize an agent without addresses and its knowledge base |
274 | * @param head of list | 424 | * @param s ril solver |
275 | * @param peer in question | 425 | * @param peer the one in question |
426 | * @return handle to the new agent | ||
276 | */ | 427 | */ |
277 | int | 428 | struct RIL_Peer_Agent * |
278 | list_contains_agent (struct RIL_Peer_Agent * head, | 429 | agent_init (void *s, |
279 | struct GNUNET_PeerIdentity * peer) | 430 | struct GNUNET_PeerIdentity peer) |
280 | { | 431 | { |
432 | int i; | ||
433 | struct GAS_RIL_Handle * solver = s; | ||
434 | struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent)); | ||
435 | |||
436 | agent->envi = solver; | ||
437 | agent->peer = peer; | ||
438 | agent->step_count = 0; | ||
439 | agent->active = GNUNET_NO; | ||
440 | agent->s_old = NULL; | ||
441 | agent->n = solver->networks_count * 4; | ||
442 | agent->m = RIL_NUM_ACTIONS; | ||
443 | agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n); | ||
444 | for (i = 0; i < agent->n; i++) | ||
445 | { | ||
446 | (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); | ||
447 | } | ||
448 | agent->a_old = -1; | ||
449 | agent->e_t = NULL; | ||
450 | |||
451 | GNUNET_CONTAINER_DLL_insert (solver->agents_head, solver->agents_tail, agent); | ||
452 | |||
453 | return agent; | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * Deallocate agent | ||
458 | * @param s solver handle | ||
459 | * @param agent the agent to retire | ||
460 | */ | ||
461 | void | ||
462 | agent_die (void *s, | ||
463 | struct RIL_Peer_Agent * agent) | ||
464 | { | ||
465 | |||
466 | } | ||
467 | |||
468 | /** | ||
469 | * Returns the agent for a peer | ||
470 | * @param s solver handle | ||
471 | * @param peer identity of the peer | ||
472 | * @return agent | ||
473 | */ | ||
474 | struct RIL_Peer_Agent * | ||
475 | ril_get_agent (struct GAS_RIL_Handle * s, | ||
476 | struct GNUNET_PeerIdentity peer) | ||
477 | { | ||
478 | struct GAS_RIL_Handle * solver = s; | ||
281 | struct RIL_Peer_Agent * cur; | 479 | struct RIL_Peer_Agent * cur; |
282 | 480 | ||
283 | for (cur = head; NULL != cur; cur = cur->next) | 481 | for (cur = s->agents_head; NULL != cur; cur = cur->next) |
284 | { | 482 | { |
285 | if (!memcmp (&(cur->peer.hashPubKey), &peer->hashPubKey, sizeof(struct GNUNET_HashCode))) | 483 | if (0 == GNUNET_CRYPTO_hash_cmp (&peer.hashPubKey, &cur->peer.hashPubKey)) |
286 | { | 484 | { |
287 | return GNUNET_YES; | 485 | return cur; |
288 | } | 486 | } |
289 | } | 487 | } |
290 | return GNUNET_NO; | 488 | |
489 | return agent_init (solver, peer); | ||
291 | } | 490 | } |
292 | 491 | ||
293 | /** | 492 | /** |
@@ -307,14 +506,15 @@ init_agents_it (void *cls, | |||
307 | struct ATS_Address *address = value; | 506 | struct ATS_Address *address = value; |
308 | struct RIL_Peer_Agent *agent; | 507 | struct RIL_Peer_Agent *agent; |
309 | 508 | ||
310 | if (!list_contains_agent (solver->agents_paused_head, &address->peer)) | 509 | agent = ril_get_agent (solver, address->peer); |
510 | |||
511 | GNUNET_assert (agent != NULL); | ||
512 | |||
513 | if (NULL == agent->address) | ||
311 | { | 514 | { |
312 | agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent)); | 515 | agent->address = address; |
313 | agent->peer = address->peer; | ||
314 | GNUNET_CONTAINER_DLL_insert (solver->agents_paused_head, solver->agents_paused_tail, agent); | ||
315 | } | 516 | } |
316 | 517 | ||
317 | //TODO add address to agent | ||
318 | return GNUNET_YES; | 518 | return GNUNET_YES; |
319 | } | 519 | } |
320 | 520 | ||
@@ -339,13 +539,14 @@ GAS_ril_address_change_preference (void *solver, | |||
339 | enum GNUNET_ATS_PreferenceKind kind, | 539 | enum GNUNET_ATS_PreferenceKind kind, |
340 | double pref_rel) | 540 | double pref_rel) |
341 | { | 541 | { |
342 | //TODO implement | 542 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
343 | 543 | "Preference `%s' for peer `%s' changed to %.2f \n", | |
344 | /* | 544 | GNUNET_ATS_print_preference_type (kind), |
345 | * Probably nothing to do here. The preference is looked up during reward calculation and does | 545 | GNUNET_i2s (peer), |
346 | * not trigger anything | 546 | pref_rel); |
347 | */ | 547 | /* |
348 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_change_preference() has been called\n"); | 548 | * Nothing to do here. Preferences are considered during reward calculation. |
549 | */ | ||
349 | } | 550 | } |
350 | 551 | ||
351 | 552 | ||
@@ -467,7 +668,7 @@ GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg, | |||
467 | 668 | ||
468 | solver->next_step = GNUNET_SCHEDULER_add_delayed ( | 669 | solver->next_step = GNUNET_SCHEDULER_add_delayed ( |
469 | GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000), | 670 | GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000), |
470 | &agent_periodic_step, | 671 | &ril_periodic_step, |
471 | solver); | 672 | solver); |
472 | 673 | ||
473 | return solver; | 674 | return solver; |
@@ -525,7 +726,8 @@ GAS_ril_address_add (void *solver, | |||
525 | */ | 726 | */ |
526 | void | 727 | void |
527 | GAS_ril_address_delete (void *solver, | 728 | GAS_ril_address_delete (void *solver, |
528 | struct ATS_Address *address, int session_only) | 729 | struct ATS_Address *address, |
730 | int session_only) | ||
529 | { | 731 | { |
530 | //TODO implement | 732 | //TODO implement |
531 | /* | 733 | /* |
@@ -556,12 +758,14 @@ GAS_ril_address_property_changed (void *solver, | |||
556 | uint32_t abs_value, | 758 | uint32_t abs_value, |
557 | double rel_value) | 759 | double rel_value) |
558 | { | 760 | { |
559 | //TODO implement | 761 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
560 | /* | 762 | "Property `%s' for peer `%s' address %p changed to %.2f \n", |
561 | * Like change_preference() not really interesting, since lookup happens anyway during reward | 763 | GNUNET_ATS_print_property_type (type), |
562 | * calculation | 764 | GNUNET_i2s (&address->peer), |
563 | */ | 765 | address, rel_value); |
564 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_property_changed() has been called\n"); | 766 | /* |
767 | * Nothing to do here, properties are considered in every reward calculation | ||
768 | */ | ||
565 | } | 769 | } |
566 | 770 | ||
567 | 771 | ||