aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFabian Oehlmann <oehlmann@in.tum.de>2013-09-10 16:40:41 +0000
committerFabian Oehlmann <oehlmann@in.tum.de>2013-09-10 16:40:41 +0000
commitf7dd691efeb48df9777a903e58ce4a3916c80845 (patch)
treebcb39e54fd9babef3d15747da896053b752b8ffc /src
parent9deab19cc53c6c1324af2193cef9686875a39c16 (diff)
downloadgnunet-f7dd691efeb48df9777a903e58ce4a3916c80845.tar.gz
gnunet-f7dd691efeb48df9777a903e58ce4a3916c80845.zip
ats_ril: solver continued
Diffstat (limited to 'src')
-rwxr-xr-xsrc/ats/gnunet-service-ats-solver_ril.c310
1 files changed, 257 insertions, 53 deletions
diff --git a/src/ats/gnunet-service-ats-solver_ril.c b/src/ats/gnunet-service-ats-solver_ril.c
index d0524292e..0bf74af17 100755
--- a/src/ats/gnunet-service-ats-solver_ril.c
+++ b/src/ats/gnunet-service-ats-solver_ril.c
@@ -25,6 +25,7 @@
25 * @author Matthias Wachs 25 * @author Matthias Wachs
26 */ 26 */
27#include "platform.h" 27#include "platform.h"
28#include "float.h"
28#include "gnunet_util_lib.h" 29#include "gnunet_util_lib.h"
29#include "gnunet-service-ats_addresses.h" 30#include "gnunet-service-ats_addresses.h"
30#include "gnunet_statistics_service.h" 31#include "gnunet_statistics_service.h"
@@ -33,6 +34,7 @@
33#define RIL_DEFAULT_DISCOUNT_FACTOR 0.5 34#define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
34#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4 35#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
35#define RIL_DEFAULT_TRACE_DECAY 0.6 36#define RIL_DEFAULT_TRACE_DECAY 0.6
37#define RIL_EXPLORE_RATIO 0.1
36 38
37/** 39/**
38 * ATS reinforcement learning solver 40 * ATS reinforcement learning solver
@@ -40,6 +42,14 @@
40 * General description 42 * General description
41 */ 43 */
42 44
45enum RIL_Action
46{
47 RIL_BW_DBL = 0,
48 RIL_BW_HLV = 1,
49 RIL_NUM_ACTIONS = 2
50};
51//TODO add the rest of the actions
52
43/** 53/**
44 * Global learning parameters 54 * Global learning parameters
45 */ 55 */
@@ -74,29 +84,59 @@ struct RIL_Peer_Agent
74 struct RIL_Peer_Agent *prev; 84 struct RIL_Peer_Agent *prev;
75 85
76 /** 86 /**
87 * Environment handle
88 */
89 struct GAS_RIL_Handle *envi;
90
91 /**
77 * Peer ID 92 * Peer ID
78 */ 93 */
79 struct GNUNET_PeerIdentity peer; 94 struct GNUNET_PeerIdentity peer;
80 95
81 /** 96 /**
97 * Whether the agent is active or not
98 */
99 int active;
100
101 /**
102 * Number of performed time-steps
103 */
104 unsigned long long step_count;
105
106 /**
82 * Experience matrix W 107 * Experience matrix W
83 */ 108 */
84 double ** W; 109 double ** W;
85 110
86 /** 111 /**
112 * Number of rows of W / Number of state-vector features
113 */
114 int m;
115
116 /**
117 * Number of columns of W / Number of actions
118 */
119 int n;
120
121 /**
87 * Last perceived state feature vector 122 * Last perceived state feature vector
88 */ 123 */
89 double * s_t; 124 double * s_old;
90 125
91 /** 126 /**
92 * Last chosen action 127 * Last chosen action
93 */ 128 */
94 double * a_t; 129 int a_old;
95 130
96 /** 131 /**
97 * Last eligibility trace vector 132 * Last eligibility trace vector
98 */ 133 */
99 double * e_t; 134 double * e_t;
135
136 /**
137 * Address in use
138 */
139 struct ATS_Address * address;
100}; 140};
101 141
102struct RIL_Network 142struct RIL_Network
@@ -228,66 +268,225 @@ struct GAS_RIL_Handle
228 /** 268 /**
229 * List of active peer-agents 269 * List of active peer-agents
230 */ 270 */
231 struct RIL_Peer_Agent * agents_active_head; 271 struct RIL_Peer_Agent * agents_head;
232 struct RIL_Peer_Agent * agents_active_tail; 272 struct RIL_Peer_Agent * agents_tail;
233
234 /**
235 * List of paused peer-agents
236 */
237 struct RIL_Peer_Agent * agents_paused_head;
238 struct RIL_Peer_Agent * agents_paused_tail;
239}; 273};
240 274
241 275
242enum Actions 276
243{
244 bw_dbl,
245 bw_hlv
246};
247//TODO add the rest of the actions
248 277
249/** 278/**
250 * Private functions 279 * Private functions
251 * --------------------------- 280 * ---------------------------
252 */ 281 */
253 282
283/**
284 * Estimate the current action-value for state s and action a
285 * @param agent agent performing the estimation
286 * @param state s
287 * @param action a
288 * @return estimation value
289 */
290double
291agent_estimate_q (struct RIL_Peer_Agent *agent,
292 double *state,
293 int action)
294{
295 int i;
296 double result = 0;
297
298 for (i = 0; i < agent->m; i++)
299 {
300 result += state[i] * (agent->W)[agent->m][action];
301 }
302
303 return result;
304}
305
306int
307agent_choose_action (struct RIL_Peer_Agent *agent,
308 double *state)
309{
310 int i;
311 int max_i = -1;
312 double r;
313 double cur_q;
314 double max_q = DBL_MIN;
315
316 r = ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX);
317
318 if (r < RIL_EXPLORE_RATIO)
319 {
320 return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
321 }
322
323 for (i = 0; i < agent->m; i++)
324 {
325 cur_q = agent_estimate_q (agent, state, i);
326 if (cur_q > max_q)
327 {
328 max_q = cur_q;
329 max_i = i;
330 }
331 }
332
333 GNUNET_assert(-1 != max_i);
334
335 return max_i;
336}
337
338double *
339envi_get_state (void *s)
340{
341 int i;
342 struct GAS_RIL_Handle *solver = s;
343 struct RIL_Network *net;
344 double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
345
346 for (i = 0; i < solver->networks_count; i += 4)
347 {
348 net = (&solver->network_entries)[i];
349 state[i] = (double) net->bw_in_assigned;
350 state[i+1] = (double) net->bw_in_available;
351 state[i+2] = (double) net->bw_out_assigned;
352 state[i+3] = (double) net->bw_out_available;
353 }
354
355 return state;
356}
357
358double
359envi_get_reward ()
360{
361 //TODO implement
362 return (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX;
363}
364
254void 365void
255agent_periodic_step (void *solver, 366agent_step (struct RIL_Peer_Agent *agent)
367{
368 int a_next;
369 double *s_next;
370 double reward;
371 double delta;
372 double q_next;
373
374
375 s_next = envi_get_state(agent->envi);
376 reward = envi_get_reward();
377
378 a_next = agent_choose_action (agent, s_next);
379 q_next = agent_estimate_q(agent, s_next, a_next);
380
381 if (NULL != agent->s_old)
382 {
383 delta = reward +
384 (agent->envi->parameters.gamma * q_next) -
385 agent_estimate_q(agent, agent->s_old, agent->a_old);
386 }
387
388 GNUNET_free(agent->s_old);
389 agent->s_old = s_next;
390 agent->a_old = a_next;
391
392 agent->step_count += 1;
393}
394
395void
396ril_periodic_step (void *s,
256 const struct GNUNET_SCHEDULER_TaskContext *tc) 397 const struct GNUNET_SCHEDULER_TaskContext *tc)
257{ 398{
258 /* 399 /*
259 * iterate over active agents and do a time step 400 * iterate over active agents and do a time step
260 */ 401 */
261 struct GAS_RIL_Handle *s = solver; 402 struct GAS_RIL_Handle *solver = s;
403 struct RIL_Peer_Agent *cur;
262 404
263 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", s->step_count); 405 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count);
264 406
265 s->step_count += 1; 407 for (cur = solver->agents_head; NULL != cur; cur = cur->next)
266 s->next_step = GNUNET_SCHEDULER_add_delayed ( 408 {
267 s->step_time, 409 if (cur->active)
268 &agent_periodic_step, 410 {
411 agent_step (cur);
412 }
413 }
414
415 solver->step_count += 1;
416 solver->next_step = GNUNET_SCHEDULER_add_delayed (
417 solver->step_time,
418 &ril_periodic_step,
269 solver); 419 solver);
270} 420}
271 421
272/** 422/**
273 * Whether a peer already has an agent in a list 423 * Initialize an agent without addresses and its knowledge base
274 * @param head of list 424 * @param s ril solver
275 * @param peer in question 425 * @param peer the one in question
426 * @return handle to the new agent
276 */ 427 */
277int 428struct RIL_Peer_Agent *
278list_contains_agent (struct RIL_Peer_Agent * head, 429agent_init (void *s,
279 struct GNUNET_PeerIdentity * peer) 430 struct GNUNET_PeerIdentity peer)
280{ 431{
432 int i;
433 struct GAS_RIL_Handle * solver = s;
434 struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
435
436 agent->envi = solver;
437 agent->peer = peer;
438 agent->step_count = 0;
439 agent->active = GNUNET_NO;
440 agent->s_old = NULL;
441 agent->n = solver->networks_count * 4;
442 agent->m = RIL_NUM_ACTIONS;
443 agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n);
444 for (i = 0; i < agent->n; i++)
445 {
446 (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
447 }
448 agent->a_old = -1;
449 agent->e_t = NULL;
450
451 GNUNET_CONTAINER_DLL_insert (solver->agents_head, solver->agents_tail, agent);
452
453 return agent;
454}
455
456/**
457 * Deallocate agent
458 * @param s solver handle
459 * @param agent the agent to retire
460 */
461void
462agent_die (void *s,
463 struct RIL_Peer_Agent * agent)
464{
465
466}
467
468/**
469 * Returns the agent for a peer
470 * @param s solver handle
471 * @param peer identity of the peer
472 * @return agent
473 */
474struct RIL_Peer_Agent *
475ril_get_agent (struct GAS_RIL_Handle * s,
476 struct GNUNET_PeerIdentity peer)
477{
478 struct GAS_RIL_Handle * solver = s;
281 struct RIL_Peer_Agent * cur; 479 struct RIL_Peer_Agent * cur;
282 480
283 for (cur = head; NULL != cur; cur = cur->next) 481 for (cur = s->agents_head; NULL != cur; cur = cur->next)
284 { 482 {
285 if (!memcmp (&(cur->peer.hashPubKey), &peer->hashPubKey, sizeof(struct GNUNET_HashCode))) 483 if (0 == GNUNET_CRYPTO_hash_cmp (&peer.hashPubKey, &cur->peer.hashPubKey))
286 { 484 {
287 return GNUNET_YES; 485 return cur;
288 } 486 }
289 } 487 }
290 return GNUNET_NO; 488
489 return agent_init (solver, peer);
291} 490}
292 491
293/** 492/**
@@ -307,14 +506,15 @@ init_agents_it (void *cls,
307 struct ATS_Address *address = value; 506 struct ATS_Address *address = value;
308 struct RIL_Peer_Agent *agent; 507 struct RIL_Peer_Agent *agent;
309 508
310 if (!list_contains_agent (solver->agents_paused_head, &address->peer)) 509 agent = ril_get_agent (solver, address->peer);
510
511 GNUNET_assert (agent != NULL);
512
513 if (NULL == agent->address)
311 { 514 {
312 agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent)); 515 agent->address = address;
313 agent->peer = address->peer;
314 GNUNET_CONTAINER_DLL_insert (solver->agents_paused_head, solver->agents_paused_tail, agent);
315 } 516 }
316 517
317 //TODO add address to agent
318 return GNUNET_YES; 518 return GNUNET_YES;
319} 519}
320 520
@@ -339,13 +539,14 @@ GAS_ril_address_change_preference (void *solver,
339 enum GNUNET_ATS_PreferenceKind kind, 539 enum GNUNET_ATS_PreferenceKind kind,
340 double pref_rel) 540 double pref_rel)
341{ 541{
342 //TODO implement 542 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
343 543 "Preference `%s' for peer `%s' changed to %.2f \n",
344 /* 544 GNUNET_ATS_print_preference_type (kind),
345 * Probably nothing to do here. The preference is looked up during reward calculation and does 545 GNUNET_i2s (peer),
346 * not trigger anything 546 pref_rel);
347 */ 547 /*
348 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_change_preference() has been called\n"); 548 * Nothing to do here. Preferences are considered during reward calculation.
549 */
349} 550}
350 551
351 552
@@ -467,7 +668,7 @@ GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg,
467 668
468 solver->next_step = GNUNET_SCHEDULER_add_delayed ( 669 solver->next_step = GNUNET_SCHEDULER_add_delayed (
469 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000), 670 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
470 &agent_periodic_step, 671 &ril_periodic_step,
471 solver); 672 solver);
472 673
473 return solver; 674 return solver;
@@ -525,7 +726,8 @@ GAS_ril_address_add (void *solver,
525 */ 726 */
526void 727void
527GAS_ril_address_delete (void *solver, 728GAS_ril_address_delete (void *solver,
528 struct ATS_Address *address, int session_only) 729 struct ATS_Address *address,
730 int session_only)
529{ 731{
530 //TODO implement 732 //TODO implement
531 /* 733 /*
@@ -556,12 +758,14 @@ GAS_ril_address_property_changed (void *solver,
556 uint32_t abs_value, 758 uint32_t abs_value,
557 double rel_value) 759 double rel_value)
558{ 760{
559 //TODO implement 761 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
560 /* 762 "Property `%s' for peer `%s' address %p changed to %.2f \n",
561 * Like change_preference() not really interesting, since lookup happens anyway during reward 763 GNUNET_ATS_print_property_type (type),
562 * calculation 764 GNUNET_i2s (&address->peer),
563 */ 765 address, rel_value);
564 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_property_changed() has been called\n"); 766 /*
767 * Nothing to do here, properties are considered in every reward calculation
768 */
565} 769}
566 770
567 771