about | summary | refs | log | tree | commit | diff
path: root/src/ats
diff options
context:
space:
mode:
author Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-02 19:01:42 +0000
committer Fabian Oehlmann <oehlmann@in.tum.de> 2013-12-02 19:01:42 +0000
commit 283188035a4c105f884d9eae1363d2aa32a02e2a (patch)
tree 8535b8bf454c23e0a9f60558375ceefdd8b942dd /src/ats
parent 3be0400cc15603a87cc66cdd137a8351d4328d6e (diff)
download gnunet-283188035a4c105f884d9eae1363d2aa32a02e2a.tar.gz
gnunet-283188035a4c105f884d9eae1363d2aa32a02e2a.zip
debug fa
Diffstat (limited to 'src/ats')
-rwxr-xr-x src/ats/libgnunet_plugin_ats_ril.c 394
1 files changed, 242 insertions, 152 deletions
diff --git a/src/ats/libgnunet_plugin_ats_ril.c b/src/ats/libgnunet_plugin_ats_ril.c
index f3f958177..9403975b8 100755
--- a/src/ats/libgnunet_plugin_ats_ril.c
+++ b/src/ats/libgnunet_plugin_ats_ril.c
@@ -28,15 +28,18 @@
28 28
29#define LOG(kind,...) GNUNET_log_from (kind, "ats-ril",__VA_ARGS__) 29#define LOG(kind,...) GNUNET_log_from (kind, "ats-ril",__VA_ARGS__)
30 30
31#define MIN_BW ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__)
32
31#define RIL_ACTION_INVALID -1 33#define RIL_ACTION_INVALID -1
32#define RIL_FEATURES_ADDRESS_COUNT (3 + GNUNET_ATS_QualityPropertiesCount) 34#define RIL_FEATURES_ADDRESS_COUNT (0)// + GNUNET_ATS_QualityPropertiesCount)
33#define RIL_FEATURES_NETWORK_COUNT 6 35#define RIL_FEATURES_NETWORK_COUNT 2
34#define RIL_INTERVAL_EXPONENT 10 36#define RIL_INTERVAL_EXPONENT 10
35 37
36#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500) 38#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500)
37#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000) 39#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
38#define RIL_DEFAULT_ALGORITHM RIL_ALGO_SARSA 40#define RIL_DEFAULT_ALGORITHM RIL_ALGO_SARSA
39#define RIL_DEFAULT_DISCOUNT_BETA 1 41#define RIL_DEFAULT_DISCOUNT_BETA 1
42#define RIL_DEFAULT_DISCOUNT_GAMMA 0.5
40#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.1 43#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.1
41#define RIL_DEFAULT_TRACE_DECAY 0.5 44#define RIL_DEFAULT_TRACE_DECAY 0.5
42#define RIL_DEFAULT_EXPLORE_RATIO 0.1 45#define RIL_DEFAULT_EXPLORE_RATIO 0.1
@@ -56,16 +59,16 @@
56 */ 59 */
57enum RIL_Action_Type 60enum RIL_Action_Type
58{ 61{
59 RIL_ACTION_NOTHING = 0, 62 RIL_ACTION_NOTHING = -1,
60 RIL_ACTION_BW_IN_DBL = -1, //TODO! put actions back 63 RIL_ACTION_BW_IN_DBL = -2, //TODO! put actions back
61 RIL_ACTION_BW_IN_HLV = -2, 64 RIL_ACTION_BW_IN_HLV = -3,
62 RIL_ACTION_BW_IN_INC = 1, 65 RIL_ACTION_BW_IN_INC = 0,
63 RIL_ACTION_BW_IN_DEC = 2, 66 RIL_ACTION_BW_IN_DEC = 1,
64 RIL_ACTION_BW_OUT_DBL = -3, 67 RIL_ACTION_BW_OUT_DBL = -4,
65 RIL_ACTION_BW_OUT_HLV = -4, 68 RIL_ACTION_BW_OUT_HLV = -5,
66 RIL_ACTION_BW_OUT_INC = 3, 69 RIL_ACTION_BW_OUT_INC = -6,
67 RIL_ACTION_BW_OUT_DEC = 4, 70 RIL_ACTION_BW_OUT_DEC = -7,
68 RIL_ACTION_TYPE_NUM = 5 71 RIL_ACTION_TYPE_NUM = 2
69}; 72};
70 73
71enum RIL_Algorithm 74enum RIL_Algorithm
@@ -93,14 +96,19 @@ struct RIL_Learning_Parameters
93 enum RIL_Algorithm algorithm; 96 enum RIL_Algorithm algorithm;
94 97
95 /** 98 /**
96 * Learning discount factor in the TD-update 99 * Gradient-descent step-size
100 */
101 double alpha;
102
103 /**
104 * Learning discount variable in the TD-update for semi-MDPs
97 */ 105 */
98 double beta; 106 double beta;
99 107
100 /** 108 /**
101 * Gradient-descent step-size 109 * Learning discount factor in the TD-update for MDPs
102 */ 110 */
103 double alpha; 111 double gamma;
104 112
105 /** 113 /**
106 * Trace-decay factor for eligibility traces 114 * Trace-decay factor for eligibility traces
@@ -242,7 +250,7 @@ struct RIL_Peer_Agent
242 int suggestion_issue; 250 int suggestion_issue;
243 251
244 /** 252 /**
245 * The address which has to be issued () 253 * The address which has to be issued
246 */ 254 */
247 struct ATS_Address * suggestion_address; 255 struct ATS_Address * suggestion_address;
248}; 256};
@@ -260,9 +268,19 @@ struct RIL_Network
260 unsigned long long bw_in_available; 268 unsigned long long bw_in_available;
261 269
262 /** 270 /**
271 * Bandwidth inbound assigned in network after last step
272 */
273 unsigned long long bw_in_assigned;
274
275 /**
263 * Total available outbound bandwidth 276 * Total available outbound bandwidth
264 */ 277 */
265 unsigned long long bw_out_available; 278 unsigned long long bw_out_available;
279
280 /**
281 * * Bandwidth outbound assigned in network after last step
282 */
283 unsigned long long bw_out_assigned;
266}; 284};
267 285
268/** 286/**
@@ -378,7 +396,8 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
378 396
379 if (isinf(result)) 397 if (isinf(result))
380 { 398 {
381 return isinf(result) * (DBL_MAX / 2); //is still big enough 399 GNUNET_assert(GNUNET_NO);
400 return isinf(result) * (DBL_MAX / 2); //TODO! fix
382 } 401 }
383 402
384 return result; 403 return result;
@@ -518,9 +537,22 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
518 delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value 537 delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
519 delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step 538 delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
520 539
540// LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f y*Q(s+1,a+1) = %f Q(s,a) = %f\n, y = %f\n",
541// agent->envi->global_discount_integrated * reward,
542// agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime),
543// agent_estimate_q (agent, agent->s_old, agent->a_old),
544// agent->envi->global_discount_variable);
545//
546// LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta);
547
521 for (i = 0; i < agent->m; i++) 548 for (i = 0; i < agent->m; i++)
522 { 549 {
523 theta[i] += agent->envi->parameters.alpha * delta * agent->e[i]; 550// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
551// agent->envi->parameters.alpha,
552// delta,
553// i,
554// agent->e[i]);
555 theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->e[i];
524 } 556 }
525} 557}
526 558
@@ -535,7 +567,7 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
535 * @param mod the kind of modification 567 * @param mod the kind of modification
536 */ 568 */
537static void 569static void
538agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification mod) 570agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification mod, double *f)
539{ 571{
540 int i; 572 int i;
541 double *e = agent->e; 573 double *e = agent->e;
@@ -545,10 +577,10 @@ agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification
545 switch (mod) 577 switch (mod)
546 { 578 {
547 case RIL_E_ACCUMULATE: 579 case RIL_E_ACCUMULATE:
548 e[i] += 1; 580 e[i] += f[i];
549 break; 581 break;
550 case RIL_E_REPLACE: 582 case RIL_E_REPLACE:
551 e[i] = 1; 583 e[i] = f[i];
552 break; 584 break;
553 case RIL_E_SET: 585 case RIL_E_SET:
554 e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda; 586 e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda;
@@ -688,8 +720,6 @@ envi_state_networks (struct GAS_RIL_Handle *solver)
688{ 720{
689 int i; 721 int i;
690 struct RIL_Network net; 722 struct RIL_Network net;
691 unsigned long long assigned_in;
692 unsigned long long assigned_out;
693 int overutilized_in; 723 int overutilized_in;
694 int overutilized_out; 724 int overutilized_out;
695 725
@@ -697,17 +727,13 @@ envi_state_networks (struct GAS_RIL_Handle *solver)
697 { 727 {
698 net = solver->network_entries[i]; 728 net = solver->network_entries[i];
699 729
700 assigned_in = ril_network_get_assigned(solver, net.type, GNUNET_YES); 730 overutilized_in = net.bw_in_assigned > net.bw_in_available;
701 assigned_out = ril_network_get_assigned(solver, net.type, GNUNET_NO); 731 overutilized_out = net.bw_out_assigned > net.bw_out_available;
702 overutilized_in = assigned_in > net.bw_in_available;
703 overutilized_out = assigned_out > net.bw_out_available;
704 732
705 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = (double) assigned_in; 733 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
706 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) net.bw_in_available; 734 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
707 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = (double) overutilized_in; 735 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
708 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) assigned_out; 736 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
709 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 4] = (double) net.bw_out_available;
710 solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 5] = (double) overutilized_out;
711 } 737 }
712} 738}
713 739
@@ -721,39 +747,57 @@ static double *
721envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) 747envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
722{ 748{
723 int i; 749 int i;
724 int k; 750// int k;
725 double *state = GNUNET_malloc (sizeof (double) * agent->m); 751 double *state = GNUNET_malloc (sizeof (double) * agent->m);
726 struct RIL_Address_Wrapped *cur_address; 752 struct RIL_Address_Wrapped *cur_address;
727 const double *preferences; 753// const double *preferences;
728 const double *properties; 754// const double *properties;
755 struct RIL_Network *net;
729 756
730 //copy global networks state 757 //copy global networks state
731 for (i = 0; i < solver->networks_count * RIL_FEATURES_NETWORK_COUNT; i++) 758 for (i = 0; i < solver->networks_count * RIL_FEATURES_NETWORK_COUNT; i++)
732 { 759 {
733 state[i] = solver->global_state_networks[i]; 760// state[i] = solver->global_state_networks[i];
734 } 761 }
762 net = agent->address_inuse->solver_information;
735 763
736 //get peer features 764 state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
737 preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls, 765 if (net->bw_in_assigned > net->bw_in_available)
738 &agent->peer); 766 {
739 for (k = 0; k < GNUNET_ATS_PreferenceCount; k++) 767 state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available;
768 }
769 else
740 { 770 {
741 state[i++] = preferences[k]; 771 state[1] = 0;
742 } 772 }
773 LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]);
774 LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]);
775
776 LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
777 LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
778
779
780 //get peer features
781// preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
782// &agent->peer);
783// for (k = 0; k < GNUNET_ATS_PreferenceCount; k++)
784// {
785// state[i++] = preferences[k];
786// }
743 787
744 //get address specific features 788 //get address specific features
745 for (cur_address = agent->addresses_head; NULL != cur_address; cur_address = cur_address->next) 789 for (cur_address = agent->addresses_head; NULL != cur_address; cur_address = cur_address->next)
746 { 790 {
747 //when changing the number of address specific state features, change RIL_FEATURES_ADDRESS_COUNT macro 791// //when changing the number of address specific state features, change RIL_FEATURES_ADDRESS_COUNT macro
748 state[i++] = cur_address->address_naked->active; 792// state[i++] = cur_address->address_naked->active;
749 state[i++] = cur_address->address_naked->active ? agent->bw_in : 0; 793// state[i++] = cur_address->address_naked->active ? agent->bw_in : 0;
750 state[i++] = cur_address->address_naked->active ? agent->bw_out : 0; 794// state[i++] = cur_address->address_naked->active ? agent->bw_out : 0;
751 properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls, 795// properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
752 cur_address->address_naked); 796// cur_address->address_naked);
753 for (k = 0; k < GNUNET_ATS_QualityPropertiesCount; k++) 797// for (k = 0; k < GNUNET_ATS_QualityPropertiesCount; k++)
754 { 798// {
755 state[i++] = properties[k]; 799// state[i++] = properties[k];
756 } 800// }
757 } 801 }
758 802
759 return state; 803 return state;
@@ -848,15 +892,16 @@ envi_reward_global (struct GAS_RIL_Handle *solver)
848 { 892 {
849 net = solver->network_entries[i]; 893 net = solver->network_entries[i];
850 sum_in_available += net.bw_in_available; 894 sum_in_available += net.bw_in_available;
851 sum_in_assigned += ril_network_get_assigned(solver, net.type, GNUNET_YES); 895 sum_in_assigned += net.bw_in_assigned;
852 sum_out_available += net.bw_out_available; 896 sum_out_available += net.bw_out_available;
853 sum_out_assigned += ril_network_get_assigned(solver, net.type, GNUNET_NO); 897 sum_out_assigned += net.bw_out_assigned;
854 } 898 }
855 899
856 ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available); 900 ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
857 ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available); 901 ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
858 902
859 // global reward in [1,2] 903 // global reward in [1,2]
904 return ratio_in +1;
860 return ((ratio_in + ratio_out) / 2) + 1; 905 return ((ratio_in + ratio_out) / 2) + 1;
861} 906}
862 907
@@ -906,34 +951,44 @@ static double
906envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) 951envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
907{ 952{
908 struct RIL_Network *net; 953 struct RIL_Network *net;
909 double reward = 0; 954// double reward = 0;
910 unsigned long long assigned_in; 955 long long overutilized_in = 0;
911 unsigned long long assigned_out; 956// long long overutilized_out;
912 unsigned long long overutilized = 0; 957 long long assigned_in = 0;
958// long long assigned_out = 0;
959// long long unused;
913 960
914 //punish overutilization 961 //punish overutilization
915 net = agent->address_inuse->solver_information; 962 net = agent->address_inuse->solver_information;
916 assigned_in = ril_network_get_assigned(solver, net->type, GNUNET_YES);
917 assigned_out = ril_network_get_assigned(solver, net->type, GNUNET_NO);
918 963
919 if (assigned_in > net->bw_in_available) 964 if (net->bw_in_assigned > net->bw_in_available)
920 { 965 {
921 overutilized += assigned_in - net->bw_in_available; 966 overutilized_in = (net->bw_in_assigned - net->bw_in_available);
967 assigned_in = net->bw_in_available;
922 } 968 }
923 if (assigned_out > net->bw_out_available) 969 else
924 {
925 overutilized += assigned_out - net->bw_out_available;
926 }
927 if (overutilized > 0)
928 { 970 {
929 return -1. * (double) overutilized / 1024; 971 assigned_in = net->bw_in_assigned;
930 return -1;
931 } 972 }
973// if (net->bw_out_assigned > net->bw_out_available)
974// {
975// overutilized_out = (net->bw_out_assigned - net->bw_out_available);
976// assigned_out = net->bw_out_available;
977// }
978// else
979// {
980// assigned_out = net->bw_out_assigned;
981// }
932 982
933 reward += envi_reward_global (solver) * (solver->parameters.reward_global_share); 983// unused = net->bw_in_available - net->bw_in_assigned;
934 reward += envi_reward_local (solver, agent) * (1 - solver->parameters.reward_global_share); 984// unused = unused < 0 ? unused : -unused;
935 985
936 return (reward - 1.) * 100; 986 return (double) (assigned_in - overutilized_in) / 1024;
987
988// reward += envi_reward_global (solver) * (solver->parameters.reward_global_share);
989// reward += envi_reward_local (solver, agent) * (1 - solver->parameters.reward_global_share);
990//
991// return (reward - 1.) * 100;
937} 992}
938 993
939/** 994/**
@@ -953,16 +1008,16 @@ envi_action_bw_double (struct GAS_RIL_Handle *solver,
953 if (direction_in) 1008 if (direction_in)
954 { 1009 {
955 new_bw = agent->bw_in * 2; 1010 new_bw = agent->bw_in * 2;
956 if (new_bw < agent->bw_in) 1011 if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
957 new_bw = ULLONG_MAX; 1012 new_bw = GNUNET_ATS_MaxBandwidth;
958 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, 1013 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
959 agent->bw_out, GNUNET_NO); 1014 agent->bw_out, GNUNET_NO);
960 } 1015 }
961 else 1016 else
962 { 1017 {
963 new_bw = agent->bw_out * 2; 1018 new_bw = agent->bw_out * 2;
964 if (new_bw < agent->bw_out) 1019 if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
965 new_bw = ULLONG_MAX; 1020 new_bw = GNUNET_ATS_MaxBandwidth;
966 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, 1021 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
967 new_bw, GNUNET_NO); 1022 new_bw, GNUNET_NO);
968 } 1023 }
@@ -982,22 +1037,21 @@ envi_action_bw_halven (struct GAS_RIL_Handle *solver,
982 struct RIL_Peer_Agent *agent, 1037 struct RIL_Peer_Agent *agent,
983 int direction_in) 1038 int direction_in)
984{ 1039{
985 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
986 unsigned long long new_bw; 1040 unsigned long long new_bw;
987 1041
988 if (direction_in) 1042 if (direction_in)
989 { 1043 {
990 new_bw = agent->bw_in / 2; 1044 new_bw = agent->bw_in / 2;
991 if (new_bw < min_bw || new_bw > agent->bw_in) 1045 if (new_bw < MIN_BW || new_bw > agent->bw_in)
992 new_bw = min_bw; 1046 new_bw = MIN_BW;
993 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out, 1047 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
994 GNUNET_NO); 1048 GNUNET_NO);
995 } 1049 }
996 else 1050 else
997 { 1051 {
998 new_bw = agent->bw_out / 2; 1052 new_bw = agent->bw_out / 2;
999 if (new_bw < min_bw || new_bw > agent->bw_out) 1053 if (new_bw < MIN_BW || new_bw > agent->bw_out)
1000 new_bw = min_bw; 1054 new_bw = MIN_BW;
1001 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw, 1055 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
1002 GNUNET_NO); 1056 GNUNET_NO);
1003 } 1057 }
@@ -1015,21 +1069,20 @@ static void
1015envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in) 1069envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in)
1016{ 1070{
1017 unsigned long long new_bw; 1071 unsigned long long new_bw;
1018 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1019 1072
1020 if (direction_in) 1073 if (direction_in)
1021 { 1074 {
1022 new_bw = agent->bw_in + (5 * min_bw); 1075 new_bw = agent->bw_in + (1 * MIN_BW);
1023 if (new_bw < agent->bw_in) 1076 if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
1024 new_bw = ULLONG_MAX; 1077 new_bw = GNUNET_ATS_MaxBandwidth;
1025 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, 1078 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
1026 agent->bw_out, GNUNET_NO); 1079 agent->bw_out, GNUNET_NO);
1027 } 1080 }
1028 else 1081 else
1029 { 1082 {
1030 new_bw = agent->bw_out + (5 * min_bw); 1083 new_bw = agent->bw_out + (1 * MIN_BW);
1031 if (new_bw < agent->bw_out) 1084 if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
1032 new_bw = ULLONG_MAX; 1085 new_bw = GNUNET_ATS_MaxBandwidth;
1033 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, 1086 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
1034 new_bw, GNUNET_NO); 1087 new_bw, GNUNET_NO);
1035 } 1088 }
@@ -1047,22 +1100,21 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
1047static void 1100static void
1048envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in) 1101envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in)
1049{ 1102{
1050 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1051 unsigned long long new_bw; 1103 unsigned long long new_bw;
1052 1104
1053 if (direction_in) 1105 if (direction_in)
1054 { 1106 {
1055 new_bw = agent->bw_in - (5 * min_bw); 1107 new_bw = agent->bw_in - (1 * MIN_BW);
1056 if (new_bw < min_bw || new_bw > agent->bw_in) 1108 if (new_bw < MIN_BW || new_bw > agent->bw_in)
1057 new_bw = min_bw; 1109 new_bw = MIN_BW;
1058 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out, 1110 envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
1059 GNUNET_NO); 1111 GNUNET_NO);
1060 } 1112 }
1061 else 1113 else
1062 { 1114 {
1063 new_bw = agent->bw_out - (5 * min_bw); 1115 new_bw = agent->bw_out - (1 * MIN_BW);
1064 if (new_bw < min_bw || new_bw > agent->bw_out) 1116 if (new_bw < MIN_BW || new_bw > agent->bw_out)
1065 new_bw = min_bw; 1117 new_bw = MIN_BW;
1066 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw, 1118 envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
1067 GNUNET_NO); 1119 GNUNET_NO);
1068 } 1120 }
@@ -1178,10 +1230,20 @@ agent_step (struct RIL_Peer_Agent *agent)
1178 s_next = envi_get_state (agent->envi, agent); 1230 s_next = envi_get_state (agent->envi, agent);
1179 reward = envi_get_reward (agent->envi, agent); 1231 reward = envi_get_reward (agent->envi, agent);
1180 1232
1233 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: A: %d R: %f IN %llu OUT %llu\n",
1234 agent->step_count,
1235 agent->a_old,
1236 reward,
1237 agent->bw_in/1024,
1238 agent->bw_out/1024);
1239 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Best A: %d Q(s,a): %f \n",
1240 agent->step_count,
1241 agent_get_action_best (agent, s_next),
1242 agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
1243
1181 switch (agent->envi->parameters.algorithm) 1244 switch (agent->envi->parameters.algorithm)
1182 { 1245 {
1183 case RIL_ALGO_SARSA: 1246 case RIL_ALGO_SARSA:
1184 agent_modify_eligibility (agent, RIL_E_SET);
1185 if (agent_decide_exploration (agent)) 1247 if (agent_decide_exploration (agent))
1186 { 1248 {
1187 a_next = agent_get_action_explore (agent, s_next); 1249 a_next = agent_get_action_explore (agent, s_next);
@@ -1194,6 +1256,7 @@ agent_step (struct RIL_Peer_Agent *agent)
1194 { 1256 {
1195 //updates weights with selected action (on-policy), if not first step 1257 //updates weights with selected action (on-policy), if not first step
1196 agent_update_weights (agent, reward, s_next, a_next); 1258 agent_update_weights (agent, reward, s_next, a_next);
1259 agent_modify_eligibility (agent, RIL_E_SET, s_next);
1197 } 1260 }
1198 break; 1261 break;
1199 1262
@@ -1207,29 +1270,22 @@ agent_step (struct RIL_Peer_Agent *agent)
1207 if (agent_decide_exploration (agent)) 1270 if (agent_decide_exploration (agent))
1208 { 1271 {
1209 a_next = agent_get_action_explore (agent, s_next); 1272 a_next = agent_get_action_explore (agent, s_next);
1210 agent_modify_eligibility (agent, RIL_E_ZERO); 1273 agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
1211 } 1274 }
1212 else 1275 else
1213 { 1276 {
1214 a_next = agent_get_action_best (agent, s_next); 1277 a_next = agent_get_action_best (agent, s_next);
1215 agent_modify_eligibility (agent, RIL_E_SET); 1278 agent_modify_eligibility (agent, RIL_E_SET, s_next);
1216 } 1279 }
1217 break; 1280 break;
1218 } 1281 }
1219 1282
1220 GNUNET_assert(RIL_ACTION_INVALID != a_next); 1283 GNUNET_assert(RIL_ACTION_INVALID != a_next);
1221 1284
1222 agent_modify_eligibility (agent, RIL_E_ACCUMULATE); 1285 agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
1223 1286
1224 envi_do_action (agent->envi, agent, a_next); 1287 envi_do_action (agent->envi, agent, a_next);
1225 1288
1226 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Action: %d Reward: %f Result: IN %llu OUT %llu\n",
1227 agent->step_count,
1228 a_next,
1229 reward,
1230 agent->bw_in/1024,
1231 agent->bw_out/1024);
1232
1233 GNUNET_free(agent->s_old); 1289 GNUNET_free(agent->s_old);
1234 agent->s_old = s_next; 1290 agent->s_old = s_next;
1235 agent->a_old = a_next; 1291 agent->a_old = a_next;
@@ -1268,10 +1324,10 @@ ril_get_used_resource_ratio (struct GAS_RIL_Handle *solver)
1268 for (i = 0; i < solver->networks_count; i++) 1324 for (i = 0; i < solver->networks_count; i++)
1269 { 1325 {
1270 net = solver->network_entries[i]; 1326 net = solver->network_entries[i];
1271 if (ril_network_get_assigned(solver, net.type, GNUNET_YES) > 0) //only consider scopes where an address is actually active 1327 if (net.bw_in_assigned > 0) //only consider scopes where an address is actually active
1272 { 1328 {
1273 sum_assigned += ril_network_get_assigned(solver, net.type, GNUNET_YES); 1329 sum_assigned += net.bw_in_assigned;
1274 sum_assigned += ril_network_get_assigned(solver, net.type, GNUNET_NO); 1330 sum_assigned += net.bw_out_assigned;
1275 sum_available += net.bw_in_available; 1331 sum_available += net.bw_in_available;
1276 sum_available += net.bw_out_available; 1332 sum_available += net.bw_out_available;
1277 } 1333 }
@@ -1314,7 +1370,6 @@ static int
1314ril_network_is_not_full (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network) 1370ril_network_is_not_full (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network)
1315{ 1371{
1316 struct RIL_Network *net; 1372 struct RIL_Network *net;
1317 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1318 struct RIL_Peer_Agent *agent; 1373 struct RIL_Peer_Agent *agent;
1319 unsigned long long address_count = 0; 1374 unsigned long long address_count = 0;
1320 1375
@@ -1331,7 +1386,7 @@ ril_network_is_not_full (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_
1331 } 1386 }
1332 1387
1333 net = ril_get_network (solver, network); 1388 net = ril_get_network (solver, network);
1334 return (net->bw_in_available > min_bw * address_count) && (net->bw_out_available > min_bw * address_count); 1389 return (net->bw_in_available > MIN_BW * address_count) && (net->bw_out_available > MIN_BW * address_count);
1335} 1390}
1336 1391
1337static void 1392static void
@@ -1339,7 +1394,6 @@ ril_try_unblock_agent (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *age
1339{ 1394{
1340 struct RIL_Address_Wrapped *addr_wrap; 1395 struct RIL_Address_Wrapped *addr_wrap;
1341 struct RIL_Network *net; 1396 struct RIL_Network *net;
1342 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1343 1397
1344 for (addr_wrap = agent->addresses_head; NULL != addr_wrap; addr_wrap = addr_wrap->next) 1398 for (addr_wrap = agent->addresses_head; NULL != addr_wrap; addr_wrap = addr_wrap->next)
1345 { 1399 {
@@ -1347,13 +1401,57 @@ ril_try_unblock_agent (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *age
1347 if (ril_network_is_not_full(solver, net->type)) 1401 if (ril_network_is_not_full(solver, net->type))
1348 { 1402 {
1349 if (NULL == agent->address_inuse) 1403 if (NULL == agent->address_inuse)
1350 envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, min_bw, min_bw, silent); 1404 envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, MIN_BW, MIN_BW, silent);
1351 return; 1405 return;
1352 } 1406 }
1353 } 1407 }
1354 agent->address_inuse = NULL; 1408 agent->address_inuse = NULL;
1355} 1409}
1356 1410
1411static void
1412ril_calculate_discount (struct GAS_RIL_Handle *solver)
1413{
1414 struct GNUNET_TIME_Absolute time_now;
1415 struct GNUNET_TIME_Relative time_delta;
1416 double tau;
1417
1418 // MDP case - remove when debugged
1419 if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us)
1420 {
1421 solver->global_discount_variable = solver->parameters.gamma;
1422 solver->global_discount_integrated = 1;
1423 return;
1424 }
1425
1426 // semi-MDP case
1427
1428 //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step
1429 time_now = GNUNET_TIME_absolute_get ();
1430 time_delta = GNUNET_TIME_absolute_get_difference (solver->step_time_last, time_now);
1431 solver->step_time_last = time_now;
1432 tau = (double) time_delta.rel_value_us
1433 / (double) solver->parameters.step_time_min.rel_value_us;
1434
1435 //calculate reward discounts (once per step for all agents)
1436 solver->global_discount_variable = pow (M_E, ((-1.) * ((double) solver->parameters.beta) * tau));
1437 solver->global_discount_integrated = (1. - solver->global_discount_variable)
1438 / (double) solver->parameters.beta;
1439}
1440
1441static void
1442ril_calculate_assigned_bwnet (struct GAS_RIL_Handle *solver)
1443{
1444 int c;
1445 struct RIL_Network *net;
1446
1447 for (c = 0; c < solver->networks_count; c++)
1448 {
1449 net = &solver->network_entries[c];
1450 net->bw_in_assigned = ril_network_get_assigned(solver, net->type, GNUNET_YES);
1451 net->bw_out_assigned = ril_network_get_assigned(solver, net->type, GNUNET_NO);
1452 }
1453}
1454
1357/** 1455/**
1358 * Schedules the next global step in an adaptive way. The more resources are 1456 * Schedules the next global step in an adaptive way. The more resources are
1359 * left, the earlier the next step is scheduled. This serves the reactivity of 1457 * left, the earlier the next step is scheduled. This serves the reactivity of
@@ -1401,9 +1499,6 @@ static void
1401ril_step (struct GAS_RIL_Handle *solver) 1499ril_step (struct GAS_RIL_Handle *solver)
1402{ 1500{
1403 struct RIL_Peer_Agent *cur; 1501 struct RIL_Peer_Agent *cur;
1404 struct GNUNET_TIME_Absolute time_now;
1405 struct GNUNET_TIME_Relative time_delta;
1406 double tau;
1407 1502
1408 if (GNUNET_YES == solver->bulk_lock) 1503 if (GNUNET_YES == solver->bulk_lock)
1409 { 1504 {
@@ -1420,20 +1515,11 @@ ril_step (struct GAS_RIL_Handle *solver)
1420 solver->step_time_last = GNUNET_TIME_absolute_get (); 1515 solver->step_time_last = GNUNET_TIME_absolute_get ();
1421 } 1516 }
1422 1517
1423 //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step 1518 ril_calculate_discount (solver);
1424 time_now = GNUNET_TIME_absolute_get (); 1519 ril_calculate_assigned_bwnet (solver);
1425 time_delta = GNUNET_TIME_absolute_get_difference (solver->step_time_last, time_now);
1426 solver->step_time_last = time_now;
1427 tau = ((double) time_delta.rel_value_us)
1428 / ((double) solver->parameters.step_time_min.rel_value_us);
1429
1430 //calculate reward discounts (once per step for all agents)
1431 solver->global_discount_variable = pow (M_E, ((-1.) * ((double) solver->parameters.beta) * tau));
1432 solver->global_discount_integrated = (1 - solver->global_discount_variable)
1433 / ((double) solver->parameters.beta);
1434 1520
1435 //calculate network state vector 1521 //calculate network state vector
1436 envi_state_networks(solver); 1522// envi_state_networks(solver);
1437 1523
1438 //trigger one step per active, unblocked agent 1524 //trigger one step per active, unblocked agent
1439 for (cur = solver->agents_head; NULL != cur; cur = cur->next) 1525 for (cur = solver->agents_head; NULL != cur; cur = cur->next)
@@ -1451,6 +1537,8 @@ ril_step (struct GAS_RIL_Handle *solver)
1451 } 1537 }
1452 } 1538 }
1453 1539
1540 ril_calculate_assigned_bwnet (solver);
1541
1454 solver->step_count += 1; 1542 solver->step_count += 1;
1455 ril_step_schedule_next (solver); 1543 ril_step_schedule_next (solver);
1456 1544
@@ -1490,21 +1578,21 @@ agent_w_start (struct RIL_Peer_Agent *agent)
1490 1578
1491 count = ril_count_agents(agent->envi); 1579 count = ril_count_agents(agent->envi);
1492 1580
1493 for (other = agent->envi->agents_head; NULL != other; other = other->next) 1581 for (i = 0; i < agent->n; i++)
1494 { 1582 {
1495 for (i = 0; i < agent->n; i++) 1583 for (k = 0; k < agent->m; k++)
1496 { 1584 {
1497 for (k = 0; k < agent->m; k++) 1585 if (0 == count) {
1498 { 1586 agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
1499 if (0 == count) { 1587 }
1500 agent->W[i][k] = 1; 1588 else {
1501 } 1589 for (other = agent->envi->agents_head; NULL != other; other = other->next)
1502 else { 1590 {
1503 agent->W[i][k] += (other->W[i][k] / (double) count); 1591 agent->W[i][k] += (other->W[i][k] / (double) count);
1504 } 1592 }
1505
1506 GNUNET_assert(!isinf(agent->W[i][k]));
1507 } 1593 }
1594
1595 GNUNET_assert(!isinf(agent->W[i][k]));
1508 } 1596 }
1509 } 1597 }
1510} 1598}
@@ -1527,11 +1615,11 @@ agent_init (void *s, const struct GNUNET_PeerIdentity *peer)
1527 agent->peer = *peer; 1615 agent->peer = *peer;
1528 agent->step_count = 0; 1616 agent->step_count = 0;
1529 agent->is_active = GNUNET_NO; 1617 agent->is_active = GNUNET_NO;
1530 agent->bw_in = 1024; //TODO? put min_bw 1618 agent->bw_in = MIN_BW;
1531 agent->bw_out = 1024; 1619 agent->bw_out = MIN_BW;
1532 agent->suggestion_issue = GNUNET_NO; 1620 agent->suggestion_issue = GNUNET_NO;
1533 agent->n = RIL_ACTION_TYPE_NUM; 1621 agent->n = RIL_ACTION_TYPE_NUM;
1534 agent->m = (solver->networks_count * RIL_FEATURES_NETWORK_COUNT) + GNUNET_ATS_PreferenceCount; 1622 agent->m = (RIL_FEATURES_NETWORK_COUNT);// + GNUNET_ATS_PreferenceCount;
1535 agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n); 1623 agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
1536 for (i = 0; i < agent->n; i++) 1624 for (i = 0; i < agent->n; i++)
1537 { 1625 {
@@ -1539,9 +1627,9 @@ agent_init (void *s, const struct GNUNET_PeerIdentity *peer)
1539 } 1627 }
1540 agent_w_start(agent); 1628 agent_w_start(agent);
1541 agent->a_old = RIL_ACTION_INVALID; 1629 agent->a_old = RIL_ACTION_INVALID;
1542 agent->s_old = envi_get_state (solver, agent); 1630 agent->s_old = GNUNET_malloc (sizeof (double) * agent->m);
1543 agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m); 1631 agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
1544 agent_modify_eligibility (agent, RIL_E_ZERO); 1632 agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
1545 1633
1546 return agent; 1634 return agent;
1547} 1635}
@@ -1609,10 +1697,9 @@ static int
1609ril_network_is_active (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network) 1697ril_network_is_active (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network)
1610{ 1698{
1611 struct RIL_Network *net; 1699 struct RIL_Network *net;
1612 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1613 1700
1614 net = ril_get_network (solver, network); 1701 net = ril_get_network (solver, network);
1615 return net->bw_out_available >= min_bw; 1702 return net->bw_out_available >= MIN_BW;
1616} 1703}
1617 1704
1618/** 1705/**
@@ -1745,6 +1832,15 @@ libgnunet_plugin_ats_ril_init (void *cls)
1745 { 1832 {
1746 solver->parameters.beta = RIL_DEFAULT_DISCOUNT_BETA; 1833 solver->parameters.beta = RIL_DEFAULT_DISCOUNT_BETA;
1747 } 1834 }
1835 if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_DISCOUNT_GAMMA", &string))
1836 {
1837 solver->parameters.gamma = strtod (string, NULL);
1838 GNUNET_free (string);
1839 }
1840 else
1841 {
1842 solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_GAMMA;
1843 }
1748 if (GNUNET_OK 1844 if (GNUNET_OK
1749 == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &string)) 1845 == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &string))
1750 { 1846 {
@@ -1812,11 +1908,6 @@ libgnunet_plugin_ats_ril_init (void *cls)
1812 LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024); 1908 LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
1813 } 1909 }
1814 1910
1815// solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (
1816// GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
1817// &ril_step_scheduler_task, solver);
1818// solver->step_next_task_id = GNUNET_SCHEDULER_add_now(&ril_step_scheduler_task, solver);
1819
1820 LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n"); 1911 LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");
1821 LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n", 1912 LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
1822 solver->parameters.algorithm ? "Q" : "SARSA", 1913 solver->parameters.algorithm ? "Q" : "SARSA",
@@ -1967,7 +2058,6 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
1967 unsigned int n_new; 2058 unsigned int n_new;
1968 int i; 2059 int i;
1969 struct RIL_Network *net; 2060 struct RIL_Network *net;
1970 uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
1971 2061
1972 LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_delete() Delete %s%s %s address %s for peer '%s'\n", 2062 LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_delete() Delete %s%s %s address %s for peer '%s'\n",
1973 session_only ? "session for " : "", address->active ? "active" : "inactive", address->plugin, 2063 session_only ? "session for " : "", address->active ? "active" : "inactive", address->plugin,
@@ -2034,7 +2124,7 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
2034 { 2124 {
2035 if (NULL != agent->addresses_head) //if peer has an address left, use it 2125 if (NULL != agent->addresses_head) //if peer has an address left, use it
2036 { 2126 {
2037 envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, min_bw, min_bw, 2127 envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, MIN_BW, MIN_BW,
2038 GNUNET_NO); 2128 GNUNET_NO);
2039 } 2129 }
2040 else 2130 else