/*
این برنامه پیاده‌سازی روش Q-Learning است که برای یافتن یک هدف در مکان نامعلوم به وسیله‌ی یک agent نوشته شده است.
در این برنامه ابتدا به وسیله‌ی این الگوریتم و چندین بار حرکت agent در مسیر، هزینه‌هایی برای مسیرها پیدا می‌شود و در پایان بهترین مسیر تا هدف انتخاب می‌شود (در این برنامه از الگوریتم Dijkstra برای این کار استفاده شده است).
به این نکته هم توجه داشته باشید که بهترین مسیر در این روش، مسیری است که بزرگ‌ترین هزینه را در هر مرحله انتخاب کند.
*/
#include<cstdlib>   // rand, srand, system
#include<iostream>
#include<limits>    // numeric_limits (sentinel values in routing/dijkstra)
#include<string>    // node::p path string
#include<time.h>
#include<vector>
using namespace std;
// Number of states (graph nodes) in the environment, labeled A..F.
const int SIZE=6;
//*********************
// Per-state record used by the Dijkstra-style routing pass.
// Members are brace-initialized so a default-constructed node is in a
// defined state (previously all fields were left uninitialized).
struct node
{
int destination{0};   // index of the state this record describes
int s{0};             // "finalized" flag: 1 once the node has been selected
float distance{0.0f}; // best (largest) accumulated cost from the start state
string p;             // path so far, one '0'+index digit per visited state
};
//*********************
// Returns the index of the non-finalized node (s == 0) with the largest distance.
int max_distance(node []);
// Longest-path variant of Dijkstra from start to goal over a 6x6 cost matrix.
void dijkstra ( int ,int, float [][6] );
//*********************
// Trains a Q table over a fixed 6-state environment and then extracts the
// best (largest-cost) route from a start state to a goal state.
class Q_Learning
{
public:
// Runs the whole pipeline: build R, train Q, normalize, display, route.
Q_Learning(int,int);
void displayR();   // print the reward matrix R
void displayQ();   // print the learned Q matrix
void routing();    // feed the learned Q values to the Dijkstra pass
private:
float Q_cal(int,int);   // one Q-Learning update: R + Y * best future Q
float find_max(int);    // largest Q value reachable from a given state
float Q[6][6];          // learned action values
float R[6][6];          // rewards: -1 = no edge, 0 = edge, 100 = edge into goal
int start_state;
int goal_state;
float Y;                // discount factor (gamma)
// TODO (original note, translated): the start state should also be stored/used later
};
//*********************
// Trains the Q table over 1200 random-start episodes, normalizes it to a
// 0..100 scale, displays it, and finally extracts the best route.
// s: start state index, g: goal state index (both expected in [0, SIZE-1]).
Q_Learning :: Q_Learning(int s , int g)
{
int i,j,k,l,state,action;
float temp;
vector<int> states;
Y=0.8;               // discount factor (gamma)
start_state=s;
goal_state=g;
// Q starts at zero; R starts as "no edge" (-1) everywhere.
for ( i=0 ; i<SIZE ; i++ )
for (j=0 ; j<SIZE ; j++ )
{
Q[i][j]=0;
R[i][j]=-1;
}
// Hard-coded adjacency of the 6-node environment (0 = traversable edge).
R[0][4]=0;R[1][3]=0;R[1][5]=0;
R[2][3]=0;R[3][1]=0;R[3][2]=0;
R[3][4]=0;R[4][0]=0;R[4][3]=0;
R[4][5]=0;R[5][1]=0;R[5][4]=0;
R[5][5]=0;
// Every edge that leads directly into the goal gets the big reward.
for (i=0 ; i<SIZE ; i++)
if ( R[i][goal_state] == 0 )
R[i][goal_state] = 100;
srand(time(0));
// show the reward matrix
displayR();
// start of the training episodes
for (k=0 ; k<1200 ; k++)
{
state=rand()%SIZE;   // each episode begins in a random state
// collect the actions (columns) that have an edge from this state
for (i=0 ; i<SIZE ; i++)
{
if (R[state][i] != -1)
states.push_back(i);
}
// pick one valid action at random and apply the Q-Learning update
action = states [ rand() % states.size() ];
Q [state][action] = Q_cal ( state , action );
states.clear();
state = action;
if ( !(k%10) )       // show training progress every 10th episode
displayQ();
// keep stepping (same update) until the goal state is reached
while (state != goal_state)
{
for (i=0 ; i<SIZE ; i++)
{
if (R[state][i] != -1)
states.push_back(i);
}
action = states [ rand() % states.size() ];
Q [state][action] = Q_cal ( state , action );
states.clear();
state = action;
}
}
system("pause");     // NOTE(review): Windows-only; non-portable
// normalize Q to a 0..100 scale (divide by the largest entry)
temp=Q[0][0];
for (i=0 ; i<SIZE ; i++)
for (j=0 ; j<SIZE ; j++)
if (temp < Q[i][j])
temp=Q[i][j];
for (i=0 ; i<SIZE ; i++)
for (j=0 ; j<SIZE ; j++)
Q[i][j]=Q[i][j]/temp*100;
displayQ();
routing();
}
//*********************
// Bellman-style update value for taking action b in state a:
// immediate reward plus the discounted best value reachable from b.
float Q_Learning :: Q_cal(int a , int b)
{
const float best_future = find_max( b );
return R[a][b] + Y * best_future;
}
//*********************
// Returns the largest Q value over all actions i that are legal from
// state b (i.e. R[b][i] != -1); returns 0 when b has no outgoing edge.
// Rewritten as a single pass: the original built a temporary vector of
// candidates on every call (this runs inside the hot training loop) and
// compared a signed index against vector::size() (signed/unsigned mix).
float Q_Learning :: find_max ( int b )
{
bool found = false;
float best = 0.0f;
for (int i = 0; i < SIZE; i++)
{
if (R[b][i] == -1)
continue;               // no edge b -> i
if (!found || best < Q[b][i])
{
best = Q[b][i];
found = true;
}
}
return found ? best : 0.0f;     // 0 when no legal action exists (as before)
}
//*********************
// Copies the learned Q matrix into a local cost matrix, marking every
// missing edge (Q == 0 off the diagonal) with the sentinel value
// numeric_limits<float>::min() that dijkstra() checks for, then runs
// the Dijkstra pass from the start state to the goal state.
void Q_Learning :: routing()
{
float cost[6][6];
for (int r = 0; r < SIZE; r++)
{
for (int c = 0; c < SIZE; c++)
{
float q = Q[r][c];
if (q == 0 && r != c)
q = std::numeric_limits<float>::min();  // sentinel: no edge
cost[r][c] = q;
}
}
dijkstra ( start_state ,goal_state, cost );
}
//*********************
// Prints the reward matrix R with A..F row/column labels, then pauses.
// Fix: the column-header loop used a hard-coded 6 where every sibling
// loop uses SIZE.
void Q_Learning :: displayR()
{
int i,j;
char item[6]={'A','B','C','D','E','F'};
system("cls");   // NOTE(review): Windows-only
cout<<"\n -1 --> No way\n 0 --> Is way\n 100 --> Direct way\n";
cout<<"\nRoads matrix = :\n\n ";
for (i=0 ; i<SIZE ; i++)    // was: i<6
cout<<item[i]<<" ";
cout<<endl;
for (i=0 ; i<SIZE ; i++)
{
cout<<item[i]<<' ';
for (j=0 ; j<SIZE ; j++)
cout<<R[i][j]<<' ';
cout<<endl;
}
system("pause");
}
//*********************
// Prints the current Q matrix with A..F row/column labels.
void Q_Learning :: displayQ()
{
static const char label[6]={'A','B','C','D','E','F'};
system("cls");   // NOTE(review): Windows-only
cout<<"\nQ = :\n\n ";
for (int col = 0; col < SIZE; col++)
cout<<label[col]<<' ';
cout<<endl;
for (int row = 0; row < SIZE; row++)
{
cout<<label[row]<<' ';
for (int col = 0; col < SIZE; col++)
cout<<Q[row][col]<<' ';
cout<<endl;
}
cout<<endl;
}
//*********************
// Reads one state index in [0, SIZE-1] from stdin, re-prompting until valid.
// Fix: the original never checked the stream state, so non-numeric input
// left cin in a fail state (infinite loop pre-C++11; since C++11 the value
// is set to 0 and silently accepted).
static int read_state(const char* prompt)
{
int value;
while (true)
{
cout<<prompt;
if ( cin>>value && value>=0 && value<=SIZE-1 )
return value;
cout<<'\a';
cin.clear();                                          // recover from failed extraction
cin.ignore(numeric_limits<streamsize>::max(), '\n');  // discard the bad line
system("cls");
}
}
//*********************
// Prompts for the start and goal states, then runs the whole
// Q-Learning + routing pipeline via the Q_Learning constructor.
int main()
{
cout<<"\nA:0 B:1 C:2 D:3 E:4 F:5\n";
int s = read_state("\nEnter Start state: ");
int g = read_state("\nEnter Goal state: ");
Q_Learning ob(s,g);
return 0;
}
//*********************
// Returns the index of the non-finalized node (s == 0) with the largest
// distance.
// Fix: the running maximum was seeded with numeric_limits<float>::min(),
// which is the smallest POSITIVE float, not the most negative value — so
// nodes whose distance equals that sentinel could never be selected and
// max_index could be returned uninitialized (undefined behavior). Seeding
// with lowest() (and a defined fallback index) guarantees a valid index
// whenever any non-finalized node exists.
int max_distance(node a[6])
{
int max_index = 0;
float best = numeric_limits<float>::lowest();  // true "minus infinity" start
for (int i=0 ; i<SIZE ; i++)
{
if (a[i].s == 0 && a[i].distance > best)
{
best = a[i].distance;
max_index = i;
}
}
return max_index;
}
//*********************
// Dijkstra-style pass that, unlike the classical shortest-path version,
// repeatedly finalizes the non-finalized node with the LARGEST accumulated
// value — i.e. it extracts the maximum-Q route from start v to goal g.
// v: start state, g: goal state, P: 6x6 cost matrix (normalized Q values,
// with numeric_limits<float>::min() used as the "no edge" sentinel).
void dijkstra ( int v ,int g, float P[6][6] )
{
int u,i,j,k;
float NP[6][6];
char ch[6]={'A','B','C','D','E','F'};
node state[6];
// Copy P, re-marking zero off-diagonal entries as "no edge".
// (routing() already applied this sentinel, so this is normally a no-op.)
for (i=0 ; i< SIZE ; i++)
for (j=0 ; j< SIZE ; j++)
{
NP[i][j]=P[i][j];
if (NP[i][j] == 0 && i!=j)
NP[i][j]=std::numeric_limits<float>::min(); // "no edge" sentinel
}
// Initialize every node with the direct cost from the start state.
for (i=0 ; i< SIZE ; i++)
{
state[i].destination=i;
state[i].s=0;                // 0 = not finalized yet
state[i].distance=NP[v][i];
state[i].p = v+'0';          // path begins with the start state's digit
}
state[v].s=1;                    // the start state itself is finalized
for (j=1 ; j< SIZE ; j++)
{
u=max_distance(state);       // pick the open node with the largest value
state[u].s=1;
state[u].p += u+'0';         // append u's digit to its own path
// Relaxation: try to improve every open node by routing through u.
for (k=0 ; k< SIZE ; k++)
if (state[k].s == 0)
{
if ( NP[u][k] != std::numeric_limits<float>::min() )
if ( state[k].distance < state[u].distance + NP[u][k] )
{
state[k].distance = state[u].distance + NP[u][k];
state[k].p = state[u].p;  // inherit u's path; k is appended when finalized
}
}
}
/*for (i=0 ;i< SIZE ; i++)
{
cout<<"From "<<ch[v]<<" (start state) to ";
cout<<ch[state[i].destination]<<'\t';
//cout<<state[i].s<<'\t';
cout<<"Distance: ";
cout<<(int)state[i].distance<<"\t\t";
cout<<"Rout:";
for (j=0 ; j<state[i].p.size(); j++)
cout<<' '<<ch[ state[i].p[j]-48 ];
cout<<endl;
}*/
cout<<"\nStart state to Goal state:\n\n";
cout<<"From "<<ch[v]<<"(Start state) to "<<ch[g]<<"(Goal state):\n\n";
cout<<"Cost: "<<(int)state[g].distance;
cout<<"\tRoute: ";
// Print the route as letters; each stored char is '0' + state index.
for (j=0 ; j<state[g].p.size(); j++)
cout<<' '<<ch[ state[g].p[j]-48 ]<<' ';
cout<<"\n\n";
system("pause");   // NOTE(review): Windows-only
}