Acrob navigation in maze using neural network (Kundlová, Jurenka)

From RoboWiki
Jump to: navigation, search
Rozlozenie senzorov

Zadanie

Našou úlohou bolo natrénovať neuronóvú sieť určenú na navigáciu robota na čiare v bludisku.

Konštrukcia

Používame klasického robota Acrob, s dvoma servo motormi. Do prednej časti sme umiestnili štryri senzori, z ktorých tri (ľavý , pravý a stredný) ležia na priamke kolmo na smer pohybu a štvrtý (predný), leží vpredu na priamke tvorenej stredom robota a stredným senzorom.


Neurónová sieť

Vstup zo 4 senozorov sa mapuje na 1(čierna) alebo 0(biela) a slúži ako vstupná vrstva neurónovej siete.
Použili sme jednu skrytú vrstvu s piatimi neurónmi, z ktorých každý dostáva vstup z každého neuónu zo vstupnej vrstvy.
Výstupná vrstva obsahuje päť neurónov zodpovedajúcim akciám:

  1. Vpred
  2. Vpravo
  3. Vľavo
  4. Vpred a vpravo
  5. Vpred a vľavo

Zodpovedajúca akcia je potom určná algoritmom forward feed.

Backpropagation v Matlabe

Okrem architektúry charakterizujú neurónovú sieť aj váhy jednotlivých spojení.
Maticu váh medzi vstupnou a skytovou vrstvou označíme Theta1 a maticu medzi skyrtou a výstupnou vrstvou označíme Theta2.


Tréning


Neurónovú sieť trénujeme na vstupných dátach:

       X = [
       % left center right front     
       0 0 0 0; %right 
       0 0 0 1; %forward
       
       0 0 1 0; %right
       0 0 1 1; %forward
       
       0 1 0 0; %forward
       0 1 0 1; %forward
       
       1 0 0 0; %left
       1 0 0 1; %forward
       
       0 1 1 0; %fw-right
       0 1 1 1; %right
       
       1 0 1 0; %fw-right
       1 0 1 1; %foward
       
       1 1 0 0; %fw-left
       1 1 0 1; %forward
       
       1 1 1 0; %right
       1 1 1 1; %right
       
           ];

Požadovaný výstup je:

       y = [
         2;
         1;
       
         2;
         1;
       
         1;
         1;
       
         3;
         1;
       
         4;
         1;
       
         4;
         1;
       
         5;
         1;
       
         2;
         2;
       ];

Aby sme mohli vhodne upraviť váhy, potrebujeme určiť aktuálnu chybu a gradient, na čo slúži backpropagation algoritmus, ktorý vyzerá nasledovne:

   function [J, grad] = nnCostFunction(nn_params, ...
                                          input_layer_size, ...
                                      hidden_layer_size, ...
                                      num_labels, ...
                                      X, y, lambda)
       Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                        hidden_layer_size, (input_layer_size + 1));
       Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                        num_labels, (hidden_layer_size + 1));
       m = size(X, 1);
       J = 0;
       Theta1_grad = zeros(size(Theta1));
       Theta2_grad = zeros(size(Theta2));
       X = [ones(m, 1) X];
       a1 = X;
       z2 = a1*Theta1';
       a2 = sigmoid(z2);
       a2 = [ones(size(a2,1), 1) a2];
       z3 = a2*Theta2';
       a3 = sigmoid(z3);
       h = a3;
       y1 = zeros(size(y,1), num_labels);
       for i=1:size(y)
           y1(i, y(i)) = 1;
       end
       J = 1/m * sum(sum(((-y1).* log(h) - (1-y1).* log(1-h))));
       temp1 = Theta1;
       temp1(:,1) = zeros(size(temp1,1),1); 
       temp2 = Theta2;
       temp2(:,1) = zeros(size(temp2,1),1); 
       J = J + lambda / (2*m) * ( ...
               sum(sum(temp1 .*temp1)) + sum(sum(temp2.*temp2)) ...
           );
       for t = 1:m
           a1 = X(t,:);
           z2 = a1*Theta1';
           a2 = sigmoid(z2);
           a2 = [ones(size(a2,1), 1) a2];
           z3 = a2*Theta2';
           a3 = sigmoid(z3);
           yk = zeros(num_labels,1); 
           yk(y(t)) = 1;
           d3 = a3' - yk;
           d2 = (Theta2'*d3).*[ones(1,1); sigmoidGradient(z2')];
           d2 = d2(2:end);
           Theta1_grad = Theta1_grad + d2*a1;			
           Theta2_grad = Theta2_grad + d3*a2;
       end
       Theta1_grad = Theta1_grad / m;
       Theta2_grad = Theta2_grad / m;
       Theta1_grad = Theta1_grad + ( (lambda/m) .* [ zeros( hidden_layer_size, 1 ), Theta1( :, 2:end ) ] ); 
       Theta2_grad = Theta2_grad + ( (lambda/m) .* [ zeros( num_labels, 1 ), Theta2( :, 2:end ) ] ); 
       grad = [Theta1_grad(:) ; Theta2_grad(:)];
   end


Simulácia v Jave

Predtým než sme neurónovú sieť nasadili na robota sme sa rozhodli vytvoriť kompletnú simuláciu v Jave, kde sme mohli otestovať riadenie na ľubovoľnej .
Video zo simulácie:

Zdrojové kódy nájdete tu

Arduino

Acrob robot

Arduino nemá knižnicu na prácu s maticami, nepozná viacrozmerné polia a dokonca ani dynamickú alokáciu pamäte.
Bolo potrebné doplniť funkcie pre základné maticové operácie, ako aj funkciu pre počítanie sigmoidy.
Museli sme teda konštantami definovať veľkosti jednotlivých matíc, prípadne ich posielať ako parametre do funkcii.
Rovnako sme implementovali forward feed.

   #include <Servo.h>
   Servo LeftServo;  
   Servo RightServo;
   double THETA_1[5][5] = {
   {   -3.3015,   -3.2262,    8.4318,   -4.8450,   15.3742},
   {   -4.7470,   -0.3573,   -9.7632,   -9.3562,   18.5839},
   {  -13.2009,    8.7361,    8.5763,    7.5276,   10.6005},
   {    7.0469,  -10.8166,   -9.0304,   -4.0693,   13.2435},
   {    2.6464,   -6.6205,   -2.8116,   11.4895,    8.3321}
   };    
   double THETA_2[5][6] = {
   { -12.1840,   25.3169,   27.4735,  -23.5952,   -0.5421,   -2.3738},
   { -14.6229,   -2.5615,  -26.4061,   -0.6488,   27.8756,    1.7607},
   {  12.8366,  -16.8083,   -3.1984,  -14.8165,  -10.5194,  -16.8826},
   { -17.2910,   -4.2473,  -10.6967,    8.6676,  -24.9129,   25.1679},
   { -12.0564,    3.1205,   -3.9859,   21.9216,   -4.5046,  -25.5430}
   };
   char* ACTION_NAMES[]={
     "ERROR",
     "FORWARD",
     "RIGHT",
     "LEFT",
     "FORWARD RIGHT",
     "FORWARD LEFT"
   };
   double LINE_TRESHOLD = 0.5;
   #define FW_SPEED 1.0
   #define BIG_ROT_AMOUNT 1.0
   #define SLOWDOWN 1
   #define SMALL_ROT_AMOUNT 1
   #define LEFT_SENSOR     3
   #define RIGHT_SENSOR    2
   #define CENTER_SENSOR   0
   #define FRONT_SENSOR    1
   #define TRESHOLD 500
   double ACTIONS[6][2] = {
     {0,0},
     {FW_SPEED, 0},
     {0, -BIG_ROT_AMOUNT },
     {0, BIG_ROT_AMOUNT },
     {/*FW_SPEED / SLOWDOWN*/0, -SMALL_ROT_AMOUNT},
     {/*FW_SPEED / SLOWDOWN*/0, SMALL_ROT_AMOUNT}
   } ;
   #define INPUT_SIZE 4
   #define Euler 2.71828182845904523536
   #define THETA_1_ROWS 5
   #define THETA_1_COLUMNS 5
   #define THETA_2_ROWS 5
   #define THETA_2_COLUMNS 6
   int REPEAT_1[INPUT_SIZE] = {0,0,0,0};
   int REPEAT_2[INPUT_SIZE] = {1,1,1,0};
   int last = 1;
   boolean compareArrays(int  a[], int b[]){
     for(int i =0 ; i < INPUT_SIZE; i++){
       if(a[i] != b[i]){
         return false;
       }
     }
     return true;
   }
   void transpose(double * matrix, double * result , int rr, int cc){
       for(int r =0 ; r <  rr; r++){
        for(int c =0 ; c <  cc; c++){
         result[c * rr  + r] = matrix[r * cc + c];
       }
     }
   }
   int feed(int input[]) {
       if (last != 1 && ( compareArrays(input, REPEAT_1) || compareArrays (input, REPEAT_2)  )) {
           return last;
       }
       double x[1][INPUT_SIZE];
       for (int i = 0; i < INPUT_SIZE; i++) {
           x[0][i] = input[i];
       }
       int m = 1;
       double ones[m][1];
       for (int i = 0; i < m; i++) {
           ones[i][0] = 1;
       }
       double t1t[THETA_1_COLUMNS][THETA_1_ROWS];
       transpose((double*)THETA_1, (double*)t1t , THETA_1_ROWS, THETA_1_COLUMNS);
       double t2t[THETA_2_COLUMNS][THETA_2_ROWS];
       transpose((double*)THETA_2, (double*)t2t, THETA_2_ROWS, THETA_2_COLUMNS);
       double a0[m][INPUT_SIZE+1];
       int params[6];
       params[0] = m;
       params[1] = INPUT_SIZE+1;
       params[2] = m;
       params[3] = 1;
       params[4] = 0;
       params[5] = 0;
       combineMatrix((double*)ones, (double*)a0, params);
       params[2] = m;
       params[3] = INPUT_SIZE;
       params[4] = 0;
       params[5] = 1;
       combineMatrix((double*)x, (double*)a0, params);
       double h1[m][THETA_1_ROWS];
       zeroMatrix((double*)h1, m, THETA_1_ROWS);
       int mparams[4];
       mparams[0] = m;
       mparams[1] = INPUT_SIZE + 1;
       mparams[2] = THETA_1_COLUMNS;
       mparams[3] = THETA_1_ROWS;
       multiply((double*)a0, (double*)t1t, (double*)h1, mparams);
       sigmoid((double*)h1,(double*)h1 , m , THETA_1_ROWS);
       double a1[m][THETA_1_ROWS+1];
       params[0] = m;
       params[1] = THETA_1_ROWS+1;
       params[2] = m;
       params[3] = 1;
       params[4] = 0;
       params[5] = 0;
       combineMatrix((double*)ones, (double*)a1, params);
       params[2] = m;
       params[3] = THETA_1_ROWS;
       params[4] = 0;
       params[5] = 1;
       combineMatrix((double*)h1, (double*)a1, params);
       double h2[m][THETA_2_COLUMNS];
       zeroMatrix((double*)h2, m, THETA_2_ROWS);
       mparams[0] = m;
       mparams[1] = THETA_1_ROWS + 1;
       mparams[2] = THETA_2_COLUMNS;
       mparams[3] = THETA_2_ROWS;
       multiply((double*)a1, (double*)t2t, (double*)h2, mparams);
       sigmoid((double*)h2,(double*)h2 , m , THETA_2_ROWS);
       int actionIndex = maxValue((double*)h2, THETA_2_ROWS * m) + 1;
       last = actionIndex;
       return actionIndex;
   }
    double sigmoid(double * matrix, double * result, int rr, int cc){
        for(int r =0 ; r < rr; r++){
        for(int c =0 ; c < cc; c++){
         result[r*cc + c] = 1.0/(1.0+exp(-matrix[r*cc + c]));
       }
     }
   }
   void copyMatrix(double * matrix, double * result, int rr, int cc){
     int param[6];
     param[0] = rr;
     param[1] = cc;
     param[2] = rr;
     param[3] = cc;
     param[4] = 0;
     param[5] = 0;
     combineMatrix(matrix, result, param);
   }
   void combineMatrix(double * matrix, double * result, int param[6]){
     int lr = param[0];
     int lc = param[1];
     int sr = param[2];
     int sc = param[3];
     int ro = param[4];
     int co = param[5];
     for(int r =0 ; r <  sr; r++){
        for(int c =0 ; c < sc; c++){
         result[(r + ro ) * lc + co + c] = matrix[r * sc + c];
       }
     }
   }
   int maxValue(double* A, int length){
     int mValue = 0;
     for(int i=0; i<length; i++){
       if(A[mValue] <= A[i]){
         mValue = i;
       }
     }
     return mValue;
   } 
   void multiply(double * A, double * B, double * C, int dim[4]) {
         /*int mA = A.length;
         int nA = A[0].length;
         int mB = B.length;
         int nB = A[0].length;*/
         int mA = dim[0];
         int nA = dim[1];
         int mB = dim[2];
         int nB = dim[3];
         if (nA != mB){
               Serial.print("Multiplication error ");
               Serial.print(nA);
               Serial.print(" ");
               Serial.println(mB);
         }
         for (int i = 0; i < mA; i++){
             for (int j = 0; j < nB; j++){
                 for (int k = 0; k < nA; k++){
                      C[i*nB+j] += (A[i*nA+k] * B[k*nB+j]);
                   }
             }
         }
   }
   void printMatrix(double * matrix, int rr, int cc){
     for(int r =0 ; r <  rr; r++){
        for(int c =0 ; c < cc; c++){
         Serial.print(matrix[r*cc+c]);
         Serial.print(" ");
       }
       Serial.println();
     }
       Serial.println();
    }
   void zeroMatrix(double * matrix, int rr, int cc){
     for(int r =0 ; r <  rr; r++){
        for(int c =0 ; c < cc; c++){
         matrix[r*cc+c] = 0;
       }
     }
   }
   void setup() {
      Serial.begin(9600);
      int input[4];
      input[0] = 0;
      input[1] = 0;
      input[2] = 0;
      input[3] = 0;   
      int actionIndex = feed(input);
      Serial.println(actionIndex);
      LeftServo.attach(9);
      RightServo.attach(10);
   }
   int convert(long val){
      if(val >= TRESHOLD){
        return 1;
      }
      return 0;
   }
   void fw(double left, double right){
      double scale = 1;
      int lAmmount = 90 - round(left*scale);
      int rAmmount = 90 + round(right*scale);
      Serial.print("move ");
      Serial.print(lAmmount);
      Serial.print(" ");
      Serial.println(rAmmount);
      LeftServo.write(lAmmount);   
      RightServo.write(rAmmount); 
   }
   void loop() {
     long leftVal = analogRead(LEFT_SENSOR);
     long rightVal = analogRead(RIGHT_SENSOR);
     long centerVal = analogRead(CENTER_SENSOR);
     long frontVal = analogRead(FRONT_SENSOR);
     Serial.print("left: ");  
     Serial.print(leftVal, DEC);  
     Serial.print(" right: ");  
     Serial.print(rightVal, DEC);  
     Serial.print(" center: ");  
     Serial.print(centerVal, DEC);  
     Serial.print(" front: ");  
     Serial.println(frontVal, DEC);  
     int input[4];
     input[0] = convert(leftVal); 
     input[1] = convert(centerVal); 
     input[2] = convert(rightVal); 
     input[3] = convert(frontVal); 
     int action = feed(input);
     Serial.println(ACTION_NAMES[action]);
     double * move = ACTIONS[action];
     double left = move[0] + move[1];
     double right = move[0] - move[1];
     fw(left, right);
     delay(1000);
   }


Video



Záver

Robot demonštroval pohyb bludiskom zodpovedajúci trénovacím dátam neurónovej siete, čo je pekne vidieť na natočenom videu. Vďaka našej implementácii neurónovej siete v prostredí Arduino je robot autonómny a nepotrebuje k pohybu dodatočné výpočtové zariadenie.