// Tabular Q-learning demo.
//
// Prints the (all-zero) Q table, runs a fixed number of full sweeps over every
// (state, action) pair applying
//     Q(s, a) = R(s, a) + gamma * max_a' Q(s', a')
// and prints the resulting Q table.

#include <iostream>

// Table dimensions: MROW rows (states) by MLOW columns (actions).
#define MROW 6
#define MLOW 4

/*
 * Earlier 6x6 reward matrix R[6][6], kept for reference only (rows = state,
 * columns = action). The program uses the 6x4 `Reward` table below instead.
 *
 *   { -1, -1, -1, -1,  0,  -1 },
 *   { -1, -1, -1,  0, -1, 100 },
 *   { -1, -1, -1,  0, -1,  -1 },
 *   { -1,  0,  0, -1,  0,  -1 },
 *   {  0, -1, -1,  0, -1, 100 },
 *   { -1,  0, -1, -1,  0, 100 }
 */

// Reward table indexed as Reward[state][action].
// An entry of -1 marks a (state, action) pair that training skips and that
// get_max_q() ignores; entries >= 0 are the rewards used in the update.
double Reward[MROW][MLOW] = {
    { -1, -1,  0,  -1 },
    { -1,  0, -1, 100 },
    { -1,  0, -1,  -1 },
    {  0, -1,  0,  -1 },
    { -1,  0, -1, 100 },
    { -1, -1,  0, 100 }
};

// Learned Q matrix (the table that actually drives action selection).
// Starts zero-initialised and is filled in by the training sweeps.
double QLearning[MROW][MLOW] = { 0, };

// Writes the top-left `rows` x `columns` part of `m` to std::cout, one row per
// line, with a tab after every value.
// The caller must keep rows <= MROW and columns <= MLOW; no bounds check is made.
void print_matrix(double m[MROW][MLOW], int rows = MROW, int columns = MLOW)
{
    for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < columns; ++j) {
            std::cout << m[i][j] << "\t";
        }
        std::cout << '\n';
    }
}

// Returns the largest Q value in row `state`, considering only actions whose
// reward is >= 0. Returns 0 when no such action has a Q value above 0.
// `state` must be in [0, MROW); it is used directly as a row index.
double get_max_q(int state)
{
    double best = 0;
    for (int action = 0; action < MLOW; ++action) {
        if ((Reward[state][action] >= 0) && (QLearning[state][action] > best)) {
            best = QLearning[state][action];
        }
    }
    return best;
}

// Runs `sweeps` full passes over the Q table, updating every (state, action)
// pair whose reward is not -1 with discount factor `gamma`.
static void run_training_sweeps(double gamma, int sweeps)
{
    for (int sweep = 0; sweep < sweeps; ++sweep) {
        for (int state = 0; state < MROW; ++state) {
            for (int action = 0; action < MLOW; ++action) {
                if (Reward[state][action] == -1) {
                    continue;  // pair excluded from learning
                }

                // NOTE(review): the successor state is taken to be the action
                // index itself. That is only a valid row index because
                // MLOW <= MROW, and it means rows 4 and 5 are never used as a
                // successor. Presumably carried over from the 6x6 layout where
                // column == destination state -- confirm the intended
                // transition function for the 6x4 table.
                const int next_state = action;

                QLearning[state][action] =
                    Reward[state][action] + gamma * get_max_q(next_state);
            }
        }
    }
}

// Entry point: print the initial Q table, train, then print the learned table.
int main()
{
    print_matrix(QLearning);

    const double gamma = 0.8;  // discount factor
    const int sweeps = 100;    // number of full passes over the table

    run_training_sweeps(gamma, sweeps);

    std::cout << '\n';
    print_matrix(QLearning);

    return 0;
}
'Programming > AI' 카테고리의 다른 글
4. TD(시간차 학습) (0) | 2017.12.19 |
---|---|
3. Markov Decision Processes ( MDP ) (0) | 2017.12.18 |
2. Markov Reward Processes (0) | 2017.12.18 |
1. Markov Processes ( Markov Chain ) (0) | 2017.12.18 |