5. QLearning
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677// R[6][6] = Reward Matrix 보상 행렬#define MROW 6#define MLOW 4/*double R[MROW][MLOW] = { // double R[MROW][MLOW] = { // -> Action{ -1, -1, -1, -1, 0, -1 }, // { -1, -1, 0, -1 }, // 0 1 2 3 4 5 6{ -1, -1, -1, 0, -1, 100 }, // { -1, 0, -1, 100 }, // s 1{ -1..
더보기