Urinx
diff --git a/‎QLearning/QLearning_Taxi_v2.py
+64 b/‎QLearning/QLearning_Taxi_v2.py
+64
diff --git a/‎README.md
+11-1 b/‎README.md
+11-1
diff --git a/‎imgs/taxi1.png
3.26 KB b/‎imgs/taxi1.png
3.26 KB
diff --git a/‎imgs/taxi2.png
2.19 KB b/‎imgs/taxi2.png
2.19 KB
diff --git a/‎imgs/taxi3.png
1.71 KB b/‎imgs/taxi3.png
1.71 KB
diff --git a/‎imgs/taxi4.png
1.68 KB b/‎imgs/taxi4.png
1.68 KB
diff --git a/‎imgs/taxi5.png
1.66 KB b/‎imgs/taxi5.png
1.66 KB
diff --git a/‎imgs/taxi6.png
1.61 KB b/‎imgs/taxi6.png
1.61 KB
@@ -0,0 +1,64 @@
+import numpy as np
+import gym
+import random
+
+# Build the Taxi environment; the sizes of its discrete spaces give the
+# Q-table dimensions.
+env = gym.make("Taxi-v2")
+
+state_size = env.observation_space.n
+action_size = env.action_space.n
+# One row per state, one column per action, every estimate starting at zero.
+qtable = np.zeros((state_size, action_size))
+
+# Hyperparameters
+total_episodes = 50000       # number of training episodes
+total_test_episodes = 100    # number of greedy evaluation episodes
+max_steps = 99               # step cap applied to every episode
+learning_rate = 0.7          # step size of the Q-value update
+gamma = 0.618                # discount applied to the best next-state value
+
+# Exploration schedule: epsilon starts at max_epsilon and is recomputed after
+# each training episode from min_epsilon, max_epsilon and decay_rate.
+max_epsilon = 1.0
+min_epsilon = 0.01
+decay_rate = 0.01
+epsilon = max_epsilon
+
+# Train
+# Tabular Q-learning with an epsilon-greedy behaviour policy.
+for episode in range(total_episodes):
+    state = env.reset()
+
+    for step in range(max_steps):
+        # Explore (random action) when the draw falls at or below epsilon,
+        # otherwise exploit the current best-known action for this state.
+        explore = random.uniform(0, 1) <= epsilon
+        if explore:
+            action = env.action_space.sample()
+        else:
+            action = np.argmax(qtable[state, :])
+
+        next_state, reward, done, info = env.step(action)
+
+        # Move Q(s, a) towards reward + gamma * max_a' Q(s', a').
+        best_next = np.max(qtable[next_state, :])
+        td_target = reward + gamma * best_next
+        qtable[state, action] += learning_rate * (td_target - qtable[state, action])
+
+        state = next_state
+        if done:
+            break
+
+    # Exponentially decay exploration from max_epsilon towards min_epsilon.
+    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * (episode + 1))
+
+
+# Play the Game
+# Run the learned Q-table greedily (no exploration), rendering each step,
+# and report the mean score over the evaluation episodes.
+rewards = []
+for episode in range(total_test_episodes):
+    state = env.reset()
+    total_rewards = 0
+
+    print('='*20)
+    print("[*] Episode", episode)
+    print('='*20)
+
+    for step in range(max_steps):
+        env.render()
+        action = np.argmax(qtable[state, :])
+        state, reward, done, info = env.step(action)
+        total_rewards += reward
+
+        if done:
+            break
+
+    # Record the score even when the step cap is reached before the episode
+    # finishes; otherwise unfinished episodes would be missing from the sum
+    # while still being counted in the average's divisor.
+    rewards.append(total_rewards)
+    print('[*] Score', total_rewards)
+
+env.close()
+# Average over the episodes actually recorded; the max() guard avoids a
+# ZeroDivisionError when no evaluation episodes were run.
+print('[*] Average Score: ' + str(sum(rewards) / max(len(rewards), 1)))
@@ -85,8 +85,18 @@ players draw rate: 0.09528
 
 Taxi v2
 -------
-[to be done]
 
+<div align=center>
+	<img width="93" height="133" src="imgs/taxi1.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi2.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi3.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi4.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi5.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi6.png" alt="Taxi v2">
+	<img width="93" height="133" src="imgs/taxi7.png" alt="Taxi v2">
+</div>
+
+基于 `Q-Learning` 玩 `Taxi v2` 游戏：[[code]](QLearning/QLearning_Taxi_v2.py)
 
 
 [0]. [Diving deeper into Reinforcement Learning with Q-Learning](https://medium.freecodecamp.org/diving-deeper-into-reinforcement-learning-with-q-learning-c18d0db58efe)<br/>