Skip to content

Commit 8f49055

Browse files
authored
Merge pull request #259 from njustesen/fix/a2c-illegal-action
Hotfix to weird multinomial bug
2 parents 5bdfde6 + 4ebe711 commit 8f49055

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

examples/a2c/a2c_agent.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ def forward(self, spatial_input, non_spatial_input):
8888
def act(self, spatial_inputs, non_spatial_input, action_mask):
    """Sample one action per batch row, never returning a masked-out action.

    Action probabilities come from ``self.get_action_probs``; one index per
    row is drawn with ``multinomial(1)``.  Any drawn index that
    ``action_mask`` marks as illegal is replaced by a draw restricted to the
    legal actions of that row.

    Args:
        spatial_inputs: Forwarded unchanged to ``get_action_probs``.
        non_spatial_input: Forwarded unchanged to ``get_action_probs``.
        action_mask: Per-row legality mask, truthy where an action is legal.
            NOTE(review): assumed to be a tensor shaped like the action
            probabilities (rows x actions) -- confirm against the caller.

    Returns:
        A ``(values, actions)`` tuple: ``values`` exactly as returned by
        ``get_action_probs`` and ``actions`` holding one sampled action index
        per row.

    Raises:
        ValueError: If a row whose sampled action is illegal has no legal
            action at all, so no valid replacement exists.
    """
    values, action_probs = self.get_action_probs(spatial_inputs, non_spatial_input, action_mask=action_mask)
    actions = action_probs.multinomial(1)

    # In rare cases multinomial can return an action with p=0.  Repair such
    # rows, but never spin forever: retries are bounded and followed by a
    # deterministic fallback.
    max_resamples = 10
    for i, action in enumerate(actions):
        row_mask = action_mask[i]
        if row_mask[action]:
            continue  # the sampled action is legal; leave it untouched

        # Restricting the weights to legal actions yields the same
        # distribution as rejection sampling (multinomial does not require
        # normalised weights) without depending on luck to terminate.
        legal = row_mask.to(action_probs.dtype)
        weights = action_probs[i].detach() * legal
        if not weights.sum() > 0:
            # No (or NaN) probability mass on legal actions: fall back to a
            # uniform draw over the legal ones.
            weights = legal
            if not weights.sum() > 0:
                raise ValueError("action_mask row %d has no legal action" % i)

        for _ in range(max_resamples):
            candidate = weights.multinomial(1)
            if row_mask[candidate]:
                break
        else:
            # Sampling kept misbehaving; the largest weight is strictly
            # positive, which only happens at a legal index.
            candidate = weights.argmax(dim=0, keepdim=True)
        actions[i] = candidate
    return values, actions
9298

9399
def evaluate_actions(self, spatial_inputs, non_spatial_input, actions, actions_mask):

0 commit comments

Comments
 (0)