From 151ba406ebf07c5274c99542549aaacd6f70ba24 Mon Sep 17 00:00:00 2001
From: Zhiqing Xiao <ZhiqingXiao@users.noreply.github.com>
Date: Thu, 1 Apr 2021 02:02:11 +0800
Subject: [PATCH] Revise the unattainable reward_threshold to an attainable
 value (#2205)

**Issues:**   The current `reward_threshold` values for `FrozenLake-v0` and `FrozenLake8x8-v0` are too high to be attained.

Commit: https://github.com/openai/gym/commit/df515de07d0a0767b1b3c8c79e0c1386e0c3f172   @joschu

**Solution:**   Reduce the `reward_threshold` values so that they are attainable.

**Reference:**   Code to calculate the theoretical optimal expected rewards:

```python
"""Finite-horizon backward induction for FrozenLake-v0.

Prints the environment's sizes and registered limits, then the best
expected return obtainable from state 0 at step 0 within 100 steps.
"""
import gym
import numpy as np

env = gym.make('FrozenLake-v0')
print(env.observation_space.n) # 16
print(env.action_space.n) # 4
print(env.spec.reward_threshold) # 0.78, should be smaller
print(env.spec.max_episode_steps) # 100

horizon, n_states, n_actions = 100, 16, 4  # the values printed above
values = np.zeros((horizon + 1, n_states), dtype=float)  # row `horizon` stays 0: nothing is earned after the last step
action_values = np.zeros((horizon + 1, n_states, n_actions), dtype=float)
policy = np.zeros((horizon + 1, n_states), dtype=float)  # greedy action index per (step, state)
for step in reversed(range(horizon)):  # sweep from the last step back to step 0
    for state in range(n_states):
        for action in range(n_actions):
            # each entry of env.P[state][action] unpacks as (probability, next state, reward, done)
            for prob, succ, reward, done in env.P[state][action]:
                alive = 1. - float(done)  # a terminal transition contributes no future value
                action_values[step, state, action] += prob * (reward + alive * values[step + 1, succ])
        values[step, state] = action_values[step, state].max()
        policy[step, state] = action_values[step, state].argmax()
print(values[0, 0]) # ~0.74 < 0.78
```

```python
"""Finite-horizon backward induction for FrozenLake8x8-v0.

Prints the environment's sizes and registered limits, then the best
expected return obtainable from state 0 at step 0 within 200 steps.
"""
import gym
import numpy as np

env = gym.make('FrozenLake8x8-v0')
print(env.observation_space.n) # 64
print(env.action_space.n) # 4
print(env.spec.reward_threshold) # 0.99, should be smaller
print(env.spec.max_episode_steps) # 200

horizon, n_states, n_actions = 200, 64, 4  # the values printed above
values = np.zeros((horizon + 1, n_states), dtype=float)  # row `horizon` stays 0: nothing is earned after the last step
action_values = np.zeros((horizon + 1, n_states, n_actions), dtype=float)
policy = np.zeros((horizon + 1, n_states), dtype=float)  # greedy action index per (step, state)
for step in reversed(range(horizon)):  # sweep from the last step back to step 0
    for state in range(n_states):
        for action in range(n_actions):
            # each entry of env.P[state][action] unpacks as (probability, next state, reward, done)
            for prob, succ, reward, done in env.P[state][action]:
                alive = 1. - float(done)  # a terminal transition contributes no future value
                action_values[step, state, action] += prob * (reward + alive * values[step + 1, succ])
        values[step, state] = action_values[step, state].max()
        policy[step, state] = action_values[step, state].argmax()
print(values[0, 0]) # ~0.91 < 0.99
```
---
 gym/envs/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py
index 98ba6238249..d4bc616253e 100644
--- a/gym/envs/__init__.py
+++ b/gym/envs/__init__.py
@@ -152,7 +152,7 @@
     entry_point='gym.envs.toy_text:FrozenLakeEnv',
     kwargs={'map_name' : '4x4'},
     max_episode_steps=100,
-    reward_threshold=0.78, # optimum = .8196
+    reward_threshold=0.70, # optimum = 0.74
 )
 
 register(
@@ -160,7 +160,7 @@
     entry_point='gym.envs.toy_text:FrozenLakeEnv',
     kwargs={'map_name' : '8x8'},
     max_episode_steps=200,
-    reward_threshold=0.99, # optimum = 1
+    reward_threshold=0.85, # optimum = 0.91
 )
 
 register(