From 151ba406ebf07c5274c99542549aaacd6f70ba24 Mon Sep 17 00:00:00 2001 From: Zhiqing Xiao Date: Thu, 1 Apr 2021 02:02:11 +0800 Subject: [PATCH] Revise the unattainable reward_threshold to an attainable value (#2205) **Issues:** The current `reward_threshold` values for `FrozenLake-v0` and `FrozenLake8x8-v0` are too high to be attained. Commit: https://github.com/openai/gym/commit/df515de07d0a0767b1b3c8c79e0c1386e0c3f172 @joschu **Solution:** Reduce the `reward_threshold` values to make them attainable. **Reference:** Code to calculate the theoretical optimal expected rewards: ```python import gym env = gym.make('FrozenLake-v0') print(env.observation_space.n) # 16 print(env.action_space.n) # 4 print(env.spec.reward_threshold) # 0.78, should be smaller print(env.spec.max_episode_steps) # 100 import numpy as np v = np.zeros((101, 16), dtype=float) q = np.zeros((101, 16, 4), dtype=float) pi = np.zeros((101, 16), dtype=float) for t in range(99, -1, -1): # backward for s in range(16): for a in range(4): for p, next_s, r, d in env.P[s][a]: q[t, s, a] += p * (r + (1. - float(d)) * v[t+1, next_s]) v[t, s] = q[t, s].max() pi[t, s] = q[t, s].argmax() print(v[0, 0]) # ~0.74 < 0.78 ``` ```python import gym env = gym.make('FrozenLake8x8-v0') print(env.observation_space.n) # 64 print(env.action_space.n) # 4 print(env.spec.reward_threshold) # 0.99, should be smaller print(env.spec.max_episode_steps) # 200 import numpy as np v = np.zeros((201, 64), dtype=float) q = np.zeros((201, 64, 4), dtype=float) pi = np.zeros((201, 64), dtype=float) for t in range(199, -1, -1): # backward for s in range(64): for a in range(4): for p, next_s, r, d in env.P[s][a]: q[t, s, a] += p * (r + (1. 
- float(d)) * v[t+1, next_s]) v[t, s] = q[t, s].max() pi[t, s] = q[t, s].argmax() print(v[0, 0]) # ~0.91 < 0.99 ``` --- gym/envs/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 98ba6238249..d4bc616253e 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -152,7 +152,7 @@ entry_point='gym.envs.toy_text:FrozenLakeEnv', kwargs={'map_name' : '4x4'}, max_episode_steps=100, - reward_threshold=0.78, # optimum = .8196 + reward_threshold=0.70, # optimum = 0.74 ) register( @@ -160,7 +160,7 @@ entry_point='gym.envs.toy_text:FrozenLakeEnv', kwargs={'map_name' : '8x8'}, max_episode_steps=200, - reward_threshold=0.99, # optimum = 1 + reward_threshold=0.85, # optimum = 0.91 ) register(