Skip to content

Commit

Permalink
Cherry-pick bug fixes to release_9_branch (#4617)
Browse files Browse the repository at this point in the history
* [bug-fix] Don't load non-wrapped policy (#4593)

* pin cattrs version

* cap PyTorch version

* use v2 action and pin python version (#4568)

Co-authored-by: Ervin T <ervin@unity3d.com>
Co-authored-by: Chris Elion <chris.elion@unity3d.com>
  • Loading branch information
3 people committed Nov 2, 2020
1 parent 223ecaf commit 79faafd
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 11 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
- uses: actions/setup-python@v2
with:
python-version: 3.7.x
- uses: actions/setup-ruby@v1
with:
ruby-version: '2.6'
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ jobs:
python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }}
- name: Save python dependencies
run: pip freeze > pip_versions-${{ matrix.python-version }}.txt
run: |
pip freeze > pip_versions-${{ matrix.python-version }}.txt
cat pip_versions-${{ matrix.python-version }}.txt
- name: Run pytest
run: pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=junit/test-results-${{ matrix.python-version }}.xml -p no:warnings
- name: Upload pytest test results
Expand Down
14 changes: 14 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).


## [Unreleased] - 2020-11-04
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)

## [1.5.0-preview] - 2020-10-14
### Major Changes
#### com.unity.ml-agents (C#)
Expand Down
16 changes: 10 additions & 6 deletions ml-agents/mlagents/trainers/ghost/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,11 @@ def get_step(self) -> int:
@property
def reward_buffer(self) -> Deque[float]:
"""
Returns the reward buffer. The reward buffer contains the cumulative
rewards of the most recent episodes completed by agents using this
trainer.
:return: the reward buffer.
"""
Returns the reward buffer. The reward buffer contains the cumulative
rewards of the most recent episodes completed by agents using this
trainer.
:return: the reward buffer.
"""
return self.trainer.reward_buffer

@property
Expand Down Expand Up @@ -319,7 +319,6 @@ def create_policy(
policy = self.trainer.create_policy(
parsed_behavior_id, behavior_spec, create_graph=True
)
self.trainer.model_saver.initialize_or_load(policy)
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

Expand All @@ -337,6 +336,11 @@ def create_policy(
self._save_snapshot() # Need to save after trainer initializes policy
self._learning_team = self.controller.get_learning_team
self.wrapped_trainer_team = team_id
else:
# Load the weights of the ghost policy from the wrapped one
policy.load_weights(
self.trainer.get_policy(parsed_behavior_id).get_weights()
)
return policy

def add_policy(
Expand Down
44 changes: 44 additions & 0 deletions ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,50 @@ def test_load_and_set(dummy_config, use_discrete):
np.testing.assert_array_equal(w, lw)


def test_resume(dummy_config, tmp_path):
    """A ghost trainer resumed from a checkpoint must reload identical policy weights."""
    specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    team0_id = "test_brain?team=0"
    team1_id = "test_brain?team=1"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(team0_id).brain_name
    tmp_path = tmp_path.as_posix()

    # First run: train from scratch (load=False) and write a checkpoint.
    wrapped_trainer = PPOTrainer(
        brain_name, 0, dummy_config, True, False, 0, tmp_path
    )
    controller = GhostController(100)
    ghost = GhostTrainer(
        wrapped_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
    )

    parsed_id0 = BehaviorIdentifiers.from_name_behavior_id(team0_id)
    parsed_id1 = BehaviorIdentifiers.from_name_behavior_id(team1_id)
    for parsed_id in (parsed_id0, parsed_id1):
        ghost.add_policy(parsed_id, ghost.create_policy(parsed_id, specs))

    ghost.save_model()

    # Second run: resume (load=True) from the checkpoint just written.
    wrapped_trainer2 = PPOTrainer(
        brain_name, 0, dummy_config, True, True, 0, tmp_path
    )
    ghost2 = GhostTrainer(
        wrapped_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
    )
    for parsed_id in (parsed_id0, parsed_id1):
        ghost2.add_policy(parsed_id, ghost2.create_policy(parsed_id, specs))

    saved_weights = ghost.get_policy(parsed_id1.behavior_id).get_weights()
    resumed_weights = ghost2.get_policy(parsed_id1.behavior_id).get_weights()
    for saved, resumed in zip(saved_weights, resumed_weights):
        np.testing.assert_array_equal(saved, resumed)


def test_process_trajectory(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
Expand Down
44 changes: 44 additions & 0 deletions ml-agents/mlagents/trainers/tests/torch/test_ghost.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,50 @@ def test_load_and_set(dummy_config, use_discrete):
np.testing.assert_array_equal(w, lw)


def test_resume(dummy_config, tmp_path):
    """A ghost trainer resumed from a checkpoint must reload identical policy weights."""
    specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    team0_id = "test_brain?team=0"
    team1_id = "test_brain?team=1"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(team0_id).brain_name
    tmp_path = tmp_path.as_posix()

    # First run: train from scratch (load=False) and write a checkpoint.
    wrapped_trainer = PPOTrainer(
        brain_name, 0, dummy_config, True, False, 0, tmp_path
    )
    controller = GhostController(100)
    ghost = GhostTrainer(
        wrapped_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
    )

    parsed_id0 = BehaviorIdentifiers.from_name_behavior_id(team0_id)
    parsed_id1 = BehaviorIdentifiers.from_name_behavior_id(team1_id)
    for parsed_id in (parsed_id0, parsed_id1):
        ghost.add_policy(parsed_id, ghost.create_policy(parsed_id, specs))

    ghost.save_model()

    # Second run: resume (load=True) from the checkpoint just written.
    wrapped_trainer2 = PPOTrainer(
        brain_name, 0, dummy_config, True, True, 0, tmp_path
    )
    ghost2 = GhostTrainer(
        wrapped_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
    )
    for parsed_id in (parsed_id0, parsed_id1):
        ghost2.add_policy(parsed_id, ghost2.create_policy(parsed_id, specs))

    saved_weights = ghost.get_policy(parsed_id1.behavior_id).get_weights()
    resumed_weights = ghost2.get_policy(parsed_id1.behavior_id).get_weights()
    for saved, resumed in zip(saved_weights, resumed_weights):
        np.testing.assert_array_equal(saved, resumed)


def test_process_trajectory(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
Expand Down
5 changes: 3 additions & 2 deletions ml-agents/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def run(self):
"protobuf>=3.6",
"pyyaml>=3.1.0",
"tensorflow>=1.14,<3.0",
"cattrs>=1.0.0",
# cattrs 1.1.0 dropped support for python 3.6.
"cattrs>=1.0.0,<1.1.0",
"attrs>=19.3.0",
'pypiwin32==223;platform_system=="Windows"',
# We don't actually need six, but tensorflow does, and pip seems
Expand All @@ -79,5 +80,5 @@ def run(self):
]
},
cmdclass={"verify": VerifyVersionCommand},
extras_require={"torch": ["torch>=1.5.0"]},
extras_require={"torch": ["torch>=1.5.0,<1.7.0"]},
)
2 changes: 1 addition & 1 deletion test_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ pytest-cov==2.6.1
pytest-xdist==1.34.0

# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0
torch>=1.5.0, <1.7.0

tf2onnx>=1.5.5

0 comments on commit 79faafd

Please sign in to comment.