Allowing FSDP strategy for hpu accelerator #19704

Open · wants to merge 7 commits into master
@@ -447,6 +447,14 @@ def _choose_strategy(self) -> Union[Strategy, str]:
             return "ddp_fork"
         return "ddp"
 
+    def _is_hpu_accelerator(self) -> bool:
+        if _habana_available_and_importable():
+            from lightning_habana import HPUAccelerator
+
+            if isinstance(self._accelerator_flag, HPUAccelerator):
+                return True
+        return False
+
     def _check_strategy_and_fallback(self) -> None:
         """Checks edge cases when the strategy selection was a string input, and we need to fall back to a different
         choice depending on other parameters or the environment."""
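A note on the new helper: as written, it returns True only when `lightning_habana` is importable and the accelerator flag is an `HPUAccelerator` instance; a plain string flag would not satisfy the `isinstance` check. A minimal sketch of that distinction (assuming `lightning-habana` is installed; the `flag` variable is illustrative, not from the PR):

```python
from lightning_habana import HPUAccelerator

flag = HPUAccelerator()                  # instance: the helper would return True
print(isinstance(flag, HPUAccelerator))  # True

flag = "hpu"                             # string flag: not an instance, helper returns False
print(isinstance(flag, HPUAccelerator))  # False
```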
@@ -455,10 +463,12 @@ def _check_strategy_and_fallback(self) -> None:
         strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag
 
         if (
-            strategy_flag in FSDPStrategy.get_registered_strategies() or isinstance(self._strategy_flag, FSDPStrategy)
-        ) and self._accelerator_flag not in ("cuda", "gpu"):
+            (strategy_flag in FSDPStrategy.get_registered_strategies() or isinstance(self._strategy_flag, FSDPStrategy))
+            and self._accelerator_flag not in ("cuda", "gpu")
+            and not self._is_hpu_accelerator()
+        ):
             raise MisconfigurationException(
-                f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU accelerator is not used."
+                f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU or HPU accelerator is not used."
             )
         if strategy_flag in _DDP_FORK_ALIASES and "fork" not in torch.multiprocessing.get_all_start_methods():
             raise ValueError(
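For context, a hedged sketch of the guard's end-to-end effect after this change (the CPU path mirrors the tests below; the HPU lines assume a Habana environment with `lightning-habana` installed):

```python
from lightning.pytorch import Trainer
from lightning.pytorch.utilities.exceptions import MisconfigurationException

# CPU + FSDP still fails fast, now with the updated message:
try:
    Trainer(accelerator="cpu", strategy="fsdp")
except MisconfigurationException as err:
    print(err)  # ...but GPU or HPU accelerator is not used.

# On a Habana machine, an HPUAccelerator instance now passes this check:
# from lightning_habana import HPUAccelerator
# trainer = Trainer(accelerator=HPUAccelerator(), strategy="fsdp")
```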
tests/tests_pytorch/strategies/test_fsdp.py (2 changes: 1 addition & 1 deletion)
@@ -218,7 +218,7 @@ def test_invalid_on_cpu(tmp_path, cuda_count_0):
     """Test to ensure that we raise Misconfiguration for FSDP on CPU."""
     with pytest.raises(
         MisconfigurationException,
-        match=f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU accelerator is not used.",
+        match=f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU or HPU accelerator is not used.",
     ):
         trainer = Trainer(accelerator="cpu", default_root_dir=tmp_path, fast_dev_run=True, strategy="fsdp")
         assert isinstance(trainer.strategy, FSDPStrategy)
tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py
@@ -566,7 +566,7 @@ def test_strategy_choice_ddp_cpu_slurm(cuda_count_0, strategy):
 def test_check_fsdp_strategy_and_fallback():
     with pytest.raises(
         MisconfigurationException,
-        match=f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU accelerator is not used.",
+        match=f"You selected strategy to be `{FSDPStrategy.strategy_name}`, but GPU or HPU accelerator is not used.",
     ):
         Trainer(accelerator="cpu", strategy="fsdp")
 