Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add neptune clear command #1091

Merged
merged 5 commits into from Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/neptune/new/cli/__main__.py
Expand Up @@ -17,6 +17,7 @@
import click
import pkg_resources

from neptune.new.cli.clear_command import clear
from neptune.new.cli.commands import (
status,
sync,
Expand All @@ -30,6 +31,7 @@ def main():

main.add_command(sync)
main.add_command(status)
main.add_command(clear)

plugins = {entry_point.name: entry_point for entry_point in pkg_resources.iter_entry_points("neptune.plugins")}

Expand Down
80 changes: 80 additions & 0 deletions src/neptune/new/cli/clear.py
@@ -0,0 +1,80 @@
#
# Copyright (c) 2022, Neptune Labs Sp. z o.o.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

__all__ = ["ClearRunner"]

import shutil
from pathlib import Path
from typing import Sequence

import click

from neptune.new.cli.abstract_backend_runner import AbstractBackendRunner
from neptune.new.cli.container_manager import ContainersManager
from neptune.new.cli.status import StatusRunner
from neptune.new.cli.utils import get_offline_dirs
from neptune.new.internal.backends.api_model import ApiExperiment
from neptune.new.internal.id_formats import UniqueId
from neptune.new.internal.utils.logger import logger


class ClearRunner(AbstractBackendRunner):
    """Runner behind the ``neptune clear`` command.

    Finds container directories stored under a local Neptune data directory
    and removes the ones that are no longer wanted.
    """

    def clear(self, path: Path, force: bool = False):
        """Remove leftover container directories found under ``path``.

        Directories for which no metadata container could be fetched are
        removed right away. Unsynced and offline containers are listed first
        and removed only when ``force`` is true or the user confirms the
        interactive prompt.

        Args:
            path: Neptune data directory to scan.
            force: Skip the confirmation prompt when true.
        """
        container_manager = ContainersManager(self._backend, path)
        # The synced group returned by the partitioning is not needed here.
        _, unsynced_containers, not_found = container_manager.partition_containers_and_clean_junk(path)

        offline_containers = get_offline_dirs(path)

        # Directories without a matching metadata container are deleted
        # without asking.
        ClearRunner.remove_containers(not_found)

        if offline_containers or unsynced_containers:
            self.log_junk_metadata(offline_containers, unsynced_containers)

            if force or click.confirm("\nDo you want to delete the listed metadata?"):
                self.remove_data(container_manager, offline_containers, unsynced_containers)

    @staticmethod
    def log_junk_metadata(offline_containers, unsynced_containers):
        """Log the unsynced and offline containers that are about to be removed.

        Each non-empty group is preceded by an empty log line and printed via
        the corresponding ``StatusRunner`` helper.
        """
        if unsynced_containers:
            logger.info("")
            StatusRunner.log_unsync_objects(unsynced_containers=unsynced_containers)
        if offline_containers:
            logger.info("")
            StatusRunner.log_offline_objects(offline_dirs=offline_containers, info=False)

    @staticmethod
    def remove_data(
        container_manager: ContainersManager,
        offline_containers: Sequence[UniqueId],
        unsynced_containers: Sequence[ApiExperiment],
    ):
        """Resolve the directories of the given containers and delete them.

        Args:
            container_manager: Used to map containers to their local paths.
            offline_containers: Ids of offline containers to remove.
            unsynced_containers: Unsynced containers to remove.
        """
        offline_containers_paths = [container_manager.resolve_offline_container_dir(x) for x in offline_containers]
        unsynced_containers_paths = [
            container_manager.resolve_async_path(container) for container in unsynced_containers
        ]

        ClearRunner.remove_containers(offline_containers_paths)
        ClearRunner.remove_containers(unsynced_containers_paths)

    @staticmethod
    def remove_containers(paths):
        """Recursively delete every directory in ``paths``.

        This is best effort: an ``OSError`` raised while deleting one
        directory is logged as a warning and the remaining paths are still
        processed.
        """
        for path in paths:
            try:
                shutil.rmtree(path)
            except OSError:
                # ``Logger.warn`` is a deprecated alias; use ``warning``.
                logger.warning(f"Cannot remove directory: {path}")
            else:
                logger.info(f"Deleted: {path}")
49 changes: 49 additions & 0 deletions src/neptune/new/cli/clear_command.py
@@ -0,0 +1,49 @@
#
# Copyright (c) 2022, Neptune Labs Sp. z o.o.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pathlib import Path

import click

from neptune.new.cli.clear import ClearRunner
from neptune.new.cli.path_option import path_option
from neptune.new.internal.backends.hosted_neptune_backend import HostedNeptuneBackend
from neptune.new.internal.credentials import Credentials

__all__ = ["clear"]


@click.command()
@path_option
def clear(path: Path):
    """
    Clears metadata that has been synchronized or trashed, but is still present in local storage.

    Lists objects and data to be cleared before deleting the data.

    Examples:

    \b
    # Clear junk metadata from local storage
    neptune clear

    \b
    # Clear junk metadata from directory "foo/bar"
    neptune clear --path foo/bar
    """
    # Build a hosted backend from the token-based credentials and hand the
    # resolved data directory to the runner.
    runner = ClearRunner(backend=HostedNeptuneBackend(Credentials.from_token()))
    runner.clear(path)
23 changes: 1 addition & 22 deletions src/neptune/new/cli/commands.py
Expand Up @@ -25,9 +25,9 @@
import click

from neptune.common.exceptions import NeptuneException # noqa: F401
from neptune.new.cli.path_option import path_option
from neptune.new.cli.status import StatusRunner
from neptune.new.cli.sync import SyncRunner
from neptune.new.constants import NEPTUNE_DATA_DIRECTORY
from neptune.new.exceptions import ( # noqa: F401
CannotSynchronizeOfflineRunsWithoutProject,
NeptuneConnectionLostException,
Expand All @@ -46,27 +46,6 @@
from neptune.new.internal.utils.logger import logger


def get_neptune_path(ctx, param, path: str) -> Path:
    """Click option callback resolving ``path`` to a Neptune data directory.

    Returns ``path / NEPTUNE_DATA_DIRECTORY`` when that sub-directory exists,
    or ``path`` itself when it is already a directory with that name.
    Raises ``click.BadParameter`` otherwise. ``ctx`` and ``param`` are part of
    the callback signature and are not used.
    """
    candidate = Path(path)
    nested = candidate / NEPTUNE_DATA_DIRECTORY
    if nested.is_dir():
        return nested
    if candidate.name == NEPTUNE_DATA_DIRECTORY and candidate.is_dir():
        return candidate
    raise click.BadParameter(f"Path {candidate} does not contain a '{NEPTUNE_DATA_DIRECTORY}' folder.")


# Reusable ``--path`` option decorator for CLI commands.
# The raw value must be an existing directory (not a file) and is resolved to
# an absolute path by click; ``get_neptune_path`` then maps it to the data
# directory or rejects it.
path_option = click.option(
    "--path",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    # Evaluated once, when this module is imported.
    default=Path.cwd(),
    callback=get_neptune_path,
    metavar="<location>",
    help="path to a directory containing a '.neptune' folder with stored objects",
)


@click.command()
@path_option
def status(path: Path) -> None:
Expand Down
83 changes: 83 additions & 0 deletions src/neptune/new/cli/container_manager.py
@@ -0,0 +1,83 @@
#
# Copyright (c) 2022, Neptune Labs Sp. z o.o.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

__all__ = ["ContainersManager"]

import abc
from pathlib import Path
from typing import (
List,
Tuple,
)

from neptune.new.cli.utils import (
get_metadata_container,
is_container_synced_and_remove_junk,
iterate_containers,
)
from neptune.new.constants import (
ASYNC_DIRECTORY,
OFFLINE_DIRECTORY,
SYNC_DIRECTORY,
)
from neptune.new.internal.backends.api_model import ApiExperiment
from neptune.new.internal.backends.neptune_backend import NeptuneBackend
from neptune.new.internal.id_formats import UniqueId


class ContainersManager(abc.ABC):
    """Locate container directories inside a local Neptune data directory.

    Holds a backend used to look up metadata containers and a base path from
    which the async, offline and sync sub-directories are derived.
    """

    _backend: NeptuneBackend

    def __init__(self, backend: NeptuneBackend, base_path: Path):
        self._backend = backend
        self._base_path = base_path

    def partition_containers_and_clean_junk(
        self,
        base_path: Path,
    ) -> Tuple[List[ApiExperiment], List[ApiExperiment], List[Path]]:
        """Split the containers stored under the async directory into groups.

        Note that the scan uses the ``base_path`` argument, not the base path
        given to the constructor.

        Args:
            base_path: Directory whose ``ASYNC_DIRECTORY`` child is scanned.

        Returns:
            A ``(synced, unsynced, not_found)`` tuple. The first two lists hold
            the fetched metadata containers, split by the result of
            ``is_container_synced_and_remove_junk``. ``not_found`` holds the
            local paths for which no metadata container was returned.
        """
        synced_containers = []
        unsynced_containers = []
        not_found = []
        async_path = base_path / ASYNC_DIRECTORY
        for container_type, container_id, path in iterate_containers(async_path):
            metadata_container = get_metadata_container(
                backend=self._backend,
                container_id=container_id,
                container_type=container_type,
            )
            if not metadata_container:
                not_found.append(path)
                continue

            # Only truthy containers reach this point, so the result lists
            # need no further filtering.
            if is_container_synced_and_remove_junk(path):
                synced_containers.append(metadata_container)
            else:
                unsynced_containers.append(metadata_container)

        return synced_containers, unsynced_containers, not_found

    def resolve_async_path(self, container: ApiExperiment) -> Path:
        """Return the directory of ``container`` under the async directory."""
        return self._base_path / ASYNC_DIRECTORY / container.type.create_dir_name(container.id)

    def resolve_offline_container_dir(self, offline_id: UniqueId) -> Path:
        """Return the directory of ``offline_id`` under the offline directory."""
        return self._base_path / OFFLINE_DIRECTORY / offline_id

    def iterate_sync_containers(self):
        """Return ``iterate_containers`` applied to the sync directory."""
        return iterate_containers(self._base_path / SYNC_DIRECTORY)
58 changes: 58 additions & 0 deletions src/neptune/new/cli/path_option.py
@@ -0,0 +1,58 @@
#
# Copyright (c) 2022, Neptune Labs Sp. z o.o.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

__all__ = ["path_option"]

from pathlib import Path

import click

from neptune.common.exceptions import NeptuneException # noqa: F401
from neptune.new.constants import NEPTUNE_DATA_DIRECTORY
from neptune.new.exceptions import ( # noqa: F401
CannotSynchronizeOfflineRunsWithoutProject,
NeptuneConnectionLostException,
ProjectNotFound,
RunNotFound,
)
from neptune.new.internal.backends.api_model import ( # noqa: F401
ApiExperiment,
Project,
)
from neptune.new.internal.backends.neptune_backend import NeptuneBackend # noqa: F401
from neptune.new.internal.disk_queue import DiskQueue # noqa: F401
from neptune.new.internal.operation import Operation # noqa: F401


def get_neptune_path(ctx, param, path: str) -> Path:
    """Click option callback resolving ``path`` to a Neptune data directory.

    Returns ``path / NEPTUNE_DATA_DIRECTORY`` when that sub-directory exists,
    or ``path`` itself when it is already a directory with that name.
    Raises ``click.BadParameter`` otherwise. ``ctx`` and ``param`` are part of
    the callback signature and are not used.
    """
    candidate = Path(path)
    nested = candidate / NEPTUNE_DATA_DIRECTORY
    if nested.is_dir():
        return nested
    if candidate.name == NEPTUNE_DATA_DIRECTORY and candidate.is_dir():
        return candidate
    raise click.BadParameter(f"Path {candidate} does not contain a '{NEPTUNE_DATA_DIRECTORY}' folder.")


# Reusable ``--path`` option decorator for CLI commands.
# The raw value must be an existing directory (not a file) and is resolved to
# an absolute path by click; ``get_neptune_path`` then maps it to the data
# directory or rejects it.
path_option = click.option(
    "--path",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    # Evaluated once, when this module is imported.
    default=Path.cwd(),
    callback=get_neptune_path,
    metavar="<location>",
    help="path to a directory containing a '.neptune' folder with stored objects",
)