Skip to content

Commit

Permalink
test(artifacts): Use tmp_path fixture instead of writing local file…
Browse files Browse the repository at this point in the history
…s during tests (#4521)

* Use tmp_path fixture instead of writing to the filesystem in tests

* Remove unused imports
  • Loading branch information
moredatarequired authored and andrewtruong committed Dec 2, 2022
1 parent 58f6cfc commit 52823e6
Showing 1 changed file with 36 additions and 47 deletions.
83 changes: 36 additions & 47 deletions tests/unit_tests/test_wandb_artifacts_cache.py
@@ -1,6 +1,5 @@
import base64
import os
import pathlib
import random
from multiprocessing import Pool

Expand All @@ -18,9 +17,8 @@ def _cache_writer(cache_path):
f.write("".join(random.choice("0123456") for _ in range(10)))


def test_opener_rejects_append_mode():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_opener_rejects_append_mode(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)
path, exists, opener = cache.check_md5_obj_path(base64.b64encode(b"abcdef"), 10)

with pytest.raises(ValueError):
Expand All @@ -32,13 +30,12 @@ def test_opener_rejects_append_mode():
pass


def test_check_md5_obj_path():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_md5_obj_path(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

md5 = base64.b64encode(b"abcdef")
path, exists, opener = cache.check_md5_obj_path(md5, 10)
expected_path = os.path.join("cache", "obj", "md5", "61", "6263646566")
expected_path = os.path.join(tmp_path, "obj", "md5", "61", "6263646566")
with opener() as f:
f.write("hi")
with open(path) as f:
Expand All @@ -49,9 +46,8 @@ def test_check_md5_obj_path():
assert contents == "hi"


def test_check_etag_obj_path_points_to_opener_dst():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_etag_obj_path_points_to_opener_dst(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

path, exists, opener = cache.check_etag_obj_path("http://my/url", "abc", 10)

Expand All @@ -63,9 +59,8 @@ def test_check_etag_obj_path_points_to_opener_dst():
assert contents == "hi"


def test_check_etag_obj_path_returns_exists_if_exists():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_etag_obj_path_returns_exists_if_exists(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

size = 123
_, exists, opener = cache.check_etag_obj_path("http://my/url", "abc", size)
Expand All @@ -78,9 +73,8 @@ def test_check_etag_obj_path_returns_exists_if_exists():
assert exists


def test_check_etag_obj_path_returns_not_exists_if_incomplete():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_etag_obj_path_returns_not_exists_if_incomplete(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

size = 123
_, exists, opener = cache.check_etag_obj_path("http://my/url", "abc", size)
Expand All @@ -99,9 +93,8 @@ def test_check_etag_obj_path_returns_not_exists_if_incomplete():
assert exists


def test_check_etag_obj_path_does_not_include_etag():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_etag_obj_path_does_not_include_etag(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

path, _, _ = cache.check_etag_obj_path("http://url/1", "abcdef", 10)
assert "abcdef" not in path
Expand All @@ -115,9 +108,10 @@ def test_check_etag_obj_path_does_not_include_etag():
("http://url/1", "http://url/2", "abc", "abc", False),
],
)
def test_check_etag_obj_path_hashes_url_and_etag(url1, url2, etag1, etag2, path_equal):
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_check_etag_obj_path_hashes_url_and_etag(
url1, url2, etag1, etag2, path_equal, tmp_path
):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

path_1, _, _ = cache.check_etag_obj_path(url1, etag1, 10)
path_2, _, _ = cache.check_etag_obj_path(url2, etag2, 10)
Expand All @@ -128,34 +122,29 @@ def test_check_etag_obj_path_hashes_url_and_etag(url1, url2, etag1, etag2, path_
assert path_1 != path_2


def test_check_write_parallel(runner):
with runner.isolated_filesystem() as t:
cache = os.path.join(t, "cache")
num_parallel = 5
def test_check_write_parallel(tmp_path):
num_parallel = 5

p = Pool(num_parallel)
p.map(_cache_writer, [cache for _ in range(num_parallel)])
_cache_writer(cache) # run in this process too for code coverage
p.close()
p.join()
p = Pool(num_parallel)
p.map(_cache_writer, [tmp_path for _ in range(num_parallel)])
_cache_writer(tmp_path) # run in this process too for code coverage
p.close()
p.join()

# Regardless of the ordering, we should be left with one
# file at the end.
files = [
f for f in (pathlib.Path(cache) / "obj" / "etag").rglob("*") if f.is_file()
]
assert len(files) == 1
# Regardless of the ordering, we should be left with one
# file at the end.
files = [f for f in (tmp_path / "obj" / "etag").rglob("*") if f.is_file()]
assert len(files) == 1


def test_artifacts_cache_cleanup_empty():
os.mkdir("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
def test_artifacts_cache_cleanup_empty(tmp_path):
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)
reclaimed_bytes = cache.cleanup(100000)
assert reclaimed_bytes == 0


def test_artifacts_cache_cleanup():
cache_root = os.path.join("cache", "obj", "md5")
def test_artifacts_cache_cleanup(tmp_path):
cache_root = os.path.join(tmp_path, "obj", "md5")

path_1 = os.path.join(cache_root, "aa")
os.makedirs(path_1)
Expand All @@ -178,20 +167,20 @@ def test_artifacts_cache_cleanup():
f.flush()
os.fsync(f)

cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)
reclaimed_bytes = cache.cleanup(5000)

# We should get rid of "aardvark" in this case
assert reclaimed_bytes == 5000


def test_artifacts_cache_cleanup_tmp_files():
path = os.path.join("cache", "obj", "md5", "aa")
def test_artifacts_cache_cleanup_tmp_files(tmp_path):
path = os.path.join(tmp_path, "obj", "md5", "aa")
os.makedirs(path)
with open(os.path.join(path, "tmp_abc"), "w") as f:
f.truncate(1000)

cache = wandb_sdk.wandb_artifacts.ArtifactsCache("cache")
cache = wandb_sdk.wandb_artifacts.ArtifactsCache(tmp_path)

# Even if we are above our target size, the cleanup
# should reclaim tmp files.
Expand Down

0 comments on commit 52823e6

Please sign in to comment.