Ensure zips from different workflows don't clash
ZainRizvi committed May 5, 2023
1 parent 5d0e8e2 commit d671cef
Showing 2 changed files with 11 additions and 28 deletions.
21 changes: 3 additions & 18 deletions .github/scripts/pytest_cache.py
@@ -25,7 +25,7 @@ def main():
     )
     parser.add_argument("--pr_identifier", required=True, help="A unique PR identifier")
     parser.add_argument("--workflow", required=True, help="The workflow name")
-    parser.add_argument("--job", required=True, help="The job name")
+    parser.add_argument("--job_identifier", required=True, help="A unique job identifier that should be the same for all runs of job")
     parser.add_argument(
         "--shard", required="--upload" in sys.argv, help="The shard id"
     )  # Only required for upload
@@ -50,23 +50,9 @@ def main():
         # TODO: First check if it's even worth uploading a new cache:
         # Does the cache even mark any failed tests?
 
-        id = os.getenv("AWS_ACCESS_KEY_ID")
-        # get the first three chars if it's not none
-        if id:
-            id = id[:3]
-            print(f"Access key id prefix: {id}xxxxxxxxxx")
-        else:
-            print("No access key id found")
-
-        if os.getenv("AWS_SECRET_ACCESS_KEY"):
-            print("Secret access key found")
-        else:
-            print("No secret access key found")
-
         upload_pytest_cache(
             pr_identifier=PRIdentifier(args.pr_identifier),
-            workflow=args.workflow,
-            job=args.job,
+            job_identifier=args.job_identifier,
             shard=args.shard,
             cache_dir=args.cache_dir,
             bucket=args.bucket,
@@ -77,8 +63,7 @@ def main():
         print(f"Downloading cache with args {args}")
         download_pytest_cache(
             pr_identifier=PRIdentifier(args.pr_identifier),
-            workflow=args.workflow,
-            job=args.job,
+            job_identifier=args.job_identifier,
             dest_cache_dir=args.cache_dir,
             bucket=args.bucket,
             temp_dir=args.temp_dir,
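
With --job replaced by --job_identifier, the script no longer derives uniqueness from the job name alone; the caller is expected to pass an identifier that is already unique per workflow and job. A minimal sketch of how a CI step might build that value and invoke the script; the environment-variable names, placeholder values, and the flag names not visible in this diff (--cache_dir, --temp_dir, --bucket) are assumptions for illustration, not part of this commit:

import os
import subprocess

# Hypothetical composition of the new identifier: combine the workflow name and
# the job name so that identical job names in different workflows stay distinct.
workflow = os.environ.get("GITHUB_WORKFLOW", "pull")
job = os.environ.get("GITHUB_JOB", "linux-test")
job_identifier = f"{workflow}-{job}"

# Only the flags shown in the diff above are confirmed by this commit; the
# remaining flag names are assumed from the parsed attribute names.
subprocess.run(
    [
        "python3",
        ".github/scripts/pytest_cache.py",
        "--upload",
        "--pr_identifier", "pr-12345",        # placeholder
        "--workflow", workflow,
        "--job_identifier", job_identifier,
        "--shard", "1",
        "--cache_dir", ".pytest_cache",       # placeholder
        "--temp_dir", "/tmp/pytest-cache",    # placeholder
        "--bucket", "example-bucket",         # placeholder
    ],
    check=True,
)
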
18 changes: 8 additions & 10 deletions .github/scripts/pytest_caching_utils.py
@@ -30,13 +30,13 @@ def __new__(cls, value):
 
 
 def get_s3_key_prefix(
-    pr_identifier: PRIdentifier, workflow: str, job: str, shard: str = None
+    pr_identifier: PRIdentifier, job_identifier: str, shard: str = None
 ):
     """
     The prefix to any S3 object key for a pytest cache. It's only a prefix though, not a full path to an object.
     For example, it won't include the file extension.
     """
-    prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{pr_identifier}/{sanitize_for_s3(workflow)}/{sanitize_for_s3(job)}"
+    prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{pr_identifier}/{sanitize_for_s3(job_identifier)}"
 
     if shard:
         prefix += f"/{shard}"
@@ -51,8 +51,7 @@ def get_s3_key_prefix(
 # However, in the short term the extra donloads are okay since they aren't that big
 def upload_pytest_cache(
     pr_identifier: PRIdentifier,
-    workflow: str,
-    job: str,
+    job_identifier: str,
     shard: str,
     cache_dir: str,
     bucket: str = BUCKET,
@@ -75,7 +74,7 @@ def upload_pytest_cache(
     if not temp_dir:
         temp_dir = TEMP_DIR
 
-    obj_key_prefix = get_s3_key_prefix(pr_identifier, workflow, job, shard)
+    obj_key_prefix = get_s3_key_prefix(pr_identifier, job_identifier, shard)
     zip_file_path_base = (
         f"{temp_dir}/zip-upload/{obj_key_prefix}"  # doesn't include the extension
     )
@@ -95,8 +94,7 @@
 
 def download_pytest_cache(
     pr_identifier: PRIdentifier,
-    workflow: str,
-    job: str,
+    job_identifier: str,
     dest_cache_dir: str,
     bucket: str = BUCKET,
     temp_dir: str = TEMP_DIR,
@@ -111,7 +109,7 @@
             f"pr_identifier must be of type PRIdentifier, not {type(pr_identifier)}"
         )
 
-    obj_key_prefix = get_s3_key_prefix(pr_identifier, workflow, job)
+    obj_key_prefix = get_s3_key_prefix(pr_identifier, job_identifier)
 
     zip_download_dir = f"{temp_dir}/cache-zip-downloads/{obj_key_prefix}"
     # do the following in a try/finally block so we can clean up the temp files if something goes wrong
@@ -126,13 +124,13 @@
             shard_id = os.path.splitext(os.path.basename(downloaded_zip))[0]
             cache_dir_for_shard = os.path.join(
                 f"{temp_dir}/unzipped-caches",
-                get_s3_key_prefix(pr_identifier, workflow, job, shard_id),
+                get_s3_key_prefix(pr_identifier, job_identifier, shard_id),
                 PYTEST_CACHE_DIR_NAME,
             )
 
             unzip_folder(downloaded_zip, cache_dir_for_shard)
             print(
-                f"Merging cache for job {job} shard {shard_id} into {dest_cache_dir}"
+                f"Merging cache for job_identifier `{job_identifier}`, shard `{shard_id}` into `{dest_cache_dir}`"
             )
             merge_pytest_caches(cache_dir_for_shard, dest_cache_dir)
     finally:
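
The effect of the refactor is easiest to see in the S3 key prefix itself. Below is a minimal sketch comparing the old and new prefix layouts; the value of PYTEST_CACHE_KEY_PREFIX and the behavior of sanitize_for_s3 are stand-in assumptions, and only the two f-string shapes are taken from the diff above:

# Stand-ins for the real constants/helpers in pytest_caching_utils.py (assumed).
PYTEST_CACHE_KEY_PREFIX = "pytest_cache"

def sanitize_for_s3(text: str) -> str:
    # Assumed behavior: replace characters that are awkward in S3 keys.
    return "".join(c if c.isalnum() or c in "-_." else "_" for c in text)

pr_identifier = "pr-12345"
workflow = "pull"
job = "linux-test (1, 3)"
job_identifier = f"{workflow}-{job}"  # the caller now pre-combines workflow + job

# Old layout: workflow and job were separate path segments.
old_prefix = (
    f"{PYTEST_CACHE_KEY_PREFIX}/{pr_identifier}/"
    f"{sanitize_for_s3(workflow)}/{sanitize_for_s3(job)}"
)

# New layout: a single sanitized job_identifier segment; zips from different
# workflows stay apart as long as their identifiers differ.
new_prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{pr_identifier}/{sanitize_for_s3(job_identifier)}"

print(old_prefix)  # pytest_cache/pr-12345/<sanitized workflow>/<sanitized job>
print(new_prefix)  # pytest_cache/pr-12345/<sanitized job_identifier>
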