Skip to content

Commit

Permalink
bugfix/index_chunks_not_updated (#216)
Browse files: browse the repository at this point in the history
Allow generate_event_index_pipeline to overwrite existing index-chunks
  • Loading branch information
conantp committed Oct 4, 2022
1 parent 889e01a commit 5bb5488
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 8 deletions.
6 changes: 5 additions & 1 deletion cdp_backend/file_store/functions.py
Expand Up @@ -67,6 +67,7 @@ def upload_file(
filepath: str,
save_name: Optional[str] = None,
remove_local: bool = False,
overwrite: bool = False,
) -> str:
"""
Uploads a file to a Google Cloud file store bucket.
Expand All @@ -88,6 +89,9 @@ def upload_file(
The name to save the file as in the file store.
remove_local: bool
If True, remove the local file upon successful upload.
overwrite: bool
If True, overwrite the remote resource with the same name if it already
exists. Defaults to False.
Returns
-------
Expand All @@ -107,7 +111,7 @@ def upload_file(
uri = get_file_uri(bucket, save_name, credentials_file)

# Unless overwriting, return the existing uri and remove the local copy if desired
if uri:
if uri and not overwrite:
if remove_local:
remove_local_file(resolved_filepath)

Expand Down
1 change: 1 addition & 0 deletions cdp_backend/pipeline/generate_event_index_pipeline.py
Expand Up @@ -383,6 +383,7 @@ def chunk_index(
bucket=bucket_name,
filepath=str(local_chunk_path),
save_name=f"{REMOTE_INDEX_CHUNK_DIR}/{save_filename}",
overwrite=True,
)


Expand Down
76 changes: 69 additions & 7 deletions cdp_backend/tests/file_store/test_functions.py
Expand Up @@ -17,7 +17,7 @@
BUCKET = "bucket"
FILEPATH = "fake/path/" + FILENAME
SAVE_NAME = "fakeSaveName"
EXISTING_FILE_URI = "gs://bucket/existing_file.json"
EXISTING_FILE_URI = "gs://bucket/" + SAVE_NAME
GCS_FILE_URI = functions.GCS_URI.format(bucket=BUCKET, filename=FILENAME)

###############################################################################
Expand Down Expand Up @@ -56,19 +56,76 @@ def test_get_file_uri(


@pytest.mark.parametrize(
"bucket, filepath, save_name, remove_local, existing_file_uri, expected",
"bucket, filepath, save_name, remove_local, overwrite, existing_file_uri, expected",
[
(BUCKET, FILEPATH, SAVE_NAME, True, EXISTING_FILE_URI, EXISTING_FILE_URI),
(BUCKET, FILEPATH, SAVE_NAME, False, EXISTING_FILE_URI, EXISTING_FILE_URI),
(BUCKET, FILEPATH, None, False, None, GCS_FILE_URI),
(BUCKET, FILEPATH, None, True, None, GCS_FILE_URI),
(
BUCKET,
FILEPATH,
SAVE_NAME,
True,
True,
EXISTING_FILE_URI,
EXISTING_FILE_URI,
),
(
BUCKET,
FILEPATH,
SAVE_NAME,
True,
True,
None,
EXISTING_FILE_URI,
),
(
BUCKET,
FILEPATH,
SAVE_NAME,
True,
False,
EXISTING_FILE_URI,
EXISTING_FILE_URI,
),
(
BUCKET,
FILEPATH,
SAVE_NAME,
False,
True,
EXISTING_FILE_URI,
EXISTING_FILE_URI,
),
(
BUCKET,
FILEPATH,
SAVE_NAME,
False,
True,
None,
EXISTING_FILE_URI,
),
(
BUCKET,
FILEPATH,
SAVE_NAME,
False,
False,
EXISTING_FILE_URI,
EXISTING_FILE_URI,
),
(BUCKET, FILEPATH, None, False, True, GCS_FILE_URI, GCS_FILE_URI),
(BUCKET, FILEPATH, None, False, True, None, GCS_FILE_URI),
(BUCKET, FILEPATH, None, False, False, None, GCS_FILE_URI),
(BUCKET, FILEPATH, None, True, True, GCS_FILE_URI, GCS_FILE_URI),
(BUCKET, FILEPATH, None, True, True, None, GCS_FILE_URI),
(BUCKET, FILEPATH, None, True, False, None, GCS_FILE_URI),
],
)
def test_upload_file(
bucket: str,
filepath: str,
save_name: Optional[str],
remove_local: bool,
overwrite: bool,
existing_file_uri: str,
expected: str,
) -> None:
Expand All @@ -82,7 +139,12 @@ def test_upload_file(
mock_path.return_value.name = FILENAME

assert expected == functions.upload_file(
"path/to/creds", bucket, filepath, save_name, remove_local
"path/to/creds",
bucket,
filepath,
save_name,
remove_local,
overwrite,
)


Expand Down

0 comments on commit 5bb5488

Please sign in to comment.