Skip to content

Commit

Permalink
feature/support-processing-m3u8-video-playlist-uris (#188)
Browse files Browse the repository at this point in the history
* Install m3u8-to-mp4 in setup

* Handle and convert m3u8 as part of resource copy

* In case of m3u8, cdp will host

* Hash instead of uuid

* Add tests

* Lint and format

* Revert to uuid4 instead of hashing uri and add tests

* Lint and format
  • Loading branch information
Jackson Maxfield Brown committed May 22, 2022
1 parent adb41fa commit acd91e0
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 8 deletions.
6 changes: 6 additions & 0 deletions cdp_backend/pipeline/event_gather_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,12 @@ def convert_video_and_handle_host(
# Update variable name for easier downstream typing
video_filepath = mp4_filepath

# Check if original session video uri is a m3u8
# We can't follow the normal convert video process from above
# because the m3u8 looks to the URI for all the chunks
elif session.video_uri.endswith(".m3u8"):
cdp_will_host = True

# Store if the original host isn't https
elif not is_secure_uri(session.video_uri):
try:
Expand Down
4 changes: 4 additions & 0 deletions cdp_backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ def resources_dir() -> Path:
EXAMPLE_YOUTUBE_VIDEO_EMBEDDED = "https://www.youtube.com/embed/XALBGkjkUPQ"
EXAMPLE_YOUTUBE_VIDEO_PARAMETER = "https://www.youtube.com/watch?v=XALBGkjkUPQ"
EXAMPLE_YOUTUBE_VIDEO_SHORT = "https://youtu.be/watch?v=XALBGkjkUPQ"
EXAMPLE_M3U8_PLAYLIST_URI = (
"https://archive-stream.granicus.com/OnDemand/_definst_/mp4:oakland/"
"oakland_fa356edd-b6a3-4532-8118-3ce4881783f4.mp4/playlist.m3u8"
)


@pytest.fixture
Expand Down
12 changes: 11 additions & 1 deletion cdp_backend/tests/pipeline/test_event_gather_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from cdp_backend.pipeline.pipeline_config import EventGatherPipelineConfig
from cdp_backend.pipeline.transcript_model import EXAMPLE_TRANSCRIPT, Transcript

from ..conftest import EXAMPLE_M3U8_PLAYLIST_URI

#############################################################################

# NOTE:
Expand Down Expand Up @@ -574,6 +576,9 @@ def test_store_event_processing_results(
0
].video_uri = "s3://bucket/does-not-exist.txt"

EXISTING_REMOTE_M3U8_MINIMAL_EVENT = deepcopy(EXAMPLE_MINIMAL_EVENT)
EXISTING_REMOTE_M3U8_MINIMAL_EVENT.sessions[0].video_uri = EXAMPLE_M3U8_PLAYLIST_URI


@mock.patch(f"{PIPELINE_PATH}.fs_functions.upload_file")
@mock.patch(f"{PIPELINE_PATH}.fs_functions.get_open_url_for_gcs_file")
Expand Down Expand Up @@ -606,6 +611,12 @@ def test_store_event_processing_results(
"example_video.mp4",
"hosted-video.mp4",
),
(
"example_video.mp4",
deepcopy(EXISTING_REMOTE_M3U8_MINIMAL_EVENT.sessions[0]),
"example_video.mp4",
"hosted-video.mp4",
),
],
)
def test_convert_video_and_handle_host(
Expand All @@ -618,7 +629,6 @@ def test_convert_video_and_handle_host(
expected_filepath: str,
expected_hosted_video_url: str,
) -> None:

mock_upload_file.return_value = "file_store_uri"
mock_generate_url.return_value = "hosted-video.mp4"
mock_convert_video_to_mp4.return_value = expected_filepath
Expand Down
18 changes: 11 additions & 7 deletions cdp_backend/tests/utils/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from .. import test_utils
from ..conftest import (
EXAMPLE_M3U8_PLAYLIST_URI,
EXAMPLE_MKV_VIDEO_FILENAME,
EXAMPLE_VIDEO_FILENAME,
EXAMPLE_VIDEO_HD_FILENAME,
Expand Down Expand Up @@ -239,21 +240,24 @@ def test_convert_video_to_mp4(
reason="No internet connection",
)
@pytest.mark.parametrize(
"youtube_uri, expected",
"uri, expected",
[
(EXAMPLE_YOUTUBE_VIDEO_EMBEDDED, "XALBGkjkUPQ.mp4"),
(EXAMPLE_YOUTUBE_VIDEO_PARAMETER, "XALBGkjkUPQ.mp4"),
(EXAMPLE_YOUTUBE_VIDEO_SHORT, "XALBGkjkUPQ.mp4"),
(EXAMPLE_M3U8_PLAYLIST_URI, None),
],
)
def test_youtube_downloader(
def test_remote_resource_copy(
resources_dir: Path,
youtube_uri: str,
expected: str,
uri: str,
expected: Optional[str],
) -> None:
actual_uri = file_utils.resource_copy(youtube_uri, resources_dir, True)
expected_uri = str(resources_dir / expected)
assert actual_uri == expected_uri
actual_uri = file_utils.resource_copy(uri, resources_dir, True)
if expected:
expected_uri = str(resources_dir / expected)
assert actual_uri == expected_uri

assert Path(actual_uri).exists()
assert Path(actual_uri).is_file()

Expand Down
21 changes: 21 additions & 0 deletions cdp_backend/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from hashlib import sha256
from pathlib import Path
from typing import Optional, Tuple, Union
from uuid import uuid4

import fsspec
import requests
Expand Down Expand Up @@ -113,6 +114,26 @@ def resource_copy(
if uri.find("youtube.com") >= 0 or uri.find("youtu.be") >= 0:
return youtube_copy(uri, dst, overwrite)

if uri.endswith(".m3u8"):
import m3u8_To_MP4

# We add a uuid4 to the front of the filename because m3u8 files
# are usually simply called playlist.m3u8 -- the result will be
# f"{uuid}-{name}"
mp4_name = dst.with_suffix(".mp4").name
save_name = f"{uuid4()}-{mp4_name}"

# Reset dst
dst = dst.parent / save_name

# Download and convert
m3u8_To_MP4.download(
uri,
mp4_file_dir=dst.parent,
mp4_file_name=save_name,
)
return str(dst)

# Set custom timeout for http resources
if uri.startswith("http"):
# The verify=False is passed to any http URIs
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"graphviz~=0.16",
"imageio~=2.18",
"imageio-ffmpeg~=0.4",
"m3u8-To-MP4~=0.1",
"nltk~=3.6",
"pandas~=1.0",
"prefect~=1.2",
Expand Down

0 comments on commit acd91e0

Please sign in to comment.