Skip to content

Commit

Permalink
Make subtitle rendering based on PyAV
Browse files Browse the repository at this point in the history
  • Loading branch information
WyattBlue committed May 9, 2024
1 parent 0065948 commit 606ed98
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 214 deletions.
71 changes: 46 additions & 25 deletions auto_editor/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from typing import TYPE_CHECKING

import numpy as np
from numpy import concatenate as npconcat
from numpy import zeros as npzeros

from auto_editor import version
from auto_editor.lang.json import Lexer, Parser, dump
Expand All @@ -19,7 +21,6 @@
orc,
)
from auto_editor.lib.data_structs import Sym
from auto_editor.render.subtitle import SubtitleParser
from auto_editor.utils.cmdkw import (
Required,
pAttr,
Expand Down Expand Up @@ -190,7 +191,7 @@ def none(self) -> NDArray[np.bool_]:
return np.ones(self.media_length, dtype=np.bool_)

def all(self) -> NDArray[np.bool_]:
return np.zeros(self.media_length, dtype=np.bool_)
return npzeros(self.media_length, dtype=np.bool_)

def read_cache(self, tag: str, obj: dict[str, Any]) -> None | np.ndarray:
workfile = os.path.join(
Expand Down Expand Up @@ -278,7 +279,7 @@ def get_max_volume(s: np.ndarray) -> float:
)
self.bar.start(audio_ticks, "Analyzing audio volume")

threshold_list = np.zeros((audio_ticks), dtype=np.float64)
threshold_list = npzeros((audio_ticks), dtype=np.float64)

if max_volume == 0: # Prevent dividing by zero
return threshold_list
Expand Down Expand Up @@ -313,31 +314,51 @@ def subtitle(
except re.error as e:
self.log.error(e)

sub_file = self.ensure.subtitle(self.src, stream)
parser = SubtitleParser(self.tb)
import av

result = npzeros((30), dtype=np.bool_)
count = 0
subtitle_length = 0

with open(sub_file, encoding="utf-8") as file:
parser.parse(file.read(), "webvtt")
with av.open(self.src.path, "r") as container:
for packet in container.demux(subtitles=stream):
if packet is None or packet.pts is None:
continue

# stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
def cleanhtml(raw_html: str) -> str:
cleanr = re.compile("<.*?>")
return re.sub(cleanr, "", raw_html)
line = ""
if sub := packet.decode():
for val in sub[0].rects:
if isinstance(val, av.subtitles.subtitle.AssSubtitle):
line += val.ass.decode("utf-8", "ignore")
if isinstance(val, av.subtitles.subtitle.TextSubtitle):
line += val.text.decode("utf-8", "ignore")

if not parser.contents:
self.log.error("subtitle has no valid entries")
if packet.duration is not None and packet.time_base is not None:
end = round(
(packet.pts + packet.duration) * packet.time_base * self.tb
)
subtitle_length = max(subtitle_length, end)

result = np.zeros((parser.contents[-1].end), dtype=np.bool_)
if line and re.search(pattern, line):
if packet.duration is None or packet.time_base is None:
self.log.warning("Subtitle has unknown duration")
continue

count = 0
for content in parser.contents:
if max_count is not None and count >= max_count:
break
count += 1
start = round(packet.pts * packet.time_base * self.tb)

if len(result) < end:
new_length = max(end, len(result) * 2)
result = npconcat(
[result, npzeros(new_length, dtype=np.bool_)], axis=0
)

result[start:end] = 1

if max_count is not None and count >= max_count:
break

line = cleanhtml(content.after.strip())
if line and re.search(pattern, line):
result[content.start : content.end] = 1
count += 1
result = result[:subtitle_length]

return result

Expand Down Expand Up @@ -377,7 +398,7 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
)
graph.configure()

threshold_list = np.zeros((1024), dtype=np.float64)
threshold_list = npzeros((1024), dtype=np.float64)

for unframe in container.decode(stream):
graph.push(unframe)
Expand All @@ -392,8 +413,8 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
current_frame = frame.to_ndarray()

if index > len(threshold_list) - 1:
threshold_list = np.concatenate(
(threshold_list, np.zeros((len(threshold_list)), dtype=np.float64)),
threshold_list = npconcat(
(threshold_list, npzeros((len(threshold_list)), dtype=np.float64)),
axis=0,
)

Expand Down
22 changes: 7 additions & 15 deletions auto_editor/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,15 +214,6 @@ def edit_media(
ensure = Ensure(ffmpeg, samplerate, temp, log)

if tl is None:
# Extract subtitles in their native format.
if src is not None and len(src.subtitles) > 0 and not args.sn:
cmd = ["-i", f"{src.path}", "-hide_banner"]
for s, sub in enumerate(src.subtitles):
cmd.extend(["-map", f"0:s:{s}"])
for s, sub in enumerate(src.subtitles):
cmd.extend([os.path.join(temp, f"{s}s.{sub.ext}")])
ffmpeg.run(cmd)

tl = make_timeline(sources, ffmpeg, ensure, args, samplerate, bar, temp, log)

if export["export"] == "timeline":
Expand Down Expand Up @@ -280,12 +271,8 @@ def make_media(tl: v3, output: str) -> None:

visual_output = []
audio_output = []
sub_output = []
apply_later = False

if ctr.allow_subtitle and not args.sn:
sub_output = make_new_subtitles(tl, ffmpeg, temp, log)

if ctr.allow_audio:
audio_output = make_new_audio(tl, ensure, args, ffmpeg, bar, temp, log)

Expand All @@ -304,20 +291,25 @@ def make_media(tl: v3, output: str) -> None:
visual_output.append((False, out_path))

log.conwrite("Writing output file")

making_subs = ctr.allow_subtitle and not args.sn
old_out = os.path.join(temp, f"oldout.{out_ext}")
mux_quality_media(
ffmpeg,
visual_output,
audio_output,
sub_output,
0 if tl.v1 is None else len(tl.v1.source.subtitles),
apply_later,
ctr,
output,
old_out if making_subs else output,
tl.tb,
args,
src,
temp,
log,
)
if making_subs:
make_new_subtitles(tl, old_out, output, log)

if export["export"] == "clip-sequence":
if tl.v1 is None:
Expand Down
25 changes: 2 additions & 23 deletions auto_editor/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def mux_quality_media(
ffmpeg: FFmpeg,
visual_output: list[tuple[bool, str]],
audio_output: list[str],
sub_output: list[str],
subtitle_streams: int,
apply_v: bool,
ctr: Container,
output_path: str,
Expand All @@ -88,7 +88,6 @@ def mux_quality_media(
) -> None:
v_tracks = len(visual_output)
a_tracks = len(audio_output)
s_tracks = 0 if args.sn else len(sub_output)

cmd = ["-hide_banner", "-y", "-i", f"{src.path}"]

Expand Down Expand Up @@ -126,10 +125,7 @@ def mux_quality_media(
new_a_file = audio_output[0]
cmd.extend(["-i", new_a_file])

for subfile in sub_output:
cmd.extend(["-i", subfile])

for i in range(v_tracks + s_tracks + a_tracks):
for i in range(v_tracks + a_tracks):
cmd.extend(["-map", f"{i+1}:0"])

cmd.extend(["-map_metadata", "0"])
Expand Down Expand Up @@ -163,20 +159,6 @@ def mux_quality_media(
break
if astream.lang is not None:
cmd.extend([f"-metadata:s:a:{i}", f"language={astream.lang}"])
for i, sstream in enumerate(src.subtitles):
if i > s_tracks:
break
if sstream.lang is not None:
cmd.extend([f"-metadata:s:s:{i}", f"language={sstream.lang}"])

if s_tracks > 0:
scodec = src.subtitles[0].codec
if same_container:
cmd.extend(["-c:s", scodec])
elif ctr.scodecs is not None:
if scodec not in ctr.scodecs:
scodec = ctr.scodecs[0]
cmd.extend(["-c:s", scodec])

if a_tracks > 0:
cmd += _ffset("-c:a", args.audio_codec) + _ffset("-b:a", args.audio_bitrate)
Expand All @@ -200,9 +182,6 @@ def mux_quality_media(
cmd.extend(args.extras.split(" "))
cmd.extend(["-strict", "-2"]) # Allow experimental codecs.

if s_tracks > 0:
cmd.extend(["-map", "0:t?"]) # Add input attachments to output.

# This was causing a crash for 'example.mp4 multi-track.mov'
# cmd.extend(["-map", "0:d?"])

Expand Down

0 comments on commit 606ed98

Please sign in to comment.