Make subtitle rendering based on PyAV

Author: WyattBlue · May 9, 2024
Commit 606ed98 (1 parent: 0065948)


Showing 6 changed files with 142 additions and 214 deletions.
diff --git a/auto_editor/analyze.py b/auto_editor/analyze.py
@@ -6,6 +6,8 @@
 from typing import TYPE_CHECKING
 
 import numpy as np
+from numpy import concatenate as npconcat
+from numpy import zeros as npzeros
 
 from auto_editor import version
 from auto_editor.lang.json import Lexer, Parser, dump
@@ -19,7 +21,6 @@
     orc,
 )
 from auto_editor.lib.data_structs import Sym
-from auto_editor.render.subtitle import SubtitleParser
 from auto_editor.utils.cmdkw import (
     Required,
     pAttr,
@@ -190,7 +191,7 @@ def none(self) -> NDArray[np.bool_]:
         return np.ones(self.media_length, dtype=np.bool_)
 
     def all(self) -> NDArray[np.bool_]:
-        return np.zeros(self.media_length, dtype=np.bool_)
+        return npzeros(self.media_length, dtype=np.bool_)
 
     def read_cache(self, tag: str, obj: dict[str, Any]) -> None | np.ndarray:
         workfile = os.path.join(
@@ -278,7 +279,7 @@ def get_max_volume(s: np.ndarray) -> float:
         )
         self.bar.start(audio_ticks, "Analyzing audio volume")
 
-        threshold_list = np.zeros((audio_ticks), dtype=np.float64)
+        threshold_list = npzeros((audio_ticks), dtype=np.float64)
 
         if max_volume == 0:  # Prevent dividing by zero
             return threshold_list
@@ -313,31 +314,51 @@ def subtitle(
         except re.error as e:
             self.log.error(e)
 
-        sub_file = self.ensure.subtitle(self.src, stream)
-        parser = SubtitleParser(self.tb)
+        import av
+
+        result = npzeros((30), dtype=np.bool_)
+        count = 0
+        subtitle_length = 0
 
-        with open(sub_file, encoding="utf-8") as file:
-            parser.parse(file.read(), "webvtt")
+        with av.open(self.src.path, "r") as container:
+            for packet in container.demux(subtitles=stream):
+                if packet is None or packet.pts is None:
+                    continue
 
-        # stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
-        def cleanhtml(raw_html: str) -> str:
-            cleanr = re.compile("<.*?>")
-            return re.sub(cleanr, "", raw_html)
+                line = ""
+                if sub := packet.decode():
+                    for val in sub[0].rects:
+                        if isinstance(val, av.subtitles.subtitle.AssSubtitle):
+                            line += val.ass.decode("utf-8", "ignore")
+                        if isinstance(val, av.subtitles.subtitle.TextSubtitle):
+                            line += val.text.decode("utf-8", "ignore")
 
-        if not parser.contents:
-            self.log.error("subtitle has no valid entries")
+                if packet.duration is not None and packet.time_base is not None:
+                    end = round(
+                        (packet.pts + packet.duration) * packet.time_base * self.tb
+                    )
+                    subtitle_length = max(subtitle_length, end)
 
-        result = np.zeros((parser.contents[-1].end), dtype=np.bool_)
+                if line and re.search(pattern, line):
+                    if packet.duration is None or packet.time_base is None:
+                        self.log.warning("Subtitle has unknown duration")
+                        continue
 
-        count = 0
-        for content in parser.contents:
-            if max_count is not None and count >= max_count:
-                break
+                    count += 1
+                    start = round(packet.pts * packet.time_base * self.tb)
+
+                    if len(result) < end:
+                        new_length = max(end, len(result) * 2)
+                        result = npconcat(
+                            [result, npzeros(new_length, dtype=np.bool_)], axis=0
+                        )
+
+                    result[start:end] = 1
+
+                    if max_count is not None and count >= max_count:
+                        break
 
-            line = cleanhtml(content.after.strip())
-            if line and re.search(pattern, line):
-                result[content.start : content.end] = 1
-                count += 1
+        result = result[:subtitle_length]
 
         return result
 
@@ -377,7 +398,7 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
         )
         graph.configure()
 
-        threshold_list = np.zeros((1024), dtype=np.float64)
+        threshold_list = npzeros((1024), dtype=np.float64)
 
         for unframe in container.decode(stream):
             graph.push(unframe)
@@ -392,8 +413,8 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
             current_frame = frame.to_ndarray()
 
             if index > len(threshold_list) - 1:
-                threshold_list = np.concatenate(
-                    (threshold_list, np.zeros((len(threshold_list)), dtype=np.float64)),
+                threshold_list = npconcat(
+                    (threshold_list, npzeros((len(threshold_list)), dtype=np.float64)),
                     axis=0,
                 )
 

diff --git a/auto_editor/edit.py b/auto_editor/edit.py
@@ -214,15 +214,6 @@ def edit_media(
     ensure = Ensure(ffmpeg, samplerate, temp, log)
 
     if tl is None:
-        # Extract subtitles in their native format.
-        if src is not None and len(src.subtitles) > 0 and not args.sn:
-            cmd = ["-i", f"{src.path}", "-hide_banner"]
-            for s, sub in enumerate(src.subtitles):
-                cmd.extend(["-map", f"0:s:{s}"])
-            for s, sub in enumerate(src.subtitles):
-                cmd.extend([os.path.join(temp, f"{s}s.{sub.ext}")])
-            ffmpeg.run(cmd)
-
         tl = make_timeline(sources, ffmpeg, ensure, args, samplerate, bar, temp, log)
 
     if export["export"] == "timeline":
@@ -280,12 +271,8 @@ def make_media(tl: v3, output: str) -> None:
 
         visual_output = []
         audio_output = []
-        sub_output = []
         apply_later = False
 
-        if ctr.allow_subtitle and not args.sn:
-            sub_output = make_new_subtitles(tl, ffmpeg, temp, log)
-
         if ctr.allow_audio:
             audio_output = make_new_audio(tl, ensure, args, ffmpeg, bar, temp, log)
 
@@ -304,20 +291,25 @@ def make_media(tl: v3, output: str) -> None:
                     visual_output.append((False, out_path))
 
         log.conwrite("Writing output file")
+
+        making_subs = ctr.allow_subtitle and not args.sn
+        old_out = os.path.join(temp, f"oldout.{out_ext}")
         mux_quality_media(
             ffmpeg,
             visual_output,
             audio_output,
-            sub_output,
+            0 if tl.v1 is None else len(tl.v1.source.subtitles),
             apply_later,
             ctr,
-            output,
+            old_out if making_subs else output,
             tl.tb,
             args,
             src,
             temp,
             log,
         )
+        if making_subs:
+            make_new_subtitles(tl, old_out, output, log)
 
     if export["export"] == "clip-sequence":
         if tl.v1 is None:

diff --git a/auto_editor/output.py b/auto_editor/output.py
@@ -76,7 +76,7 @@ def mux_quality_media(
     ffmpeg: FFmpeg,
     visual_output: list[tuple[bool, str]],
     audio_output: list[str],
-    sub_output: list[str],
+    subtitle_streams: int,
     apply_v: bool,
     ctr: Container,
     output_path: str,
@@ -88,7 +88,6 @@ def mux_quality_media(
 ) -> None:
     v_tracks = len(visual_output)
     a_tracks = len(audio_output)
-    s_tracks = 0 if args.sn else len(sub_output)
 
     cmd = ["-hide_banner", "-y", "-i", f"{src.path}"]
 
@@ -126,10 +125,7 @@ def mux_quality_media(
                 new_a_file = audio_output[0]
             cmd.extend(["-i", new_a_file])
 
-    for subfile in sub_output:
-        cmd.extend(["-i", subfile])
-
-    for i in range(v_tracks + s_tracks + a_tracks):
+    for i in range(v_tracks + a_tracks):
         cmd.extend(["-map", f"{i+1}:0"])
 
     cmd.extend(["-map_metadata", "0"])
@@ -163,20 +159,6 @@ def mux_quality_media(
             break
         if astream.lang is not None:
             cmd.extend([f"-metadata:s:a:{i}", f"language={astream.lang}"])
-    for i, sstream in enumerate(src.subtitles):
-        if i > s_tracks:
-            break
-        if sstream.lang is not None:
-            cmd.extend([f"-metadata:s:s:{i}", f"language={sstream.lang}"])
-
-    if s_tracks > 0:
-        scodec = src.subtitles[0].codec
-        if same_container:
-            cmd.extend(["-c:s", scodec])
-        elif ctr.scodecs is not None:
-            if scodec not in ctr.scodecs:
-                scodec = ctr.scodecs[0]
-            cmd.extend(["-c:s", scodec])
 
     if a_tracks > 0:
         cmd += _ffset("-c:a", args.audio_codec) + _ffset("-b:a", args.audio_bitrate)
@@ -200,9 +182,6 @@ def mux_quality_media(
         cmd.extend(args.extras.split(" "))
     cmd.extend(["-strict", "-2"])  # Allow experimental codecs.
 
-    if s_tracks > 0:
-        cmd.extend(["-map", "0:t?"])  # Add input attachments to output.
-
     # This was causing a crash for 'example.mp4 multi-track.mov'
     # cmd.extend(["-map", "0:d?"])