Fix regular captions

This removes all of the code that was previously used to fetch captions from /timedtext and instead always uses whatever is extracted from the video page. Unfortunately, this now requires a whole video fetch just for the captions. However, assuming captions are only requested by a frontend, this won't be a problem thanks to the memory cache: the captions link will already be in memory because the just-requested video is in memory too.
2026-07-21 16:17:51 +00:00 · 2021-11-20 08:40:34 +01:00 · 2021-11-20 08:40:34 +01:00 · 66b7d1bec8
commit 66b7d1bec8
parent 550b633663
1 changed files with 1 additions and 48 deletions
--- a/extractors/captions.py
+++ b/extractors/captions.py
@ -6,10 +6,7 @@ from urllib.parse import urlencode
 import xml.etree.ElementTree as ET

 def extract_captions(id, **kwargs):
-	if "label" in kwargs and "auto-generated" in kwargs["label"]:
-		captions = extract_captions_from_video(id)
-	else:
-		captions = extract_captions_from_api(id)
+	captions = extract_captions_from_video(id)
 	return extract_captions_from_dict(captions, **kwargs)

 # Return captions for the language specified,
@ -26,50 +23,6 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
 			return re.sub(r"^([0-9:.]+ --> [0-9:.]+).*$", r"\1", r.content.decode("utf8"), flags=re.MULTILINE)
 		return r

-# List of captions directly from youtube, but no automatic
-def extract_captions_from_api(id):
-	url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
-	with requests.get(url) as r:
-		if r.status_code == 404:
-			return {
-				"error": "Video unavailable",
-				"identifier": "NOT_FOUND"
-			}
-
-		r.raise_for_status()
-
-		transcript = ET.fromstring(r.content.decode("utf8"))
-		tracks = transcript.findall("track")
-
-		captions = []
-		result = {
-			"captions": captions
-		}
-
-		for track in tracks:
-			language_code = track.attrib["lang_code"]
-			label = track.get("name", default=language_code)
-			subtitle_api_url = get_subtitle_api_url(id, label, language_code)
-
-			params = urlencode({
-				"lang": language_code,
-				"v": id,
-				"fmt": "vtt",
-				"name": label
-			})
-
-			subtitle_url = "https://www.youtube.com/api/timedtext?" + params
-
-			captions.append({
-				"label": label if label != "" else language_code,
-				"languageCode": language_code,
-				"url": subtitle_api_url,
-				"second__remoteUrl": subtitle_url
-			})
-
-		return result
-
-# We'll fall back to this function for auto-captions.
 def extract_captions_from_video(id):
 	return {
 		"captions": extract_video(id)["captions"]