1
0
Fork 0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2026-03-18 02:11:36 +00:00

Support auto-generated captions

The caption extraction is now entirely in our own hands.
This commit is contained in:
Cadence Ember 2021-04-05 01:23:54 +12:00
parent aaf7d65b32
commit 1d52fca3a0
No known key found for this signature in database
GPG key ID: BC1C2C61CF521B17
3 changed files with 35 additions and 26 deletions

View file

@ -5,7 +5,10 @@ from urllib.parse import urlencode
import xml.etree.ElementTree as ET
def extract_captions(id, **kwargs):
    """Fetch the caption listing for video `id` and pass it on for selection.

    When the `label` keyword argument contains "auto-generated", the listing
    comes from extract_captions_from_video; otherwise it comes from
    extract_captions_from_api. The listing and every keyword argument are
    then handed to extract_captions_from_dict, whose result is returned.
    """
    # `label` may be missing or explicitly None; neither can name an
    # auto-generated track, and a membership test on None would raise
    # TypeError.
    label = kwargs.get("label")
    wants_auto_generated = label is not None and "auto-generated" in label

    # Make exactly one request: pick the source first instead of always
    # querying the API and then discarding its result.
    if wants_auto_generated:
        captions = extract_captions_from_video(id)
    else:
        captions = extract_captions_from_api(id)
    return extract_captions_from_dict(captions, **kwargs)
# Return captions for the language specified,
@ -19,15 +22,9 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
r.raise_for_status()
return r
# Currently unused in favour of extract_captions_from_api.
def extract_captions_from_video(id):
    """Return a dict whose "captions" key holds extract_video(id)["captions"]."""
    video = extract_video(id)
    return {"captions": video["captions"]}
# no automatic captions
# List of captions directly from YouTube, but without automatic captions
def extract_captions_from_api(id):
url = "https://video.google.com/timedtext?hl=en&type=list&v=%s" % id
url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
with requests.get(url) as r:
if r.status_code == 404:
return {
@ -67,3 +64,9 @@ def extract_captions_from_api(id):
})
return result
# We'll fall back to this function for auto-captions.
def extract_captions_from_video(id):
    """Wrap the "captions" entry of extract_video(id) in a one-key dict.

    The returned dict has the single key "captions".
    """
    extracted = extract_video(id)
    captions = extracted["captions"]
    return {"captions": captions}