mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2024-11-22 07:37:29 +00:00
Implement captions
Automatic subtitles are not supported, because youtube_dlc does not provide them.
This commit is contained in:
parent
985f0c1c32
commit
6709aa30c2
@ -13,6 +13,7 @@ These endpoints are somewhat implemented:
|
|||||||
- `/api/v1/channels/{part}/{ucid}`
|
- `/api/v1/channels/{part}/{ucid}`
|
||||||
- `/api/v1/search?q={search}`
|
- `/api/v1/search?q={search}`
|
||||||
- `/api/v1/search/suggestions?q={search}`
|
- `/api/v1/search/suggestions?q={search}`
|
||||||
|
- `/api/v1/captions/{id}`
|
||||||
- `/vi/{id}/{file}`
|
- `/vi/{id}/{file}`
|
||||||
- `/api/manifest/dash/id/{id}`
|
- `/api/manifest/dash/id/{id}`
|
||||||
|
|
||||||
|
73
extractors/captions.py
Normal file
73
extractors/captions.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
import requests
|
||||||
|
from extractors.video import extract_video
|
||||||
|
from tools.converters import escape_html_textcontent, get_subtitle_api_url
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
def extract_captions(id, **kwargs):
    """Look up the caption tracks of a video and answer a captions request.

    The track listing is obtained from extract_captions_from_api and then
    handed, together with any keyword arguments, to
    extract_captions_from_dict, whose result is returned unchanged.
    """
    track_listing = extract_captions_from_api(id)
    return extract_captions_from_dict(track_listing, **kwargs)
|
||||||
|
|
||||||
|
def extract_captions_from_dict(captions, **kwargs):
    """Return the subtitle text for one track, or the track listing itself.

    If a "lang" keyword is given, tracks are matched on "languageCode".
    Otherwise, if a "label" keyword is given, tracks are matched on "label".
    With neither keyword the captions dict is returned as received.

    For the first matching track, the content at its "second__subtitleUrl"
    is downloaded and returned decoded as UTF-8. When no track matches, the
    function falls off the end and returns None.
    """
    if "lang" in kwargs:
        wanted_lang, wanted_label = kwargs["lang"], None
    elif "label" in kwargs:
        wanted_lang, wanted_label = None, kwargs["label"]
    else:
        return captions

    for track in captions["captions"]:
        if track["languageCode"] != wanted_lang and track["label"] != wanted_label:
            continue

        with requests.get(track["second__subtitleUrl"]) as response:
            response.raise_for_status()
            return response.content.decode("utf8")
|
||||||
|
|
||||||
|
# Currently unused in favour of extract_captions_from_api.
|
||||||
|
def extract_captions_from_video(id):
    """Build a captions listing from the "captions" entry of extract_video(id)."""
    video = extract_video(id)
    return {"captions": video["captions"]}
|
||||||
|
|
||||||
|
# no automatic captions
def extract_captions_from_api(id):
    """List the caption tracks of a video using the timedtext list endpoint.

    Args:
        id: the video ID to look up.

    Returns:
        A dict with a single "captions" key holding a list of track dicts.
        Each track has "label", "languageCode", "url" (built by
        get_subtitle_api_url) and "second__subtitleUrl" (the direct
        YouTube timedtext URL requesting the track as VTT).

    Raises:
        requests.HTTPError: if the list request returns an error status.
        xml.etree.ElementTree.ParseError: if a non-empty response body is
            not well-formed XML.
    """
    # Encode the query properly so that an id containing "&", "#" or
    # spaces cannot change the meaning of the request.
    list_url = "https://video.google.com/timedtext?" + urlencode({
        "hl": "en",
        "type": "list",
        "v": id,
    })
    with requests.get(list_url) as r:
        r.raise_for_status()
        body = r.content

    captions = []
    result = {
        "captions": captions
    }

    # An empty body has no tracks to report; parsing it would raise.
    if not body.strip():
        return result

    # Pass the raw bytes and let the XML parser deal with the encoding.
    transcript = ET.fromstring(body)

    for track in transcript.findall("track"):
        language_code = track.attrib["lang_code"]
        # Keep the track's real name separate from the label shown to users:
        # the remote URL must carry the real (possibly empty) name, while the
        # label falls back to the language code.
        name = track.get("name", "")
        label = name if name != "" else language_code
        subtitle_api_url = get_subtitle_api_url(id, name, language_code)

        params = urlencode({
            "lang": language_code,
            "v": id,
            "fmt": "vtt",
            "name": name
        })

        subtitle_url = "https://www.youtube.com/api/timedtext?" + params

        captions.append({
            "label": label,
            "languageCode": language_code,
            "url": subtitle_api_url,
            "second__subtitleUrl": subtitle_url
        })

    return result
|
@ -19,7 +19,9 @@ ytdl_opts = {
|
|||||||
"playlist_items": "1-100",
|
"playlist_items": "1-100",
|
||||||
"extract_flat": "in_playlist",
|
"extract_flat": "in_playlist",
|
||||||
"write_pages": True,
|
"write_pages": True,
|
||||||
"source_address": "0.0.0.0"
|
"source_address": "0.0.0.0",
|
||||||
|
"writesubtitles": True,
|
||||||
|
"allsubtitles": True,
|
||||||
}
|
}
|
||||||
ytdl = youtube_dlc.YoutubeDL(ytdl_opts)
|
ytdl = youtube_dlc.YoutubeDL(ytdl_opts)
|
||||||
|
|
||||||
@ -172,6 +174,23 @@ def extract_video(id):
|
|||||||
"second__height": format["height"]
|
"second__height": format["height"]
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if "requested_subtitles" in info and info["requested_subtitles"]:
|
||||||
|
|
||||||
|
for language_code, subtitle in info["requested_subtitles"].items():
|
||||||
|
|
||||||
|
if language_code != "live_chat":
|
||||||
|
subtitle_url = subtitle["url"]
|
||||||
|
label = get_language_label_from_url(subtitle_url)
|
||||||
|
subtitle_api_url = get_subtitle_api_url(id, label, language_code)
|
||||||
|
|
||||||
|
result["captions"].append({
|
||||||
|
"label": label if label != "" else language_code,
|
||||||
|
"languageCode": language_code,
|
||||||
|
"url": subtitle_api_url,
|
||||||
|
"second__subtitleUrl": subtitle_url # Direct YouTube url
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
result = get_more_stuff_from_file(info["id"], result)
|
result = get_more_stuff_from_file(info["id"], result)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
15
index.py
15
index.py
@ -7,6 +7,7 @@ from extractors.channel import extract_channel, extract_channel_videos, extract_
|
|||||||
from extractors.manifest import extract_manifest
|
from extractors.manifest import extract_manifest
|
||||||
from extractors.search import extract_search
|
from extractors.search import extract_search
|
||||||
from extractors.suggestions import extract_search_suggestions
|
from extractors.suggestions import extract_search_suggestions
|
||||||
|
from extractors.captions import extract_captions
|
||||||
|
|
||||||
@cherrypy.tools.register("before_finalize", priority=60)
|
@cherrypy.tools.register("before_finalize", priority=60)
|
||||||
def custom_headers():
|
def custom_headers():
|
||||||
@ -22,7 +23,8 @@ class Second(object):
|
|||||||
endpoints = [
|
endpoints = [
|
||||||
["channels", 1, 2],
|
["channels", 1, 2],
|
||||||
["videos", 1, 1],
|
["videos", 1, 1],
|
||||||
["search", 0, 1]
|
["search", 0, 1],
|
||||||
|
["captions", 1, 1]
|
||||||
]
|
]
|
||||||
for e in endpoints:
|
for e in endpoints:
|
||||||
if vpath[2] == e[0] and len(vpath) >= e[1]+3 and len(vpath) <= e[2]+3:
|
if vpath[2] == e[0] and len(vpath) >= e[1]+3 and len(vpath) <= e[2]+3:
|
||||||
@ -91,6 +93,17 @@ class Second(object):
|
|||||||
def suggestions(self, *, q, **kwargs):
|
def suggestions(self, *, q, **kwargs):
|
||||||
return extract_search_suggestions(q)
|
return extract_search_suggestions(q)
|
||||||
|
|
||||||
|
@cherrypy.expose
def captions(self, id, **kwargs):
    """Serve the captions endpoint for a video.

    Passes the id and query parameters to extract_captions. A dict result
    (the track listing) is sent as JSON; a text result (one track's
    subtitles) is sent as WebVTT.

    Raises:
        cherrypy.HTTPError: 404 when extract_captions returns None, which
            happens when a language or label was requested and no track
            matched it.
    """
    result = extract_captions(id, **kwargs)

    # Without this guard None would be returned as the response body under
    # a text/vtt content type instead of signalling that nothing was found.
    if result is None:
        raise cherrypy.HTTPError(404, "No captions found for the requested language or label")

    if isinstance(result, dict):
        cherrypy.response.headers["content-type"] = "application/json"
        return bytes(json.dumps(result), "utf8")

    cherrypy.response.headers["content-type"] = "text/vtt; charset=UTF-8"
    return result
|
||||||
|
|
||||||
|
|
||||||
@cherrypy.expose
|
@cherrypy.expose
|
||||||
def vi(self, id, file):
|
def vi(self, id, file):
|
||||||
with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r:
|
with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r:
|
||||||
|
@ -2,6 +2,7 @@ import configuration
|
|||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
from urllib.parse import urlparse, parse_qs, quote_plus
|
||||||
|
|
||||||
def length_text_to_seconds(text):
|
def length_text_to_seconds(text):
|
||||||
s = text.split(":")
|
s = text.split(":")
|
||||||
@ -205,3 +206,20 @@ def time_to_past_text(timestamp):
|
|||||||
number = diff // unit_value
|
number = diff // unit_value
|
||||||
plural_unit = unit_name if number == 1 else unit_name + "s"
|
plural_unit = unit_name if number == 1 else unit_name + "s"
|
||||||
return "{} {} ago".format(number, plural_unit)
|
return "{} {} ago".format(number, plural_unit)
|
||||||
|
|
||||||
|
def get_language_label_from_url(url_string):
    """Return the first "name" query parameter of a URL, or "" if there is none.

    parse_qs leaves out parameters whose value is blank, so a URL carrying
    "name=" with nothing after it also yields "".
    """
    query = urlparse(url_string).query
    names = parse_qs(query).get("name")
    if not names:
        return ""
    return names[0]
|
||||||
|
|
||||||
|
def get_subtitle_api_url(id, label, language_code):
    """Build this server's captions endpoint URL for one track.

    The URL starts with configuration.website_origin followed by
    /api/v1/captions/<id>. A track with an empty label is addressed by its
    language code ("lang=..."); any other track is addressed by its label
    ("label=..."). The parameter value is escaped with quote_plus.
    """
    if label == "":
        query = "lang=" + quote_plus(language_code)
    else:
        query = "label=" + quote_plus(label)

    base = "{}/api/v1/captions/{}?".format(configuration.website_origin, id)
    return base + query
|
||||||
|
Loading…
Reference in New Issue
Block a user