"""CherryPy server that exposes the extractor functions over HTTP.

Running this module reads ``bind_port`` / ``bind_host`` from ``configuration``
(falling back to 3000 and "0.0.0.0") and starts the server with
``cherrypy.quickstart``.
"""

import json
import pathlib

import cherrypy
import requests
import yt_dlp  # not referenced directly in this file; import kept as in the original

from extractors.video import extract_video
from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest
from extractors.manifest import extract_manifest
from extractors.search import extract_search
from extractors.suggestions import extract_search_suggestions
from extractors.captions import extract_captions
from extractors.comments import extract_comments
import configuration

# (endpoint name, minimum, maximum) number of path segments allowed after
# "api/v1/<endpoint>" for the request to be routed to that handler.
_API_V1_ENDPOINTS = (
    ("channels", 1, 2),
    ("videos", 1, 1),
    ("search", 0, 1),
    ("captions", 1, 1),
    ("comments", 1, 1),
)


@cherrypy.tools.register("before_finalize", priority=60)
def custom_headers():
    """Add a permissive ``access-control-allow-origin`` header to the response."""
    cherrypy.response.headers["access-control-allow-origin"] = "*"


def _fetch_upstream(url):
    """GET ``url`` and return its complete body as bytes.

    Copies the upstream ``content-type`` header onto the CherryPy response.
    The whole body is read (rather than only the first streamed chunk) so
    large payloads are not truncated, and the upstream response is closed
    when done.

    Raises:
        requests.HTTPError: if the upstream answers with an error status.
    """
    with requests.get(url) as upstream:
        upstream.raise_for_status()
        cherrypy.response.headers["content-type"] = upstream.headers["content-type"]
        return upstream.content


class NewLeaf(object):
    """Root CherryPy object; each exposed method is one HTTP endpoint."""

    def _cp_dispatch(self, vpath):
        """Rewrite API-style paths onto this object's handler names.

        ``vpath`` is the list of path segments and is modified in place:

        * ``api/manifest/dash/id/...`` becomes ``manifest/...``
        * ``api/v1/<endpoint>/...`` becomes ``<endpoint>/...`` when
          ``<endpoint>`` is listed in ``_API_V1_ENDPOINTS`` and the number of
          segments following it is within that endpoint's allowed range.

        Returns ``self`` when a rewrite happened, otherwise ``vpath`` unchanged.
        """
        if vpath[:4] == ["api", "manifest", "dash", "id"]:
            vpath[:4] = ["manifest"]
            return self

        if vpath[:2] == ["api", "v1"]:
            # Segments after "api/v1/<endpoint>"; -1 when the endpoint name
            # itself is missing (a bare "api/v1" request).
            extra = len(vpath) - 3
            for name, min_extra, max_extra in _API_V1_ENDPOINTS:
                # The length test comes first so vpath[2] is never read when
                # it does not exist.
                if min_extra <= extra <= max_extra and vpath[2] == name:
                    vpath[:3] = [name]
                    return self

        return vpath

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def videos(self, id, **kwargs):
        """Return the result of ``extract_video(id)`` as JSON.

        Extra query parameters are accepted and ignored.
        """
        return extract_video(id)

    @cherrypy.expose
    @cherrypy.tools.encode()
    def manifest(self, id, **kwargs):
        """Return the result of ``extract_manifest(id)``.

        The content type depends on what the extractor returned:

        * a dict is serialised as JSON,
        * a ``requests`` response is passed through with its own content type,
        * anything else is served as ``application/dash+xml``.
        """
        result = extract_manifest(id)
        if isinstance(result, dict):
            cherrypy.response.headers["content-type"] = "application/json"
            return bytes(json.dumps(result), "utf8")
        if isinstance(result, requests.models.Response):
            cherrypy.response.headers["content-type"] = result.headers["content-type"]
            return result
        cherrypy.response.headers["content-type"] = "application/dash+xml"
        return result

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def channels(self, *suffix, second__path="channel", **kwargs):
        """Return channel data as JSON.

        ``suffix`` holds one or two path components: a channel identifier,
        optionally combined (in either order) with one of the part keywords
        "videos", "latest" or "playlists". ``second__path`` must be "channel"
        for the "latest" part and one of "channel", "c" or "user" otherwise.

        Returns an error dict (with "error" and "identifier" keys) when the
        path components or ``second__path`` are not acceptable. A wrong
        number of path components additionally sets the status to 400
        instead of failing with an unhandled exception.
        """
        possible_parts = ("videos", "latest", "playlists")
        ucid = ""
        part = ""
        if len(suffix) == 1:
            ucid = suffix[0]
        elif len(suffix) == 2:
            if suffix[0] in possible_parts:
                part, ucid = suffix
            elif suffix[1] in possible_parts:
                ucid, part = suffix
            else:
                return {
                    "error": "Two components specified in URL, but neither component was recognised as a part keyword.",
                    "identifier": "PART_KEYWORD_NOT_RECOGNISED"
                }
        else:
            # Zero or more than two components cannot be interpreted.
            cherrypy.response.status = "400"
            return {
                "error": "Expected one or two path components, got " + str(len(suffix)) + ".",
                "identifier": "WRONG_NUMBER_OF_PATH_COMPONENTS"
            }

        possible_paths = ("channel",) if part == "latest" else ("channel", "c", "user")
        if second__path not in possible_paths:
            return {
                "error": "second__path parameter must be one of: " + str(possible_paths),
                "identifier": "PATH_PARAMETER_NOT_RECOGNISED"
            }

        if part == "playlists":
            return []
        if part == "latest":
            return extract_channel_latest(ucid)
        if part == "videos":
            return extract_channel_videos(ucid, second__path)
        # part == "", so extract whole channel
        return extract_channel(ucid, second__path)

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def search(self, *suffix, q, **kwargs):
        """Return search results for ``q`` as JSON.

        When the only path component is "suggestions", the request is
        delegated to :meth:`suggestions` instead.
        """
        if suffix == ("suggestions",):
            return self.suggestions(q=q)
        return extract_search(q)

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def suggestions(self, *, q, **kwargs):
        """Return the result of ``extract_search_suggestions(q)`` as JSON."""
        return extract_search_suggestions(q)

    @cherrypy.expose
    def captions(self, id, **kwargs):
        """Return captions for ``id``; extra query parameters go to the extractor.

        A dict result is served as JSON; any other result is served as
        ``text/vtt``. If the extractor raises ``StopIteration``, a 400 JSON
        error with identifier ``NO_MATCHING_CAPTIONS`` is returned.
        """
        try:
            result = extract_captions(id, **kwargs)
        except StopIteration:
            cherrypy.response.status = "400"
            cherrypy.response.headers["content-type"] = "application/json"
            return bytes(json.dumps({
                "error": "No captions matching that language or label",
                "identifier": "NO_MATCHING_CAPTIONS"
            }), "utf8")

        if isinstance(result, dict):
            cherrypy.response.headers["content-type"] = "application/json"
            return bytes(json.dumps(result), "utf8")
        cherrypy.response.headers["content-type"] = "text/vtt; charset=UTF-8"
        return result

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def comments(self, id, **kwargs):
        """Return the result of ``extract_comments(id)`` as JSON.

        Extra query parameters are accepted and ignored.
        """
        return extract_comments(id)

    @cherrypy.expose
    def vi(self, id, file):
        """Fetch ``https://i.ytimg.com/vi/<id>/<file>`` and return its full body."""
        return _fetch_upstream("https://i.ytimg.com/vi/{}/{}".format(id, file))

    @cherrypy.expose
    def ggpht(self, *path):
        """Fetch ``https://yt3.ggpht.com/<path>`` and return its full body."""
        return _fetch_upstream("https://yt3.ggpht.com/{}".format("/".join(path)))


bind_port = getattr(configuration, "bind_port", 3000)
bind_host = getattr(configuration, "bind_host", "0.0.0.0")
server_root = pathlib.Path(__file__).parent.joinpath("root")

cherrypy.config.update({"server.socket_port": bind_port, "server.socket_host": bind_host})
cherrypy.quickstart(NewLeaf(), "/", {
    "/": {
        "tools.custom_headers.on": True,
        "tools.staticdir.on": True,
        "tools.staticdir.dir": str(server_root.absolute()),
        "tools.staticdir.index": "index.html"
    }
})