NewLeaf/index.py

import cherrypy
import json
import pathlib
import requests
import yt_dlp
from extractors.video import extract_video
from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest
from extractors.manifest import extract_manifest
from extractors.search import extract_search
from extractors.suggestions import extract_search_suggestions
from extractors.captions import extract_captions
from extractors.comments import extract_comments
import configuration

@cherrypy.tools.register("before_finalize", priority=60)
def custom_headers():
	"""Set a wildcard access-control-allow-origin header on the current response.

	Registered as a CherryPy tool on the "before_finalize" hook point; it only
	takes effect where "tools.custom_headers.on" is enabled in the config.
	"""
	response_headers = cherrypy.response.headers
	response_headers["access-control-allow-origin"] = "*"

class NewLeaf(object):
	"""CherryPy application root.

	Exposes JSON handlers for videos, channels, search, suggestions and
	comments, plus manifest, captions and two image proxy handlers.
	"""

	def _cp_dispatch(self, vpath):
		"""Rewrite API-style paths onto the handler methods of this object.

		/api/manifest/dash/id/... is rewritten to manifest/...
		/api/v1/<endpoint>/... is rewritten to <endpoint>/... when the number
		of segments after the endpoint name is within that endpoint's bounds.

		vpath is mutated in place. Returns self when a rewrite happened so that
		dispatch continues on this object, otherwise returns vpath unchanged.
		"""
		if vpath[:4] == ["api", "manifest", "dash", "id"]:
			vpath[:4] = ["manifest"]
			return self

		# The length guard avoids an IndexError on vpath[2] for a bare /api/v1 request.
		if vpath[:2] == ["api", "v1"] and len(vpath) >= 3:
			# Each entry is [endpoint name, minimum, maximum], where the bounds count
			# the path segments that follow the endpoint name.
			endpoints = [
				["channels", 1, 2],
				["videos", 1, 1],
				["search", 0, 1],
				["captions", 1, 1],
				["comments", 1, 1]
			]
			for e in endpoints:
				# +3 accounts for the "api", "v1" and endpoint name segments.
				if vpath[2] == e[0] and e[1]+3 <= len(vpath) <= e[2]+3:
					vpath[:3] = [e[0]]
					return self

		return vpath

	@cherrypy.expose
	@cherrypy.tools.json_out()
	def videos(self, id, **kwargs):
		"""Return the result of extract_video(id), serialised as JSON. Extra query parameters are ignored."""
		return extract_video(id)

	@cherrypy.expose
	@cherrypy.tools.encode()
	def manifest(self, id, **kwargs):
		"""Return the result of extract_manifest(id) with a matching content-type.

		A dict result is sent as JSON, a requests Response is passed through
		with its own content-type, and anything else is sent as
		application/dash+xml.
		"""
		result = extract_manifest(id)
		if isinstance(result, dict):
			cherrypy.response.headers["content-type"] = "application/json"
			return bytes(json.dumps(result), "utf8")
		elif isinstance(result, requests.models.Response):
			cherrypy.response.headers["content-type"] = result.headers["content-type"]
			return result
		else:
			cherrypy.response.headers["content-type"] = "application/dash+xml"
			return result

	@cherrypy.expose
	@cherrypy.tools.json_out()
	def channels(self, *suffix, second__path="channel", **kwargs):
		"""Return channel data for channels/<ucid>, channels/<ucid>/<part> or channels/<part>/<ucid>.

		part is one of "videos", "latest" or "playlists"; without a part the
		whole channel is extracted. "playlists" always yields an empty list.
		second__path must be "channel" for "latest", and otherwise one of
		"channel", "c" or "user".

		Returns an error dict when two components are given but neither is a
		part keyword, or when second__path is not allowed.
		Raises cherrypy.NotFound when the URL does not carry one or two components.
		"""
		ucid = ""
		part = ""
		possible_parts = ("videos", "latest", "playlists")
		if len(suffix) == 1:
			ucid = suffix[0]
		elif len(suffix) == 2:
			if suffix[0] in possible_parts:
				[part, ucid] = suffix
			elif suffix[1] in possible_parts:
				[ucid, part] = suffix
			else:
				return {
					"error": "Two components specified in URL, but neither component was recognised as a part keyword.",
					"identifier": "PART_KEYWORD_NOT_RECOGNISED"
				}
		else:
			# Zero or more than two components cannot be mapped to (ucid, part);
			# previously this crashed with IndexError/ValueError.
			raise cherrypy.NotFound()

		possible_paths = ("channel",) if part == "latest" else ("channel", "c", "user")
		if second__path not in possible_paths:
			return {
				"error": "second__path parameter must be one of: " + str(possible_paths),
				"identifier": "PATH_PARAMETER_NOT_RECOGNISED"
			}

		if part == "playlists":
			return []
		elif part == "latest":
			return extract_channel_latest(ucid)
		elif part == "videos":
			return extract_channel_videos(ucid, second__path)
		else: # part == "", so extract whole channel
			return extract_channel(ucid, second__path)

	@cherrypy.expose
	@cherrypy.tools.json_out()
	def search(self, *suffix, q, **kwargs):
		"""Return search results for q, or search suggestions when the path is search/suggestions."""
		if suffix == ("suggestions",):
			return self.suggestions(q=q)

		return extract_search(q)

	@cherrypy.expose
	@cherrypy.tools.json_out()
	def suggestions(self, *, q, **kwargs):
		"""Return the result of extract_search_suggestions(q), serialised as JSON."""
		return extract_search_suggestions(q)

	@cherrypy.expose
	def captions(self, id, **kwargs):
		"""Return captions for a video; query parameters are forwarded to extract_captions.

		A dict result is sent as JSON, anything else as text/vtt. If
		extract_captions raises StopIteration, the response is a 400 with a
		NO_MATCHING_CAPTIONS JSON error body.
		"""
		try:
			result = extract_captions(id, **kwargs)
			if isinstance(result, dict):
				cherrypy.response.headers["content-type"] = "application/json"
				return bytes(json.dumps(result), "utf8")
			else:
				cherrypy.response.headers["content-type"] = "text/vtt; charset=UTF-8"
				return result

		except StopIteration:
			cherrypy.response.status = "400"
			cherrypy.response.headers["content-type"] = "application/json"
			return bytes(json.dumps({
				"error": "No captions matching that language or label",
				"identifier": "NO_MATCHING_CAPTIONS"
			}), "utf8")

	@cherrypy.expose
	@cherrypy.tools.json_out()
	def comments(self, id, **kwargs):
		"""Return the result of extract_comments(id), serialised as JSON. Extra query parameters are ignored."""
		return extract_comments(id)

	def _proxy_image(self, url):
		"""Fetch url and return its complete body, copying the upstream content-type onto the response.

		Not exposed, so it cannot be reached by URL.
		Raises requests.HTTPError when the upstream status is an error.
		"""
		# A non-streamed request reads the whole body and releases the connection.
		# Taking only the first chunk of a streamed response could truncate the
		# image and left the connection open.
		r = requests.get(url)
		r.raise_for_status()
		cherrypy.response.headers["content-type"] = r.headers["content-type"]
		return r.content

	@cherrypy.expose
	def vi(self, id, file):
		"""Proxy https://i.ytimg.com/vi/<id>/<file>."""
		return self._proxy_image("https://i.ytimg.com/vi/{}/{}".format(id, file))

	@cherrypy.expose
	def ggpht(self, *path):
		"""Proxy https://yt3.ggpht.com/<path>, where path is the remaining URL segments."""
		return self._proxy_image("https://yt3.ggpht.com/{}".format("/".join(path)))

# Network settings come from the configuration module when it defines them;
# otherwise these defaults are used.
bind_host = getattr(configuration, "bind_host", "0.0.0.0")
bind_port = getattr(configuration, "bind_port", 3000)
# Static files are served from the "root" directory next to this file.
server_root = pathlib.Path(__file__).parent / "root"

cherrypy.config.update({
	"server.socket_port": bind_port,
	"server.socket_host": bind_host
})

# Settings for the "/" path: the custom_headers tool plus static file serving.
_root_settings = {
	"tools.custom_headers.on": True,
	"tools.staticdir.on": True,
	"tools.staticdir.dir": str(server_root.absolute()),
	"tools.staticdir.index": "index.html"
}
cherrypy.quickstart(NewLeaf(), "/", {"/": _root_settings})
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`import cherrypy`
			`import json`
Add robots.txt 2021-04-03 01:00:05 +00:00			`import pathlib`
Add thumbnail proxy 2020-08-07 14:51:32 +00:00			`import requests`
Fix extracting with cookie consent page in EU Fix #27 use maintained yt-dlp lib instead of youtube-dlc Because of the following changes in YT, we have to switch to a maintained library https://github.com/ytdl-org/youtube-dl/issues/28604 While yt-dlp is not fixed today, youtube-dl is fixed in master and as yt-dlp is quick to merge upstream changes back to their repo, we can hope the issue will also be fixed there timely. For requests sent by us directly, we include the cookies. Ref https://github.com/ytdl-org/youtube-dl/issues/28604 2021-03-31 22:31:33 +00:00			`import yt_dlp`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`from extractors.video import extract_video`
			`from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest`
			`from extractors.manifest import extract_manifest`
			`from extractors.search import extract_search`
			`from extractors.suggestions import extract_search_suggestions`
Implement captions Automatic subtitles are not supported, because youtube_dlc does not provide them. 2021-01-17 22:59:14 +00:00			`from extractors.captions import extract_captions`
Retrieve the first 20 comments of a video on /api/v1/comments/:videoid Got some inspiration from https://github.com/nlitsme/youtube_tool (for the x-youtube-client-X headers). This is not a complete reimplementation of Invidious API as continuation is not implemented (to retrieve more than the first 20 comments and comments replies), likes and replies count are also missing. 2021-06-27 02:06:13 +00:00			`from extractors.comments import extract_comments`
Allow configuring the bind host address and port. 2021-02-28 17:40:00 +00:00			`import configuration`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00
Add Access-Control-Allow-Origin header 2020-10-02 10:40:39 +00:00			`@cherrypy.tools.register("before_finalize", priority=60)`
			`def custom_headers():`
Use lowercase only for headers 2021-01-20 09:59:54 +00:00			`cherrypy.response.headers["access-control-allow-origin"] = "*"`
Add DASH manifest endpoint 2020-08-09 08:38:40 +00:00
Rename to NewLeaf 2021-02-27 00:09:31 +00:00			`class NewLeaf(object):`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`def _cp_dispatch(self, vpath):`
Add DASH manifest endpoint 2020-08-09 08:38:40 +00:00			`if vpath[:4] == ["api", "manifest", "dash", "id"]:`
			`vpath[:4] = ["manifest"]`
			`return self`

Add searches 2020-08-07 14:51:01 +00:00			`if vpath[:2] == ["api", "v1"]:`
			`endpoints = [`
			`["channels", 1, 2],`
			`["videos", 1, 1],`
Implement captions Automatic subtitles are not supported, because youtube_dlc does not provide them. 2021-01-17 22:59:14 +00:00			`["search", 0, 1],`
Retrieve the first 20 comments of a video on /api/v1/comments/:videoid Got some inspiration from https://github.com/nlitsme/youtube_tool (for the x-youtube-client-X headers). This is not a complete reimplementation of Invidious API as continuation is not implemented (to retrieve more than the first 20 comments and comments replies), likes and replies count are also missing. 2021-06-27 02:06:13 +00:00			`["captions", 1, 1],`
			`["comments", 1, 1]`
Add searches 2020-08-07 14:51:01 +00:00			`]`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`for e in endpoints:`
Add searches 2020-08-07 14:51:01 +00:00			`if vpath[2] == e[0] and len(vpath) >= e[1]+3 and len(vpath) <= e[2]+3:`
			`vpath[:3] = [e[0]]`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`return self`

			`return vpath`

			`@cherrypy.expose`
			`@cherrypy.tools.json_out()`
Add kwargs to all endpoints 2020-08-09 11:42:15 +00:00			`def videos(self, id, **kwargs):`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`return extract_video(id)`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00
Add DASH manifest endpoint 2020-08-09 08:38:40 +00:00			`@cherrypy.expose`
			`@cherrypy.tools.encode()`
Add kwargs to all endpoints 2020-08-09 11:42:15 +00:00			`def manifest(self, id, **kwargs):`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`result = extract_manifest(id)`
			`if type(result) is dict:`
			`cherrypy.response.headers["content-type"] = "application/json"`
			`return bytes(json.dumps(result), "utf8")`
			`elif type(result) is requests.models.Response:`
			`cherrypy.response.headers["content-type"] = result.headers["content-type"]`
			`return result`
			`else:`
			`cherrypy.response.headers["content-type"] = "application/dash+xml"`
			`return result`
Add DASH manifest endpoint 2020-08-09 08:38:40 +00:00
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`@cherrypy.expose`
			`@cherrypy.tools.json_out()`
Channel path fixes I'm pretty sure I already did? - use "channels" as default path, not "user" - cache based on the combination of the path and the id - fix channel latest 2022-09-12 11:13:37 +00:00			`def channels(self, *suffix, second__path="channel", **kwargs):`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`ucid = ""`
			`part = ""`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`possible_parts = ("videos", "latest", "playlists")`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`if len(suffix) == 1:`
			`ucid = suffix[0]`
			`else: # len(suffix) >= 2`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`if suffix[0] in possible_parts:`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`[part, ucid] = suffix`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`elif suffix[1] in possible_parts:`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00			`[ucid, part] = suffix`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`else:`
			`return {`
Quote json keys correctly 2021-05-14 06:46:46 +00:00			`"error": "Two components specified in URL, but neither component was recognised as a part keyword.",`
			`"identifier": "PART_KEYWORD_NOT_RECOGNISED"`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`}`
Channel path fixes I'm pretty sure I already did? - use "channels" as default path, not "user" - cache based on the combination of the path and the id - fix channel latest 2022-09-12 11:13:37 +00:00			`possible_paths = ("channel",) if part == "latest" else ("channel", "c", "user")`
#29 Extract named channels using dynamic endpoint with second__path param instead of /user/ 2021-11-13 19:15:16 +00:00			`if second__path not in possible_paths:`
			`return {`
			`"error": "second__path parameter must be one of: " + str(possible_paths),`
			`"identifier": "PATH_PARAMETER_NOT_RECOGNISED"`
			`}`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00
Various live video extraction fixes 2020-08-13 08:25:10 +00:00			`if part == "playlists":`
			`return []`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`elif part == "latest":`
Channel path fixes I'm pretty sure I already did? - use "channels" as default path, not "user" - cache based on the combination of the path and the id - fix channel latest 2022-09-12 11:13:37 +00:00			`return extract_channel_latest(ucid)`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`elif part == "videos":`
#29 Extract named channels using dynamic endpoint with second__path param instead of /user/ 2021-11-13 19:15:16 +00:00			`return extract_channel_videos(ucid, second__path)`
Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`else: # part == "", so extract whole channel`
#29 Extract named channels using dynamic endpoint with second__path param instead of /user/ 2021-11-13 19:15:16 +00:00			`return extract_channel(ucid, second__path)`
Working code; channels and basic videos 2020-08-07 12:22:48 +00:00
Add searches 2020-08-07 14:51:01 +00:00			`@cherrypy.expose`
			`@cherrypy.tools.json_out()`
Implement search suggestions 2020-08-10 07:25:15 +00:00			`def search(self, *suffix, q, **kwargs):`
			`if suffix == ("suggestions",):`
			`return self.suggestions(q=q)`

Refactor everything to separate files 2020-08-13 14:20:11 +00:00			`return extract_search(q)`
Add searches 2020-08-07 14:51:01 +00:00
Implement search suggestions 2020-08-10 07:25:15 +00:00			`@cherrypy.expose`
			`@cherrypy.tools.json_out()`
			`def suggestions(self, *, q, **kwargs):`
Fix name of search suggestions function 2020-08-17 12:44:09 +00:00			`return extract_search_suggestions(q)`
Captions: Error checking 2021-01-20 04:35:24 +00:00
Implement captions Automatic subtitles are not supported, because youtube_dlc does not provide them. 2021-01-17 22:59:14 +00:00			`@cherrypy.expose`
			`def captions(self, id, **kwargs):`
Captions: Error checking 2021-01-20 04:35:24 +00:00			`try:`
			`result = extract_captions(id, **kwargs)`
			`if type(result) is dict:`
			`cherrypy.response.headers["content-type"] = "application/json"`
			`return bytes(json.dumps(result), "utf8")`
			`else:`
			`cherrypy.response.headers["content-type"] = "text/vtt; charset=UTF-8"`
			`return result`
Implement captions Automatic subtitles are not supported, because youtube_dlc does not provide them. 2021-01-17 22:59:14 +00:00
Captions: Error checking 2021-01-20 04:35:24 +00:00			`except StopIteration:`
			`cherrypy.response.status = "400"`
			`cherrypy.response.headers["content-type"] = "application/json"`
			`return bytes(json.dumps({`
			`"error": "No captions matching that language or label",`
			`"identifier": "NO_MATCHING_CAPTIONS"`
			`}), "utf8")`
Implement search suggestions 2020-08-10 07:25:15 +00:00
Retrieve the first 20 comments of a video on /api/v1/comments/:videoid Got some inspiration from https://github.com/nlitsme/youtube_tool (for the x-youtube-client-X headers). This is not a complete reimplementation of Invidious API as continuation is not implemented (to retrieve more than the first 20 comments and comments replies), likes and replies count are also missing. 2021-06-27 02:06:13 +00:00			`@cherrypy.expose`
			`@cherrypy.tools.json_out()`
			`def comments(self, id, **kwargs):`
			`return extract_comments(id)`

Add thumbnail proxy 2020-08-07 14:51:32 +00:00			`@cherrypy.expose`
			`def vi(self, id, file):`
Remove `with requests` when it is unnecessary 2022-01-16 08:51:26 +00:00			`r = requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file), stream=True)`
			`r.raise_for_status()`
			`cherrypy.response.headers["content-type"] = r.headers["content-type"]`
			`return next(r.iter_content(chunk_size=None))`
Add thumbnail proxy 2020-08-07 14:51:32 +00:00
Add ggpht proxy (channel-related images) 2021-01-14 12:07:05 +00:00			`@cherrypy.expose`
			`def ggpht(self, *path):`
Remove `with requests` when it is unnecessary 2022-01-16 08:51:26 +00:00			`r = requests.get("https://yt3.ggpht.com/{}".format("/".join(path)), stream=True)`
			`r.raise_for_status()`
			`cherrypy.response.headers["content-type"] = r.headers["content-type"]`
			`return next(r.iter_content(chunk_size=None))`
Add ggpht proxy (channel-related images) 2021-01-14 12:07:05 +00:00
Allow configuring the bind host address and port. 2021-02-28 17:40:00 +00:00			`bind_port = getattr(configuration, "bind_port", 3000)`
			`bind_host = getattr(configuration, "bind_host", "0.0.0.0")`
Add robots.txt 2021-04-03 01:00:05 +00:00			`server_root = pathlib.Path(__file__).parent.joinpath("root")`
Allow configuring the bind host address and port. 2021-02-28 17:40:00 +00:00
			`cherrypy.config.update({"server.socket_port": bind_port, "server.socket_host": bind_host})`
Rename to NewLeaf 2021-02-27 00:09:31 +00:00			`cherrypy.quickstart(NewLeaf(), "/", {`
Add Access-Control-Allow-Origin header 2020-10-02 10:40:39 +00:00			`"/": {`
Add robots.txt 2021-04-03 01:00:05 +00:00			`"tools.custom_headers.on": True,`
			`"tools.staticdir.on": True,`
			`"tools.staticdir.dir": str(server_root.absolute()),`
			`"tools.staticdir.index": "index.html"`
Add Access-Control-Allow-Origin header 2020-10-02 10:40:39 +00:00			`}`
			`})`