Add recommended videos in a terrible way

2026-07-21 16:17:51 +00:00 · 2020-08-08 02:00:26 +12:00 · 2020-08-08 02:00:26 +12:00 · ad018b0a0c
commit ad018b0a0c
parent 132f814f3a
2 changed files with 85 additions and 5 deletions
--- a/README.md
+++ b/README.md
@ -14,7 +14,6 @@ These endpoints are somewhat implemented:

 ## The future

- Video recommendations
 - RSS as a source for channel listings
 - Searches
 - Dash manifests
--- a/index.py
+++ b/index.py
@ -2,6 +2,10 @@ import cherrypy
 import json
 import youtube_dl
 import datetime
+import os
+import re
+import json
+import traceback

 ytdl_opts = {
 	"quiet": True,
@ -11,7 +15,15 @@ ytdl_opts = {
 }
 ytdl = youtube_dl.YoutubeDL(ytdl_opts)

-class HelloWorld(object):
+ytdl_save_opts = ytdl_opts.copy()
+ytdl_save_opts["write_pages"] = True
+ytdl_save = youtube_dl.YoutubeDL(ytdl_save_opts)
+
+def length_text_to_seconds(text):
+	"""Convert a colon-separated length such as "1:02:03" into a number of seconds.
+
+	Every field is parsed with int(). The rightmost field counts once, the one
+	before it 60 times, the one before that 3600 times, and so on.
+	"""
+	total = 0
+	for field in text.split(":"):
+		# Shift what has been read so far up one base-60 place, then add this field
+		total = total * 60 + int(field)
+	return total
+
+class Second(object):
 	def _cp_dispatch(self, vpath):
 		if vpath[:2] == ["api", "v1"] and len(vpath) >= 4:
 			endpoints = ["channels", "videos"]
@ -26,15 +38,17 @@ class HelloWorld(object):
 	@cherrypy.tools.json_out()
 	def videos(self, id):
 		try:
-			info = ytdl.extract_info(id, download=False)
+			info = ytdl_save.extract_info(id, download=False)

 			year = int(info["upload_date"][:4])
 			month = int(info["upload_date"][4:6])
 			day = int(info["upload_date"][6:8])

+			# Adaptive formats have either audio or video, format streams have both
 			def format_is_adaptive(format):
 				return format["acodec"] == "none" or format["vcodec"] == "none"

+			# Build only the string used for a format's "type" field
 			def format_type(format):
 				sense = "audio"
 				codecs = []
@ -45,7 +59,7 @@ class HelloWorld(object):
 					codecs.append(format["acodec"])
 				return '{}/{}; codecs="{}"'.format(sense, format["ext"], ", ".join(codecs))

-			return {
+			result = {
 				"type": "video",
 				"title": info["title"],
 				"videoId": info["id"],
@ -57,6 +71,8 @@ class HelloWorld(object):
 				"publishedText": None,
 				"keywords": None,
 				"viewCount": info["view_count"],
+				"second__viewCountText": None,
+				"second__viewCountTextShort": None,
 				"likeCount": info["like_count"],
 				"dislikeCount": info["dislike_count"],
 				"paid": None,
@ -115,6 +131,71 @@ class HelloWorld(object):
 				"recommendedVideos": []
 			}

+			# Now try to get more stuff by manually examining the saved file
+			# Figure out what the name of the saved file was
+			possible_files = [f for f in os.listdir() if f.startswith("{}_".format(info["id"]))]
+			try:
+				if len(possible_files) == 1:
+					filename = possible_files[0]
+					with open(filename) as file:
+						r = re.compile(r"""^\s*window\["ytInitialData"\] = (\{.*\});\n?$""")
+						for line in file:
+							match_result = re.search(r, line)
+							if match_result:
+								yt_initial_data = json.loads(match_result.group(1))
+								views = yt_initial_data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][0]\
+									["videoPrimaryInfoRenderer"]["viewCount"]["videoViewCountRenderer"]
+								result["second__viewCountText"] = views["viewCount"]["simpleText"]
+								result["second__viewCountTextShort"] = views["shortViewCount"]["simpleText"]
+								recommendations = yt_initial_data["contents"]["twoColumnWatchNextResults"]["secondaryResults"]\
+									["secondaryResults"]["results"]
+
+								def get_useful_recommendation_data(r):
+									# Reduce one recommendation entry to its compactVideoRenderer dict.
+									# Entries of any other shape give None.
+									if "compactVideoRenderer" in r:
+										renderer = r["compactVideoRenderer"]
+									elif "compactAutoplayRenderer" in r:
+										# The autoplay entry wraps the renderer in a "contents" list
+										autoplay = r["compactAutoplayRenderer"]
+										renderer = autoplay["contents"][0]["compactVideoRenderer"]
+									else:
+										renderer = None
+									return renderer
+
+								def get_view_count(r):
+									# Return the number at the start of the renderer's view count text,
+									# e.g. "1,234 views" -> 1234.
+									text = r["viewCountText"]["simpleText"]
+									first_word = text.replace(",", "").split(" ")[0]
+									try:
+										return int(first_word)
+									except ValueError:
+										# No leading number: the "Recommended for you" placeholder, or
+										# wording such as "No views". Report 0 instead of raising, so one
+										# unusual entry does not fail the whole list being built.
+										return 0 # subject to change?
+
+								def get_view_count_text(r):
+									# Return the renderer's view count text exactly as given. The
+									# "Recommended for you" placeholder is passed through unchanged
+									# (subject to change?).
+									return r["viewCountText"]["simpleText"]
+
+								# result["recommendedVideos"] = recommendations
+								# return result
+
+								result["recommendedVideos"] = list({
+									"videoId": r["videoId"],
+									"title": r["title"]["simpleText"],
+									"videoThumbnails": [],
+									"author": r["longBylineText"]["runs"][0]["text"],
+									"authorUrl": r["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["canonicalBaseUrl"],
+									"authorId": r["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"],
+									"lengthSeconds": length_text_to_seconds(r["lengthText"]["simpleText"]),
+									"second__lengthText": r["lengthText"]["simpleText"],
+									"viewCountText": get_view_count_text(r),
+									"viewCount": get_view_count(r)
+								} for r in [get_useful_recommendation_data(r) for r in recommendations if get_useful_recommendation_data(r)])
+
+			except Exception:
+				traceback.print_exc()
+
+			finally:
+				for file in possible_files:
+					os.unlink(file)
+
+				return result
+
 		except youtube_dl.DownloadError:
 			return {
 				"error": "Video unavailable",
@ -186,4 +267,4 @@ class HelloWorld(object):
 			}

 cherrypy.config.update({"server.socket_port": 3000})
-cherrypy.quickstart(HelloWorld())
+cherrypy.quickstart(Second())