From ad018b0a0cd360535cd9d9307b30c55ec11e112e Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Sat, 8 Aug 2020 02:00:26 +1200 Subject: [PATCH] Add recommended videos in a terrible way --- README.md | 1 - index.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 50fa098..36c3902 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ These endpoints are somewhat implemented: ## The future -- Video recommendations - RSS as a source for channel listings - Searches - Dash manifests diff --git a/index.py b/index.py index 331c043..4e45181 100644 --- a/index.py +++ b/index.py @@ -2,6 +2,10 @@ import cherrypy import json import youtube_dl import datetime +import os +import re +import json +import traceback ytdl_opts = { "quiet": True, @@ -11,7 +15,15 @@ ytdl_opts = { } ytdl = youtube_dl.YoutubeDL(ytdl_opts) -class HelloWorld(object): +ytdl_save_opts = ytdl_opts.copy() +ytdl_save_opts["write_pages"] = True +ytdl_save = youtube_dl.YoutubeDL(ytdl_save_opts) + +def length_text_to_seconds(text): + s = text.split(":") + return sum([int(x) * 60**(len(s)-i-1) for i, x in enumerate(s)]) + +class Second(object): def _cp_dispatch(self, vpath): if vpath[:2] == ["api", "v1"] and len(vpath) >= 4: endpoints = ["channels", "videos"] @@ -26,15 +38,17 @@ class HelloWorld(object): @cherrypy.tools.json_out() def videos(self, id): try: - info = ytdl.extract_info(id, download=False) + info = ytdl_save.extract_info(id, download=False) year = int(info["upload_date"][:4]) month = int(info["upload_date"][4:6]) day = int(info["upload_date"][6:8]) + # Adaptive formats have either audio or video, format streams have both def format_is_adaptive(format): return format["acodec"] == "none" or format["vcodec"] == "none" + # just the "type" field def format_type(format): sense = "audio" codecs = [] @@ -45,7 +59,7 @@ class HelloWorld(object): codecs.append(format["acodec"]) return '{}/{}; codecs="{}"'.format(sense, format["ext"], ", ".join(codecs)) - return { + result = { "type": "video", "title": info["title"], "videoId": info["id"], @@ -57,6 +71,8 @@ class HelloWorld(object): "publishedText": None, "keywords": None, "viewCount": info["view_count"], + "second__viewCountText": None, + "second__viewCountTextShort": None, "likeCount": info["like_count"], "dislikeCount": info["dislike_count"], "paid": None, @@ -115,6 +131,71 @@ class HelloWorld(object): "recommendedVideos": [] } + # Now try to get more stuff by manually examining the saved file + # Figure out what the name of the saved file was + possible_files = [f for f in os.listdir() if f.startswith("{}_".format(info["id"]))] + try: + if len(possible_files) == 1: + filename = possible_files[0] + with open(filename) as file: + r = re.compile(r"""^\s*window\["ytInitialData"\] = (\{.*\});\n?$""") + for line in file: + match_result = re.search(r, line) + if match_result: + yt_initial_data = json.loads(match_result.group(1)) + views = yt_initial_data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][0]\ + ["videoPrimaryInfoRenderer"]["viewCount"]["videoViewCountRenderer"] + result["second__viewCountText"] = views["viewCount"]["simpleText"] + result["second__viewCountTextShort"] = views["shortViewCount"]["simpleText"] + recommendations = yt_initial_data["contents"]["twoColumnWatchNextResults"]["secondaryResults"]\ + ["secondaryResults"]["results"] + + def get_useful_recommendation_data(r): + if "compactVideoRenderer" in r: + return r["compactVideoRenderer"] + if "compactAutoplayRenderer" in r: + return r["compactAutoplayRenderer"]["contents"][0]["compactVideoRenderer"] + return None + + def get_view_count(r): + text = r["viewCountText"]["simpleText"] + if text == "Recommended for you": + return 0 # subject to change? + else: + return int(text.replace(",", "").split(" ")[0]) + + def get_view_count_text(r): + text = r["viewCountText"]["simpleText"] + if text == "Recommended for you": + return "Recommended for you" # subject to change? + else: + return text + + # result["recommendedVideos"] = recommendations + # return result + + result["recommendedVideos"] = list({ + "videoId": r["videoId"], + "title": r["title"]["simpleText"], + "videoThumbnails": [], + "author": r["longBylineText"]["runs"][0]["text"], + "authorUrl": r["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["canonicalBaseUrl"], + "authorId": r["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"], + "lengthSeconds": length_text_to_seconds(r["lengthText"]["simpleText"]), + "second__lengthText": r["lengthText"]["simpleText"], + "viewCountText": get_view_count_text(r), + "viewCount": get_view_count(r) + } for r in [get_useful_recommendation_data(r) for r in recommendations if get_useful_recommendation_data(r)]) + + except Exception: + traceback.print_exc() + + finally: + for file in possible_files: + os.unlink(file) + + return result + except youtube_dl.DownloadError: return { "error": "Video unavailable", @@ -186,4 +267,4 @@ class HelloWorld(object): } cherrypy.config.update({"server.socket_port": 3000}) -cherrypy.quickstart(HelloWorld()) +cherrypy.quickstart(Second())