From 29a3894337cd084cf3cd54e60c970ae817db2076 Mon Sep 17 00:00:00 2001 From: Lomanic Date: Sat, 13 Nov 2021 19:55:08 +0100 Subject: [PATCH 1/2] #42 Return UNKNOWN error for not explicitly handled errors for channel extraction instead of stacktrace --- extractors/channel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extractors/channel.py b/extractors/channel.py index 1cf35c7..36c96b1 100644 --- a/extractors/channel.py +++ b/extractors/channel.py @@ -35,7 +35,10 @@ def extract_channel(ucid): "identifier": "ACCOUNT_TERMINATED" } else: - print("Seen alert text '{}'".format(alert_text)) + return { + "error": alert_text, + "identifier": "UNKNOWN" + } header = yt_initial_data["header"]["c4TabbedHeaderRenderer"] if "c4TabbedHeaderRenderer" in yt_initial_data["header"] else {} channel_metadata = yt_initial_data["metadata"]["channelMetadataRenderer"] From ac1aa0710815a84f10421bb3f81cac2927ed580a Mon Sep 17 00:00:00 2001 From: Lomanic Date: Sat, 13 Nov 2021 20:15:16 +0100 Subject: [PATCH 2/2] #29 Extract named channels using dynamic endpoint with second__path param instead of /user/ --- extractors/channel.py | 4 ++-- index.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/extractors/channel.py b/extractors/channel.py index 36c96b1..fad39b3 100644 --- a/extractors/channel.py +++ b/extractors/channel.py @@ -12,12 +12,12 @@ channel_cache_lock = Lock() channel_latest_cache = TTLCache(maxsize=500, ttl=300) channel_latest_cache_lock = Lock() -def extract_channel(ucid): +def extract_channel(ucid, second__path="user"): with channel_cache_lock: if ucid in channel_cache: return channel_cache[ucid] - channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user" + channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else second__path r = requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies=eu_consent_cookie()) r.raise_for_status() yt_initial_data = extract_yt_initial_data(r.content.decode("utf8")) diff --git a/index.py b/index.py index 21c9444..482c6d5 100644 --- a/index.py +++ b/index.py @@ -58,7 +58,7 @@ class NewLeaf(object): @cherrypy.expose @cherrypy.tools.json_out() - def channels(self, *suffix, **kwargs): + def channels(self, *suffix, second__path="user", **kwargs): ucid = "" part = "" possible_parts = ("videos", "latest", "playlists") @@ -74,15 +74,21 @@ class NewLeaf(object): "error": "Two components specified in URL, but neither component was recognised as a part keyword.", "identifier": "PART_KEYWORD_NOT_RECOGNISED" } + possible_paths = ("channel", "c", "user") + if second__path not in possible_paths: + return { + "error": "second__path parameter must be one of: " + str(possible_paths), + "identifier": "PATH_PARAMETER_NOT_RECOGNISED" + } if part == "playlists": return [] elif part == "latest": - return extract_channel_latest(ucid) + return extract_channel_latest(ucid, second__path) elif part == "videos": - return extract_channel_videos(ucid) + return extract_channel_videos(ucid, second__path) else: # part == "", so extract whole channel - return extract_channel(ucid) + return extract_channel(ucid, second__path) @cherrypy.expose @cherrypy.tools.json_out()