From 0a13ab88cb34c8a06271c4b9d48906aa6e30dfaa Mon Sep 17 00:00:00 2001
From: Lomanic
Date: Sat, 6 Nov 2021 20:10:27 +0100
Subject: [PATCH 1/3] Stream responses on /vi and /ggpht endpoints

The chunk_size=None parameter to iter_content lets us consume data as soon as it arrives
https://docs.python-requests.org/en/master/api/#requests.Response.iter_content
---
 index.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/index.py b/index.py
index ffc35c7..91cdcfd 100644
--- a/index.py
+++ b/index.py
@@ -123,17 +123,17 @@ class NewLeaf(object):
 
 	@cherrypy.expose
 	def vi(self, id, file):
-		with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r:
+		with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file), stream=True) as r:
 			r.raise_for_status()
 			cherrypy.response.headers["content-type"] = r.headers["content-type"]
-			return r # no idea if this is a good way to do it, but it definitely works! :D
+			return next(r.iter_content(chunk_size=None))
 
 	@cherrypy.expose
 	def ggpht(self, *path):
-		with requests.get("https://yt3.ggpht.com/{}".format("/".join(path))) as r:
+		with requests.get("https://yt3.ggpht.com/{}".format("/".join(path)), stream=True) as r:
 			r.raise_for_status()
 			cherrypy.response.headers["content-type"] = r.headers["content-type"]
-			return r
+			return next(r.iter_content(chunk_size=None))
 
 bind_port = getattr(configuration, "bind_port", 3000)
 bind_host = getattr(configuration, "bind_host", "0.0.0.0")

From 36ae18c12f08eba668b7b9e623493419d32781ee Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Mon, 10 Jan 2022 13:02:08 +1300
Subject: [PATCH 2/3] Report errors when an account has been terminated

---
 extractors/channel.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/extractors/channel.py b/extractors/channel.py
index 454a4e5..58c4adf 100644
--- a/extractors/channel.py
+++ b/extractors/channel.py
@@ -29,6 +29,11 @@ def extract_channel(ucid):
 					"error": alert_text,
 					"identifier": "NOT_FOUND"
 				}
+			elif alert_text.startswith("This account has been terminated"):
+				return {
+					"error": alert_text,
+					"identifier": "ACCOUNT_TERMINATED"
+				}
 			else:
 				print("Seen alert text '{}'".format(alert_text))
 
@@ -165,6 +170,9 @@ def extract_channel_latest(ucid):
 	with requests.get("https://www.youtube.com/feeds/videos.xml?channel_id={}".format(ucid)) as r:
 		if r.status_code == 404:
 			cherrypy.response.status = 404
+			# write out page data for debugging
+			with open("channel_not_found_{}.xml".format(ucid), "wb") as f:
+				f.write(r.content)
 			return {
 				"error": "This channel does not exist.",
 				"identifier": "NOT_FOUND"

From 73b4fbabf7c5f46975ebc69cc3d2a66600e98514 Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Mon, 10 Jan 2022 13:23:04 +1300
Subject: [PATCH 3/3] Do not actually write out pages.

---
 extractors/channel.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/extractors/channel.py b/extractors/channel.py
index 58c4adf..39475d9 100644
--- a/extractors/channel.py
+++ b/extractors/channel.py
@@ -170,9 +170,6 @@ def extract_channel_latest(ucid):
 	with requests.get("https://www.youtube.com/feeds/videos.xml?channel_id={}".format(ucid)) as r:
 		if r.status_code == 404:
 			cherrypy.response.status = 404
-			# write out page data for debugging
-			with open("channel_not_found_{}.xml".format(ucid), "wb") as f:
-				f.write(r.content)
 			return {
 				"error": "This channel does not exist.",
 				"identifier": "NOT_FOUND"