From 7d3b79b1cd78629672a2ac18448489e7d3ec115e Mon Sep 17 00:00:00 2001 From: Lomanic Date: Fri, 14 May 2021 18:49:25 +0200 Subject: [PATCH] Change cookies to skip EU cookie consent page See https://github.com/benbusby/whoogle-search/issues/311 for some context. We're now implementing https://github.com/ytdl-org/youtube-dl/blob/a7260099873acc6dc7d76cafad2f6b139087afd0/youtube_dl/extractor/youtube.py#L263-L264 --- extractors/channel.py | 4 ++-- extractors/search.py | 4 ++-- tools/extractors.py | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/extractors/channel.py b/extractors/channel.py index fb8ccea..655bbe3 100644 --- a/extractors/channel.py +++ b/extractors/channel.py @@ -3,7 +3,7 @@ import dateutil.parser import requests import xml.etree.ElementTree as ET from tools.converters import * -from tools.extractors import extract_yt_initial_data +from tools.extractors import extract_yt_initial_data, eu_consent_cookie from threading import Lock from cachetools import TTLCache @@ -18,7 +18,7 @@ def extract_channel(ucid): return channel_cache[ucid] channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user" - with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies={"CONSENT": "PENDING+999"}) as r: + with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies=eu_consent_cookie()) as r: r.raise_for_status() yt_initial_data = extract_yt_initial_data(r.content.decode("utf8")) diff --git a/extractors/search.py b/extractors/search.py index 487ccf8..e100b6d 100644 --- a/extractors/search.py +++ b/extractors/search.py @@ -2,7 +2,7 @@ import requests import traceback import yt_dlp from tools.converters import * -from tools.extractors import extract_yt_initial_data +from tools.extractors import extract_yt_initial_data, eu_consent_cookie from cachetools import TTLCache search_cache = TTLCache(maxsize=50, ttl=300) @@ -17,7 +17,7 @@ ytdl = yt_dlp.YoutubeDL(ytdl_opts) def extract_search(q): try: - with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies={"CONSENT": "PENDING+999"}) as r: + with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies=eu_consent_cookie()) as r: r.raise_for_status() content = r.content.decode("utf8") yt_initial_data = extract_yt_initial_data(content) diff --git a/tools/extractors.py b/tools/extractors.py index b379628..40e0c06 100644 --- a/tools/extractors.py +++ b/tools/extractors.py @@ -1,5 +1,6 @@ import re import json +import random r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|)""", re.S + re.M) r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$||var )""", re.S + re.M) @@ -19,3 +20,6 @@ def extract_yt_initial_player_response(content): return yt_initial_player_response else: raise Exception("Could not match ytInitialPlayerResponse in content") + +def eu_consent_cookie(): + return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(random.randint(100, 999))}