Fix extraction when YouTube serves the cookie consent page in the EU

Fix #27: use the maintained yt-dlp library instead of youtube-dlc

Because of recent changes on YouTube's side (described in
https://github.com/ytdl-org/youtube-dl/issues/28604), we have to switch to a
maintained library. Although yt-dlp has not been fixed as of today, youtube-dl
is already fixed on master, and since yt-dlp is quick to merge upstream changes
into its own repo, we can hope the issue will be fixed there in a timely manner.

For the requests that we send to YouTube directly (via `requests.get`), we now include the CONSENT cookie.

Ref https://github.com/ytdl-org/youtube-dl/issues/28604
This commit is contained in:
Lomanic 2021-04-01 00:31:33 +02:00 committed by Cadence Ember
parent fe04a4dbd6
commit 5f47e1a71b
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17
5 changed files with 9 additions and 9 deletions

View File

@ -18,7 +18,7 @@ def extract_channel(ucid):
return channel_cache[ucid] return channel_cache[ucid]
channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user" channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user"
with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid)) as r: with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies={"CONSENT": "YES+cb.20210328-17-p0.en+FX+101"}) as r:
r.raise_for_status() r.raise_for_status()
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8")) yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))

View File

@ -1,6 +1,6 @@
import requests import requests
import traceback import traceback
import youtube_dlc import yt_dlp
from tools.converters import * from tools.converters import *
from tools.extractors import extract_yt_initial_data from tools.extractors import extract_yt_initial_data
from cachetools import TTLCache from cachetools import TTLCache
@ -13,11 +13,11 @@ ytdl_opts = {
"playlist_items": "1-100", "playlist_items": "1-100",
"extract_flat": "in_playlist" "extract_flat": "in_playlist"
} }
ytdl = youtube_dlc.YoutubeDL(ytdl_opts) ytdl = yt_dlp.YoutubeDL(ytdl_opts)
def extract_search(q): def extract_search(q):
try: try:
with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}) as r: with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies={"CONSENT": "YES+cb.20210328-17-p0.en+FX+101"}) as r:
r.raise_for_status() r.raise_for_status()
content = r.content.decode("utf8") content = r.content.decode("utf8")
yt_initial_data = extract_yt_initial_data(content) yt_initial_data = extract_yt_initial_data(content)

View File

@ -4,7 +4,7 @@ import json
import os import os
import re import re
import traceback import traceback
import youtube_dlc import yt_dlp
import urllib.error import urllib.error
from tools.converters import * from tools.converters import *
from tools.extractors import extract_yt_initial_data, extract_yt_initial_player_response from tools.extractors import extract_yt_initial_data, extract_yt_initial_player_response
@ -24,7 +24,7 @@ ytdl_opts = {
"writesubtitles": True, "writesubtitles": True,
"allsubtitles": True, "allsubtitles": True,
} }
ytdl = youtube_dlc.YoutubeDL(ytdl_opts) ytdl = yt_dlp.YoutubeDL(ytdl_opts)
def format_order(format): def format_order(format):
# most significant to least significant # most significant to least significant
@ -185,7 +185,7 @@ def extract_video(id):
return result return result
except youtube_dlc.DownloadError as e: except yt_dlp.DownloadError as e:
files.clean_up_temp_files(id) files.clean_up_temp_files(id)
if isinstance(e.exc_info[1], urllib.error.HTTPError): if isinstance(e.exc_info[1], urllib.error.HTTPError):

View File

@ -2,7 +2,7 @@ import cherrypy
import json import json
import pathlib import pathlib
import requests import requests
import youtube_dlc import yt_dlp
from extractors.video import extract_video from extractors.video import extract_video
from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest
from extractors.manifest import extract_manifest from extractors.manifest import extract_manifest

View File

@ -1,5 +1,5 @@
cherrypy cherrypy
youtube-dlc yt-dlp
cachetools cachetools
python-dateutil python-dateutil
requests requests