1
0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2025-01-24 21:56:58 +00:00

Fix extraction when the cookie consent page is served in the EU

Fix #27: use the maintained yt-dlp library instead of youtube-dlc

Because of recent changes at YouTube (see
https://github.com/ytdl-org/youtube-dl/issues/28604), we have to switch to a
maintained library. While yt-dlp is not fixed as of today, youtube-dl is fixed
on master, and since yt-dlp is quick to merge upstream changes into its own
repo, we can hope the issue will soon be fixed there as well.

For requests that we send to YouTube directly, we include the CONSENT cookie.

Ref https://github.com/ytdl-org/youtube-dl/issues/28604
This commit is contained in:
Lomanic 2021-04-01 00:31:33 +02:00 committed by Cadence Ember
parent fe04a4dbd6
commit 5f47e1a71b
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17
5 changed files with 9 additions and 9 deletions

View File

@ -18,7 +18,7 @@ def extract_channel(ucid):
return channel_cache[ucid]
channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user"
with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid)) as r:
with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies={"CONSENT": "YES+cb.20210328-17-p0.en+FX+101"}) as r:
r.raise_for_status()
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))

View File

@ -1,6 +1,6 @@
import requests
import traceback
import youtube_dlc
import yt_dlp
from tools.converters import *
from tools.extractors import extract_yt_initial_data
from cachetools import TTLCache
@ -13,11 +13,11 @@ ytdl_opts = {
"playlist_items": "1-100",
"extract_flat": "in_playlist"
}
ytdl = youtube_dlc.YoutubeDL(ytdl_opts)
ytdl = yt_dlp.YoutubeDL(ytdl_opts)
def extract_search(q):
try:
with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}) as r:
with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies={"CONSENT": "YES+cb.20210328-17-p0.en+FX+101"}) as r:
r.raise_for_status()
content = r.content.decode("utf8")
yt_initial_data = extract_yt_initial_data(content)

View File

@ -4,7 +4,7 @@ import json
import os
import re
import traceback
import youtube_dlc
import yt_dlp
import urllib.error
from tools.converters import *
from tools.extractors import extract_yt_initial_data, extract_yt_initial_player_response
@ -24,7 +24,7 @@ ytdl_opts = {
"writesubtitles": True,
"allsubtitles": True,
}
ytdl = youtube_dlc.YoutubeDL(ytdl_opts)
ytdl = yt_dlp.YoutubeDL(ytdl_opts)
def format_order(format):
# most significant to least significant
@ -185,7 +185,7 @@ def extract_video(id):
return result
except youtube_dlc.DownloadError as e:
except yt_dlp.DownloadError as e:
files.clean_up_temp_files(id)
if isinstance(e.exc_info[1], urllib.error.HTTPError):

View File

@ -2,7 +2,7 @@ import cherrypy
import json
import pathlib
import requests
import youtube_dlc
import yt_dlp
from extractors.video import extract_video
from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest
from extractors.manifest import extract_manifest

View File

@ -1,5 +1,5 @@
cherrypy
youtube-dlc
yt-dlp
cachetools
python-dateutil
requests