import requests
import traceback
import yt_dlp
from tools.converters import *
from tools.extractors import extract_yt_initial_data, eu_consent_cookie
from cachetools import TTLCache
# Cache of successful search extractions, keyed by query string.
# Entries expire after 5 minutes so results stay reasonably fresh.
search_cache = TTLCache(maxsize=50, ttl=300)

# yt-dlp options for the fallback extractor: no console output, metadata
# only (single JSON dump, flat extraction — no per-video page fetches),
# capped at the first 100 entries of a search/playlist.
ytdl_opts = {
    "quiet": True,
    "dump_single_json": True,
    "playlist_items": "1-100",
    "extract_flat": "in_playlist"
}
# Shared yt-dlp instance used by extract_search's fallback path.
ytdl = yt_dlp.YoutubeDL(ytdl_opts)
def extract_search(q):
    """Search YouTube for `q` and return a list of Invidious-style video dicts.

    Primary path: fetch the public YouTube results page and parse the
    embedded ytInitialData JSON; a successful extraction is stored in
    `search_cache`. Fallback path (taken on ANY failure, e.g. a YouTube
    page-layout change): a flat yt-dlp search, which yields much sparser
    metadata — most fields in those result dicts are None.
    """
    try:
        # hl=en forces English UI strings (parsed below, e.g. publish
        # times); the EU consent cookie bypasses the consent interstitial.
        with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies=eu_consent_cookie()) as r:
            r.raise_for_status()
            content = r.content.decode("utf8")
            yt_initial_data = extract_yt_initial_data(content)
            sections = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"]
            # youtube searches contain a lot of random stuff, just grab it
            # all for now, then filter to `videoRenderer` later
            itemSections = [s for s in sections if "itemSectionRenderer" in s]
            items = []
            for section in itemSections:
                items += section["itemSectionRenderer"]["contents"]

            results = []
            for item in items:
                if "videoRenderer" in item:
                    video = item["videoRenderer"]
                    # Live broadcasts have no publishedTimeText; represent
                    # them as published=0 with the text "Live now".
                    published = 0
                    published_text = "Live now"
                    if "publishedTimeText" in video:
                        published_text = video["publishedTimeText"]["simpleText"]
                        published = past_text_to_time(published_text)
                    results.append({
                        "type": "video",
                        "title": combine_runs(video["title"]),
                        "videoId": video["videoId"],
                        "author": combine_runs(video["longBylineText"]),
                        "authorId": video["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"],
                        "authorUrl": video["longBylineText"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"],
                        "videoThumbnails": generate_video_thumbnails(video["videoId"]),
                        "description": combine_runs(video["descriptionSnippet"]) if "descriptionSnippet" in video else "",
                        "descriptionHtml": combine_runs_html(video["descriptionSnippet"]) if "descriptionSnippet" in video else "",
                        "viewCount": get_view_count_or_recommended(video),
                        "second__viewCountText": get_view_count_text_or_recommended(video),
                        "published": published,
                        "publishedText": published_text,
                        "lengthSeconds": get_length_or_live_now(video),
                        "second__lengthText": get_length_text_or_live_now(video),
                        "liveNow": is_live(video),
                        # Not derivable from the scraped page data.
                        "paid": None,
                        "premium": None,
                        "isUpcoming": None
                    })

            search_cache[q] = results # only cache full extraction
            return results

    except Exception:
        print("messed up extracting search, using youtube-dl instead")
        traceback.print_exc()

        # Fallback: flat yt-dlp search. Entries without a title (e.g.
        # deleted/private videos) are skipped.
        info = ytdl.extract_info("ytsearchall:{}".format(q), download=False)
        return [{
            "type": "video",
            "title": video["title"],
            "videoId": video["id"],
            # Flat extraction provides no author/description/stats metadata.
            "author": None,
            "authorId": None,
            "authorUrl": None,
            "videoThumbnails": generate_video_thumbnails(video["id"]),
            "description": None,
            "descriptionHtml": None,
            "viewCount": None,
            "published": None,
            "publishedText": None,
            "lengthSeconds": None,
            "liveNow": None,
            "paid": None,
            "premium": None,
            "isUpcoming": None
        } for video in info["entries"] if "title" in video]