From 861f441f9f911bb6e9901d08a451251ac0ffa92f Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Sat, 24 Oct 2020 00:36:20 +1300
Subject: [PATCH] Fix search

---
 extractors/search.py | 5 ++++-
 tools/extractors.py  | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/extractors/search.py b/extractors/search.py
index f7ac6b1..8b9dd2e 100644
--- a/extractors/search.py
+++ b/extractors/search.py
@@ -21,7 +21,10 @@ def extract_search(q):
     r.raise_for_status()
     content = r.content.decode("utf8")
     yt_initial_data = extract_yt_initial_data(content)
-    items = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"]
+    sections = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"]
+    # find the section with the videos, not the one with the ads
+    section = next(s for s in sections if "itemSectionRenderer" in s and not (len(s["itemSectionRenderer"]["contents"]) >= 1 and "carouselAdRenderer" in s["itemSectionRenderer"]["contents"][0]))
+    items = section["itemSectionRenderer"]["contents"]
     results = []
     for item in items:
         if "videoRenderer" in item:
diff --git a/tools/extractors.py b/tools/extractors.py
index 4bb121b..b62cfba 100644
--- a/tools/extractors.py
+++ b/tools/extractors.py
@@ -1,7 +1,7 @@
 import re
 import json
 
-r_yt_initial_data = re.compile(r"""^\s*window\["ytInitialData"\] = (\{.*\});\n?$""", re.M)
+r_yt_initial_data = re.compile(r"""^(?:\s*window\["ytInitialData"\]|var ytInitialData) = (\{.*\});\s*\n?$""", re.M)
 
 def extract_yt_initial_data(content):
     m_yt_initial_data = re.search(r_yt_initial_data, content)