1
0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2024-11-22 07:37:29 +00:00

Fix search extractor ad section filtering

The ad sections used to have a carouselAdRenderer property; they now have a
promotedSparklesTextSearchRenderer property instead. As this may
change again in the future, we should just collect the items from every
section, since we filter them down to videos afterwards using the
videoRenderer property.
This commit is contained in:
Lomanic 2021-05-13 02:46:26 +02:00 committed by Cadence Ember
parent 57b0a88a2e
commit f0c9708d99
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17

View File

@@ -21,10 +21,15 @@ def extract_search(q):
r.raise_for_status() r.raise_for_status()
content = r.content.decode("utf8") content = r.content.decode("utf8")
yt_initial_data = extract_yt_initial_data(content) yt_initial_data = extract_yt_initial_data(content)
sections = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"] sections = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"]
# find the section with the videos, not the one with the ads # youtube searches contain a lot of random stuff, just grab it all for now, then filter to `videoRenderer` later
section = next(s for s in sections if "itemSectionRenderer" in s and not (len(s["itemSectionRenderer"]["contents"]) >= 1 and "carouselAdRenderer" in s["itemSectionRenderer"]["contents"][0])) itemSections = [s for s in sections if "itemSectionRenderer" in s]
items = section["itemSectionRenderer"]["contents"]
items = []
for section in itemSections:
items += section["itemSectionRenderer"]["contents"]
results = [] results = []
for item in items: for item in items:
if "videoRenderer" in item: if "videoRenderer" in item: