mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2024-11-22 07:37:29 +00:00
fix channel extraction when header is not available
This commit is contained in:
parent
adb5dc93d5
commit
6cc921c2dc
@ -22,13 +22,21 @@ def extract_channel(ucid):
|
|||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
|
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
|
||||||
|
|
||||||
header = yt_initial_data["header"]["c4TabbedHeaderRenderer"]
|
header = yt_initial_data["header"]["c4TabbedHeaderRenderer"] if "c4TabbedHeaderRenderer" in yt_initial_data["header"] else []
|
||||||
|
channel_metadata = yt_initial_data["metadata"]["channelMetadataRenderer"]
|
||||||
|
|
||||||
|
if header:
|
||||||
author = header["title"]
|
author = header["title"]
|
||||||
author_id = header["channelId"]
|
author_id = header["channelId"]
|
||||||
author_url = header["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
|
author_url = header["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
|
||||||
|
else:
|
||||||
|
author = channel_metadata["title"]
|
||||||
|
author_id = channel_metadata["externalId"]
|
||||||
|
author_url = channel_metadata["channelUrl"]
|
||||||
|
|
||||||
subscriber_count = combine_runs(header["subscriberCountText"]) if "subscribeCountText" in header else "Unknown subscribers"
|
subscriber_count = combine_runs(header["subscriberCountText"]) if "subscribeCountText" in header else "Unknown subscribers"
|
||||||
description = yt_initial_data["metadata"]["channelMetadataRenderer"]["description"]
|
description = channel_metadata["description"]
|
||||||
allowed_regions = yt_initial_data["metadata"]["channelMetadataRenderer"]["availableCountryCodes"]
|
allowed_regions = channel_metadata["availableCountryCodes"]
|
||||||
|
|
||||||
author_banners = []
|
author_banners = []
|
||||||
if "banner" in header:
|
if "banner" in header:
|
||||||
@ -39,6 +47,8 @@ def extract_channel(ucid):
|
|||||||
author_thumbnails = []
|
author_thumbnails = []
|
||||||
if "avatar" in header:
|
if "avatar" in header:
|
||||||
author_thumbnails = generate_full_author_thumbnails(header["avatar"]["thumbnails"])
|
author_thumbnails = generate_full_author_thumbnails(header["avatar"]["thumbnails"])
|
||||||
|
elif "avatar" in channel_metadata:
|
||||||
|
author_thumbnails = generate_full_author_thumbnails(channel_metadata["avatar"]["thumbnails"])
|
||||||
|
|
||||||
latest_videos = []
|
latest_videos = []
|
||||||
tabs = yt_initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
|
tabs = yt_initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
|
||||||
@ -66,6 +76,10 @@ def extract_channel(ucid):
|
|||||||
if "publishedTimeText" in v:
|
if "publishedTimeText" in v:
|
||||||
published_text = v["publishedTimeText"]["simpleText"]
|
published_text = v["publishedTimeText"]["simpleText"]
|
||||||
published = past_text_to_time(published_text)
|
published = past_text_to_time(published_text)
|
||||||
|
|
||||||
|
view_count_text = combine_runs(v["viewCountText"]) if "viewCountText" in v else None
|
||||||
|
view_count_text_short = combine_runs(v["shortViewCountText"]) if "shortViewCountText" in v else None
|
||||||
|
|
||||||
latest_videos.append({
|
latest_videos.append({
|
||||||
"type": "video",
|
"type": "video",
|
||||||
"title": combine_runs(v["title"]),
|
"title": combine_runs(v["title"]),
|
||||||
@ -76,9 +90,9 @@ def extract_channel(ucid):
|
|||||||
"videoThumbnails": generate_video_thumbnails(v["videoId"]),
|
"videoThumbnails": generate_video_thumbnails(v["videoId"]),
|
||||||
"description": "",
|
"description": "",
|
||||||
"descriptionHtml": "",
|
"descriptionHtml": "",
|
||||||
"viewCount": view_count_text_to_number(combine_runs(v["viewCountText"])),
|
"viewCount": view_count_text_to_number(view_count_text),
|
||||||
"second__viewCountText": combine_runs(v["viewCountText"]),
|
"second__viewCountText": view_count_text,
|
||||||
"second__viewCountTextShort": combine_runs(v["shortViewCountText"]),
|
"second__viewCountTextShort": view_count_text_short,
|
||||||
"published": published,
|
"published": published,
|
||||||
"publishedText": published_text,
|
"publishedText": published_text,
|
||||||
"lengthSeconds": length_seconds,
|
"lengthSeconds": length_seconds,
|
||||||
|
@ -44,6 +44,9 @@ def add_html_links(text):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def view_count_text_to_number(text):
|
def view_count_text_to_number(text):
|
||||||
|
if text is None:
|
||||||
|
return 0
|
||||||
|
|
||||||
first_word = text.split(" ")[0].replace(",", "")
|
first_word = text.split(" ")[0].replace(",", "")
|
||||||
if first_word == "No":
|
if first_word == "No":
|
||||||
return 0
|
return 0
|
||||||
|
Loading…
Reference in New Issue
Block a user