mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2024-11-22 07:37:29 +00:00
Fix extracting empty description
This commit is contained in:
parent
e18efc9591
commit
caee795b7e
@ -1,3 +1,4 @@
|
|||||||
|
import cherrypy
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
import requests
|
import requests
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
@ -125,33 +126,45 @@ def extract_channel_latest(ucid):
|
|||||||
author_url = author_container.find("{http://www.w3.org/2005/Atom}uri").text
|
author_url = author_container.find("{http://www.w3.org/2005/Atom}uri").text
|
||||||
channel_id = feed.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
|
channel_id = feed.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
|
||||||
results = []
|
results = []
|
||||||
|
missing_published = False
|
||||||
for entry in feed.findall("{http://www.w3.org/2005/Atom}entry"):
|
for entry in feed.findall("{http://www.w3.org/2005/Atom}entry"):
|
||||||
id = entry.find("{http://www.youtube.com/xml/schemas/2015}videoId").text
|
id = entry.find("{http://www.youtube.com/xml/schemas/2015}videoId").text
|
||||||
media_group = entry.find("{http://search.yahoo.com/mrss/}group")
|
media_group = entry.find("{http://search.yahoo.com/mrss/}group")
|
||||||
description = media_group.find("{http://search.yahoo.com/mrss/}description").text
|
description = media_group.find("{http://search.yahoo.com/mrss/}description").text
|
||||||
media_community = media_group.find("{http://search.yahoo.com/mrss/}community")
|
media_community = media_group.find("{http://search.yahoo.com/mrss/}community")
|
||||||
published = int(dateutil.parser.isoparse(entry.find("{http://www.w3.org/2005/Atom}published").text).timestamp())
|
published_entry = entry.find("{http://www.w3.org/2005/Atom}published")
|
||||||
results.append({
|
if published_entry is not None: # sometimes youtube does not provide published dates, no idea why.
|
||||||
"type": "video",
|
published = int(dateutil.parser.isoparse(published_entry.text).timestamp())
|
||||||
"title": entry.find("{http://www.w3.org/2005/Atom}title").text,
|
results.append({
|
||||||
"videoId": id,
|
"type": "video",
|
||||||
"author": author,
|
"title": entry.find("{http://www.w3.org/2005/Atom}title").text,
|
||||||
"authorId": channel_id,
|
"videoId": id,
|
||||||
"authorUrl": author_url,
|
"author": author,
|
||||||
"videoThumbnails": generate_video_thumbnails(id),
|
"authorId": channel_id,
|
||||||
"description": description,
|
"authorUrl": author_url,
|
||||||
"descriptionHtml": add_html_links(escape_html_textcontent(description)),
|
"videoThumbnails": generate_video_thumbnails(id),
|
||||||
"viewCount": int(media_community.find("{http://search.yahoo.com/mrss/}statistics").attrib["views"]),
|
"description": description,
|
||||||
"published": published,
|
"descriptionHtml": description and add_html_links(escape_html_textcontent(description)),
|
||||||
"publishedText": time_to_past_text(published),
|
"viewCount": int(media_community.find("{http://search.yahoo.com/mrss/}statistics").attrib["views"]),
|
||||||
"lengthSeconds": None,
|
"published": published,
|
||||||
"liveNow": None,
|
"publishedText": time_to_past_text(published),
|
||||||
"paid": None,
|
"lengthSeconds": None,
|
||||||
"premium": None,
|
"liveNow": None,
|
||||||
"isUpcoming": None
|
"paid": None,
|
||||||
})
|
"premium": None,
|
||||||
|
"isUpcoming": None
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
missing_published = True
|
||||||
|
|
||||||
with channel_latest_cache_lock:
|
if len(results) == 0 and missing_published: # no results due to all missing published
|
||||||
channel_latest_cache[ucid] = results
|
cherrypy.response.status = 503
|
||||||
|
return {
|
||||||
|
"error": "YouTube did not provide published dates for any feed items. This is usually temporary - refresh in a few minutes.",
|
||||||
|
"identifier": "PUBLISHED_DATES_NOT_PROVIDED"
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
with channel_latest_cache_lock:
|
||||||
|
channel_latest_cache[ucid] = results
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
@ -18,7 +18,8 @@ ytdl_opts = {
|
|||||||
"dump_single_json": True,
|
"dump_single_json": True,
|
||||||
"playlist_items": "1-100",
|
"playlist_items": "1-100",
|
||||||
"extract_flat": "in_playlist",
|
"extract_flat": "in_playlist",
|
||||||
"write_pages": True
|
"write_pages": True,
|
||||||
|
"source_address": "0.0.0.0"
|
||||||
}
|
}
|
||||||
ytdl = youtube_dl.YoutubeDL(ytdl_opts)
|
ytdl = youtube_dl.YoutubeDL(ytdl_opts)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user