mirror of
				https://git.sr.ht/~cadence/NewLeaf
				synced 2025-11-04 13:35:36 +00:00 
			
		
		
		
	Fix extracting empty description
This commit is contained in:
		
							parent
							
								
									e18efc9591
								
							
						
					
					
						commit
						caee795b7e
					
				@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					import cherrypy
 | 
				
			||||||
import dateutil.parser
 | 
					import dateutil.parser
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
import xml.etree.ElementTree as ET
 | 
					import xml.etree.ElementTree as ET
 | 
				
			||||||
@ -125,12 +126,15 @@ def extract_channel_latest(ucid):
 | 
				
			|||||||
		author_url = author_container.find("{http://www.w3.org/2005/Atom}uri").text
 | 
							author_url = author_container.find("{http://www.w3.org/2005/Atom}uri").text
 | 
				
			||||||
		channel_id = feed.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
 | 
							channel_id = feed.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
 | 
				
			||||||
		results = []
 | 
							results = []
 | 
				
			||||||
 | 
							missing_published = False
 | 
				
			||||||
		for entry in feed.findall("{http://www.w3.org/2005/Atom}entry"):
 | 
							for entry in feed.findall("{http://www.w3.org/2005/Atom}entry"):
 | 
				
			||||||
			id = entry.find("{http://www.youtube.com/xml/schemas/2015}videoId").text
 | 
								id = entry.find("{http://www.youtube.com/xml/schemas/2015}videoId").text
 | 
				
			||||||
			media_group = entry.find("{http://search.yahoo.com/mrss/}group")
 | 
								media_group = entry.find("{http://search.yahoo.com/mrss/}group")
 | 
				
			||||||
			description = media_group.find("{http://search.yahoo.com/mrss/}description").text
 | 
								description = media_group.find("{http://search.yahoo.com/mrss/}description").text
 | 
				
			||||||
			media_community = media_group.find("{http://search.yahoo.com/mrss/}community")
 | 
								media_community = media_group.find("{http://search.yahoo.com/mrss/}community")
 | 
				
			||||||
			published = int(dateutil.parser.isoparse(entry.find("{http://www.w3.org/2005/Atom}published").text).timestamp())
 | 
								published_entry = entry.find("{http://www.w3.org/2005/Atom}published")
 | 
				
			||||||
 | 
								if published_entry is not None: # sometimes youtube does not provide published dates, no idea why.
 | 
				
			||||||
 | 
									published = int(dateutil.parser.isoparse(published_entry.text).timestamp())
 | 
				
			||||||
				results.append({
 | 
									results.append({
 | 
				
			||||||
					"type": "video",
 | 
										"type": "video",
 | 
				
			||||||
					"title": entry.find("{http://www.w3.org/2005/Atom}title").text,
 | 
										"title": entry.find("{http://www.w3.org/2005/Atom}title").text,
 | 
				
			||||||
@ -140,7 +144,7 @@ def extract_channel_latest(ucid):
 | 
				
			|||||||
					"authorUrl": author_url,
 | 
										"authorUrl": author_url,
 | 
				
			||||||
					"videoThumbnails": generate_video_thumbnails(id),
 | 
										"videoThumbnails": generate_video_thumbnails(id),
 | 
				
			||||||
					"description": description,
 | 
										"description": description,
 | 
				
			||||||
				"descriptionHtml": add_html_links(escape_html_textcontent(description)),
 | 
										"descriptionHtml": description and add_html_links(escape_html_textcontent(description)),
 | 
				
			||||||
					"viewCount": int(media_community.find("{http://search.yahoo.com/mrss/}statistics").attrib["views"]),
 | 
										"viewCount": int(media_community.find("{http://search.yahoo.com/mrss/}statistics").attrib["views"]),
 | 
				
			||||||
					"published": published,
 | 
										"published": published,
 | 
				
			||||||
					"publishedText": time_to_past_text(published),
 | 
										"publishedText": time_to_past_text(published),
 | 
				
			||||||
@ -150,7 +154,16 @@ def extract_channel_latest(ucid):
 | 
				
			|||||||
					"premium": None,
 | 
										"premium": None,
 | 
				
			||||||
					"isUpcoming": None
 | 
										"isUpcoming": None
 | 
				
			||||||
				})
 | 
									})
 | 
				
			||||||
 | 
								else:
 | 
				
			||||||
 | 
									missing_published = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if len(results) == 0 and missing_published: # no results due to all missing published
 | 
				
			||||||
 | 
								cherrypy.response.status = 503
 | 
				
			||||||
 | 
								return {
 | 
				
			||||||
 | 
									"error": "YouTube did not provide published dates for any feed items. This is usually temporary - refresh in a few minutes.",
 | 
				
			||||||
 | 
									"identifier": "PUBLISHED_DATES_NOT_PROVIDED"
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							else:
 | 
				
			||||||
			with channel_latest_cache_lock:
 | 
								with channel_latest_cache_lock:
 | 
				
			||||||
				channel_latest_cache[ucid] = results
 | 
									channel_latest_cache[ucid] = results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -18,7 +18,8 @@ ytdl_opts = {
 | 
				
			|||||||
	"dump_single_json": True,
 | 
						"dump_single_json": True,
 | 
				
			||||||
	"playlist_items": "1-100",
 | 
						"playlist_items": "1-100",
 | 
				
			||||||
	"extract_flat": "in_playlist",
 | 
						"extract_flat": "in_playlist",
 | 
				
			||||||
	"write_pages": True
 | 
						"write_pages": True,
 | 
				
			||||||
 | 
					        "source_address": "0.0.0.0"
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
ytdl = youtube_dl.YoutubeDL(ytdl_opts)
 | 
					ytdl = youtube_dl.YoutubeDL(ytdl_opts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user