mirror of
				https://git.sr.ht/~cadence/NewLeaf
				synced 2025-11-03 21:15:35 +00:00 
			
		
		
		
	Extract fact check notices to second__clarification
This commit is contained in:
		
							parent
							
								
									65bb7a2c4c
								
							
						
					
					
						commit
						e3854a6050
					
				@ -7,7 +7,7 @@ import traceback
 | 
				
			|||||||
import yt_dlp
 | 
					import yt_dlp
 | 
				
			||||||
import urllib.error
 | 
					import urllib.error
 | 
				
			||||||
from tools.converters import *
 | 
					from tools.converters import *
 | 
				
			||||||
from tools.extractors import extract_yt_initial_data, extract_yt_initial_player_response
 | 
					from tools.extractors import extract_yt_initial_data, extract_yt_initial_player_response, deep_get
 | 
				
			||||||
import tools.files as files
 | 
					import tools.files as files
 | 
				
			||||||
from math import floor
 | 
					from math import floor
 | 
				
			||||||
from urllib.parse import parse_qs, urlparse, urlencode
 | 
					from urllib.parse import parse_qs, urlparse, urlencode
 | 
				
			||||||
@ -317,6 +317,12 @@ def get_more_stuff_from_file(id, result):
 | 
				
			|||||||
							"second__remoteUrl": url
 | 
												"second__remoteUrl": url
 | 
				
			||||||
						})
 | 
											})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									# fact check notices! aka "clarifications".
 | 
				
			||||||
 | 
									# for now, we just return the data as-is for the renderer to deal with (or not).
 | 
				
			||||||
 | 
									def get_clarification(section):
 | 
				
			||||||
 | 
										return deep_get(section, ["itemSectionRenderer", "contents", 0, "clarificationRenderer"])
 | 
				
			||||||
 | 
									result["second__clarification"] = next((get_clarification(s) for s in main_sections if get_clarification(s)), None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	except Exception:
 | 
						except Exception:
 | 
				
			||||||
		print("messed up extracting recommendations.")
 | 
							print("messed up extracting recommendations.")
 | 
				
			||||||
		traceback.print_exc()
 | 
							traceback.print_exc()
 | 
				
			||||||
 | 
				
			|||||||
@ -1,6 +1,7 @@
 | 
				
			|||||||
import re
 | 
					import re
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
 | 
					from functools import reduce
 | 
				
			||||||
 | 
					
 | 
				
			||||||
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
 | 
					r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
 | 
				
			||||||
r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
 | 
					r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
 | 
				
			||||||
@ -30,3 +31,12 @@ def extract_yt_cfg(content):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def eu_consent_cookie():
 | 
					def eu_consent_cookie():
 | 
				
			||||||
	return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(random.randint(100, 999))}
 | 
						return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(random.randint(100, 999))}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def is_in(o, key):
						"""Return True when `key` can safely be used to index `o`.

						- For a list or tuple, `key` must be a non-negative int (not a bool)
						  that is within bounds. Negative indices are rejected on purpose so
						  that a path never silently wraps around to the end of a sequence.
						- For a dict, `key` must be present as a key.
						- Anything else (None, str, numbers, ...) is not treated as a
						  container, so the answer is always False instead of raising
						  TypeError or performing a substring test.
						"""
						if isinstance(o, (list, tuple)):
							# bool is a subclass of int; keep rejecting it as an index, as the
							# original exact-type comparison did.
							return isinstance(key, int) and not isinstance(key, bool) and 0 <= key < len(o)
						if isinstance(o, dict):
							return key in o
						return False

					def deep_get(o, properties):
						"""Walk `o` along the path `properties` and return the value found there.

						`properties` is a sequence of dict keys and/or list indices, applied in
						order. Returns None as soon as a step cannot be taken (missing key,
						index out of range, or the current value is not a dict/list). An empty
						path returns `o` itself.

						Falsy values that really exist at the end of the path (0, "", False,
						[], {}) are returned as-is rather than being collapsed to None.
						"""
						current = o
						for key in properties:
							if not is_in(current, key):
								return None
							current = current[key]
						return current
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user