1
0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2024-11-22 15:47:30 +00:00
NewLeaf/tools/extractors.py

22 lines
964 B
Python
Raw Normal View History

2020-08-13 14:20:11 +00:00
import re
import json
2020-12-03 04:00:06 +00:00
# Finds the `ytInitialData` assignment in page source and captures the object
# literal as group 1. Two assignment forms are accepted:
#   * `window["ytInitialData"] = {...};` preceded only by whitespace on its line
#   * `var ytInitialData = {...};` anywhere in the text
# The capture is lazy: it ends at the first `};` that is followed either by
# nothing but whitespace up to an end of line, or by `</script>`.
# DOTALL lets the literal span several lines; MULTILINE makes ^ and $ match at
# every line boundary rather than only at the ends of the whole text.
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.DOTALL | re.MULTILINE)
2020-12-18 06:54:06 +00:00
# Finds the `ytInitialPlayerResponse` assignment in page source and captures the
# object literal as group 1. Two assignment forms are accepted:
#   * `window["ytInitialPlayerResponse"] = {...};` preceded only by whitespace
#     on its line
#   * `var ytInitialPlayerResponse = {...};` anywhere in the text
# The capture is lazy: it ends at the first `};` that is followed by nothing but
# whitespace up to an end of line, by `</script>`, or by another `var `
# declaration.
# DOTALL lets the literal span several lines; MULTILINE makes ^ and $ match at
# every line boundary rather than only at the ends of the whole text.
r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.DOTALL | re.MULTILINE)
2020-08-13 14:20:11 +00:00
def extract_yt_initial_data(content):
    """Extract and decode the ``ytInitialData`` object embedded in page source.

    Args:
        content: Text (``str``) to search for a ``ytInitialData`` assignment,
            as recognised by the module-level ``r_yt_initial_data`` pattern.

    Returns:
        The decoded JSON value that was assigned to ``ytInitialData``.

    Raises:
        Exception: If no ``ytInitialData`` assignment is found in ``content``.
        json.JSONDecodeError: If the text at the assignment is not valid JSON.
    """
    match = r_yt_initial_data.search(content)
    if match is None:
        raise Exception("Could not match ytInitialData in content")

    # Only use the regex to find where the object literal starts. Its lazy
    # capture stops at the first "};" + terminator, which may lie inside a JSON
    # string (truncating the capture) or after a later statement on the same
    # line (over-extending it). Letting the JSON decoder consume exactly one
    # document from the opening brace is correct in both cases, and yields the
    # same result whenever the captured text was already valid JSON.
    yt_initial_data, _ = json.JSONDecoder().raw_decode(content[match.start(1):])
    return yt_initial_data
2020-12-18 06:54:06 +00:00
def extract_yt_initial_player_response(content):
    """Extract and decode the ``ytInitialPlayerResponse`` object from page source.

    Args:
        content: Text (``str``) to search for a ``ytInitialPlayerResponse``
            assignment, as recognised by the module-level
            ``r_yt_initial_player_response`` pattern.

    Returns:
        The decoded JSON value that was assigned to ``ytInitialPlayerResponse``.

    Raises:
        Exception: If no ``ytInitialPlayerResponse`` assignment is found in
            ``content``.
        json.JSONDecodeError: If the text at the assignment is not valid JSON.
    """
    match = r_yt_initial_player_response.search(content)
    if match is None:
        raise Exception("Could not match ytInitialPlayerResponse in content")

    # Only use the regex to find where the object literal starts. Its lazy
    # capture stops at the first "};" followed by end-of-line, "</script>" or
    # "var ", and that sequence may lie inside a JSON string (truncating the
    # capture) or after a later statement on the same line (over-extending it).
    # Letting the JSON decoder consume exactly one document from the opening
    # brace is correct in both cases, and yields the same result whenever the
    # captured text was already valid JSON.
    yt_initial_player_response, _ = json.JSONDecoder().raw_decode(content[match.start(1):])
    return yt_initial_player_response