2020-08-13 14:20:11 +00:00
|
|
|
import re
|
|
|
|
import json
|
|
|
|
|
2020-12-03 03:32:31 +00:00
|
|
|
# Locates the ``ytInitialData`` assignment inside page content.  Two spellings
# are recognised: ``window["ytInitialData"] = {...}`` and
# ``var ytInitialData = {...}``.  A ``};</script>`` must follow somewhere later
# in the (newline-stripped) text for the pattern to match.
#
# NOTE: ``.+`` is greedy, so group 1 can run past the end of the JSON object
# up to the *last* ``};</script>`` in the text.  Only the START of group 1 is
# reliable; ``extract_yt_initial_data`` relies on that and nothing more.
r_yt_initial_data = re.compile(r"""(?:\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+\});</script>""")


def extract_yt_initial_data(content):
    """Return the ``ytInitialData`` JSON object embedded in *content*.

    Args:
        content: Text containing a ``ytInitialData`` assignment inside a
            ``<script>`` element (presumably the HTML of a YouTube page).

    Returns:
        The decoded JSON object assigned to ``ytInitialData``.

    Raises:
        ValueError: If no ``ytInitialData`` assignment is found.  Malformed
            JSON raises ``json.JSONDecodeError``, itself a ``ValueError``.
    """
    # ``.`` in the pattern does not match newlines, so flatten the text first.
    # Raw newlines are not allowed inside JSON strings, so this cannot alter
    # the decoded value.
    content = content.replace("\n", "")

    match = r_yt_initial_data.search(content)
    if match is None:
        raise ValueError("Could not match ytInitialData in content")

    # Decode exactly one JSON value starting at the opening brace instead of
    # trusting the greedy capture, which over-extends whenever a later script
    # in the same page also ends with ``};</script>``.
    yt_initial_data, _end = json.JSONDecoder().raw_decode(content, match.start(1))
    return yt_initial_data
|