1
0
Fork 0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2026-03-05 12:01:38 +00:00

Improve ytInitialData extraction

This commit is contained in:
Cadence Ember 2020-12-03 17:00:06 +13:00
parent ba88c53857
commit 554cd8cc3a
No known key found for this signature in database
GPG key ID: BC1C2C61CF521B17
2 changed files with 9 additions and 6 deletions

View file

@@ -1,13 +1,11 @@
import re
import json
# Pattern locating the ytInitialData assignment in page markup. Matches either
# `window["ytInitialData"]` (optionally preceded by whitespace) or
# `var ytInitialData`, followed by ` = `, and captures the `{...}` object
# literal in group 1. The capture is greedy and must be followed directly by
# `;</script>`. No flags are given, so `.` does not match newlines.
r_yt_initial_data = re.compile(r"""(?:\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+\});</script>""")
# Rebinds the same name, so the pattern compiled on the previous line is
# discarded once this line runs. Differences from the pattern above:
#   * the `window[...]` alternative is anchored with `^`, which under re.M
#     matches at the start of any line;
#   * the capture is non-greedy (`.+?`), so it stops at the first `};` that
#     satisfies the terminator;
#   * the terminator after `;` is either optional whitespace up to end of
#     line (`\s*$`, per-line under re.M) or a literal `</script>`;
#   * re.S lets `.` match newlines, so the object literal may span lines.
# NOTE(review): with the non-greedy capture, a `};` at the end of a line
# inside the data itself would end the match early — confirm against real
# page content.
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
def extract_yt_initial_data(content):
content = content.replace("\n", "")
m_yt_initial_data = re.search(r_yt_initial_data, content)
if m_yt_initial_data:
print(m_yt_initial_data.group(1))
yt_initial_data = json.loads(m_yt_initial_data.group(1))
return yt_initial_data
else: