mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2026-03-05 12:01:38 +00:00
Improve ytInitialData extraction
This commit is contained in:
parent
ba88c53857
commit
554cd8cc3a
2 changed files with 9 additions and 6 deletions
|
|
@@ -1,13 +1,11 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
r_yt_initial_data = re.compile(r"""(?:\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+\});</script>""")
|
||||
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
|
||||
|
||||
def extract_yt_initial_data(content):
|
||||
content = content.replace("\n", "")
|
||||
m_yt_initial_data = re.search(r_yt_initial_data, content)
|
||||
if m_yt_initial_data:
|
||||
print(m_yt_initial_data.group(1))
|
||||
yt_initial_data = json.loads(m_yt_initial_data.group(1))
|
||||
return yt_initial_data
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue