2020-08-13 14:20:11 +00:00
|
|
|
import re
|
|
|
|
import json
|
2021-11-03 13:01:52 +00:00
|
|
|
from functools import reduce
|
2020-08-13 14:20:11 +00:00
|
|
|
|
2020-12-03 04:00:06 +00:00
|
|
|
# Captures (group 1) the object literal assigned to ytInitialData, in either the
# `window["ytInitialData"] = {...};` form (anchored at a line start) or the
# `var ytInitialData = {...};` form. DOTALL lets the literal span several lines;
# MULTILINE makes `^`/`$` match at line boundaries.
r_yt_initial_data = re.compile(
    r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""",
    re.DOTALL | re.MULTILINE,
)
|
2020-12-18 06:54:06 +00:00
|
|
|
# Captures (group 1) the object literal assigned to ytInitialPlayerResponse, in
# either the `window["ytInitialPlayerResponse"] = {...};` form or the
# `var ytInitialPlayerResponse = {...};` form. The literal must be followed by
# end of line, `</script>`, or another `var ` declaration.
r_yt_initial_player_response = re.compile(
    r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""",
    re.DOTALL | re.MULTILINE,
)
|
2021-06-27 02:06:13 +00:00
|
|
|
# Captures (group 1) the object literal passed to a `ytcfg.set({...});` call.
# No flags are given, so `.` does not match a newline and the captured literal
# cannot span multiple lines.
r_yt_cfg = re.compile(r"""ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;""")
|
2020-08-13 14:20:11 +00:00
|
|
|
|
|
|
|
def extract_yt_initial_data(content):
    """Locate the ``ytInitialData`` assignment in ``content`` and decode it.

    Args:
        content: Text to search (presumably the HTML of a YouTube page --
            confirm against callers).

    Returns:
        The value produced by ``json.loads`` for the captured object literal.

    Raises:
        Exception: If ``r_yt_initial_data`` does not match ``content``.
    """
    found = r_yt_initial_data.search(content)
    if not found:
        raise Exception("Could not match ytInitialData in content")
    # Group 1 is the `{...}` literal on the right-hand side of the assignment.
    return json.loads(found.group(1))
|
2020-12-18 06:54:06 +00:00
|
|
|
|
|
|
|
def extract_yt_initial_player_response(content):
    """Locate the ``ytInitialPlayerResponse`` assignment in ``content`` and decode it.

    Args:
        content: Text to search (presumably the HTML of a YouTube watch page --
            confirm against callers).

    Returns:
        The value produced by ``json.loads`` for the captured object literal.

    Raises:
        Exception: If ``r_yt_initial_player_response`` does not match ``content``.
    """
    found = r_yt_initial_player_response.search(content)
    if not found:
        raise Exception("Could not match ytInitialPlayerResponse in content")
    # Group 1 is the `{...}` literal on the right-hand side of the assignment.
    return json.loads(found.group(1))
|
2021-05-14 16:49:25 +00:00
|
|
|
|
2021-06-27 02:06:13 +00:00
|
|
|
def extract_yt_cfg(content):
    """Decode the object literal passed to the first ``ytcfg.set(...)`` call.

    Args:
        content: Text to search for a ``ytcfg.set({...});`` call.

    Returns:
        The value produced by ``json.loads`` for the captured object literal.

    Raises:
        Exception: If ``r_yt_cfg`` does not match ``content``.
    """
    found = r_yt_cfg.search(content)
    if found is None:
        raise Exception("Could not match ytcfg in content")
    raw_cfg = found.group(1)
    return json.loads(raw_cfg)
|
|
|
|
|
2021-05-14 16:49:25 +00:00
|
|
|
def eu_consent_cookie():
    """Build the cookie mapping ``{"SOCS": "CAI"}``.

    A new dict is created on every call, so callers may mutate the result.
    NOTE(review): going by the function name, this is presumably sent to skip
    the EU cookie-consent interstitial -- confirm against callers.
    """
    return dict(SOCS="CAI")
|
2021-11-03 13:01:52 +00:00
|
|
|
|
|
|
|
def is_in(o, key):
    """Report whether ``key`` can be used to look up an item in ``o``.

    Args:
        o: The container to test. Lists are checked by index; anything else
            is checked with the ``in`` operator.
        key: Candidate index or key.

    Returns:
        For a list: True only when ``key`` is a non-negative integer index
        within range (``bool`` values and negative indices are rejected).
        Otherwise the result of ``key in o``, or False when ``o`` does not
        support that membership test (for example ``o`` is ``None`` or a
        number, or ``key`` is unhashable for a dict).
    """
    if isinstance(o, list):
        # bool is a subclass of int; True/False are not accepted as indices.
        is_index = isinstance(key, int) and not isinstance(key, bool)
        return is_index and 0 <= key < len(o)
    try:
        return key in o
    except TypeError:
        # `o` is not a container, or cannot be probed with this key type.
        return False


def deep_get(o, properties):
    """Follow a path of keys/indices into nested containers.

    Args:
        o: Root object, typically nested dicts and lists.
        properties: Iterable of keys (for dicts) and non-negative integer
            indices (for lists), applied in order.

    Returns:
        The value found at the end of the path, returned as-is even when it
        is falsy (``0``, ``False``, ``""``, ``[]``, ``{}``). Returns ``None``
        as soon as a step cannot be taken: missing key, out-of-range or
        negative index, or an intermediate value that is a scalar, ``None``
        or a string. With an empty path, ``o`` itself is returned.
    """
    current = o
    for key in properties:
        # A string passes a substring membership test but cannot be indexed
        # by that key, so it is treated as a leaf rather than a container.
        if isinstance(current, (str, bytes)) or not is_in(current, key):
            return None
        current = current[key]
    return current
|