1
0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2024-09-21 19:17:29 +00:00
NewLeaf/tools/extractors.py
Lomanic 3f57d50893
Retrieve the first 20 comments of a video on /api/v1/comments/:videoid
Got some inspiration from https://github.com/nlitsme/youtube_tool (for the x-youtube-client-X
headers).
This is not a complete reimplementation of the Invidious API: continuation is not implemented
(it is needed to retrieve more than the first 20 comments, and to retrieve comment replies),
and like and reply counts are also missing.
2021-07-02 00:53:05 +12:00

33 lines
1.3 KiB
Python

import re
import json
import random
# Regexes that locate JSON object literals assigned inside page scripts.
# Each pattern captures the object literal in group 1. The captures are
# non-greedy, so they end at the first closing brace that is followed by the
# pattern's terminator.

# Matches `window["ytInitialData"] = {...};` (at the start of a line) or
# `var ytInitialData = {...};`, terminated by end-of-line or `</script>`.
r_yt_initial_data = re.compile(
    r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""",
    re.S | re.M,
)

# Same shape for ytInitialPlayerResponse; a following `var ` statement is
# also accepted as a terminator.
r_yt_initial_player_response = re.compile(
    r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""",
    re.S | re.M,
)

# Matches `ytcfg.set({...});`. No flags are passed, so `.` does not match
# newlines and the whole call has to sit on a single line.
r_yt_cfg = re.compile(
    r"""ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;"""
)
def extract_yt_initial_data(content):
    """Locate the ytInitialData assignment in *content* and return it parsed.

    Args:
        content: Text to search with ``r_yt_initial_data``.

    Returns:
        The decoded JSON value that starts where the regex capture starts.

    Raises:
        Exception: If ``r_yt_initial_data`` does not match *content*.
        json.JSONDecodeError: If the text at the matched position is not
            valid JSON.
    """
    match = r_yt_initial_data.search(content)
    if match is None:
        raise Exception("Could not match ytInitialData in content")
    # The regex capture is non-greedy, so it ends at the first "};" that is
    # followed by a terminator -- even when that sequence sits inside a JSON
    # string, which would leave a truncated blob. Use the match only to find
    # where the object starts and let the JSON decoder find its real end.
    yt_initial_data, _end = json.JSONDecoder().raw_decode(content, match.start(1))
    return yt_initial_data
def extract_yt_initial_player_response(content):
    """Locate the ytInitialPlayerResponse assignment in *content* and parse it.

    Args:
        content: Text to search with ``r_yt_initial_player_response``.

    Returns:
        The decoded JSON value that starts where the regex capture starts.

    Raises:
        Exception: If ``r_yt_initial_player_response`` does not match
            *content*.
        json.JSONDecodeError: If the text at the matched position is not
            valid JSON.
    """
    match = r_yt_initial_player_response.search(content)
    if match is None:
        raise Exception("Could not match ytInitialPlayerResponse in content")
    # The regex capture is non-greedy and accepts "};var " as a terminator,
    # so a string value containing that sequence would cut the blob short and
    # make parsing fail. Use the match only to find where the object starts
    # and let the JSON decoder determine where it actually ends.
    yt_initial_player_response, _end = json.JSONDecoder().raw_decode(
        content, match.start(1)
    )
    return yt_initial_player_response
def extract_yt_cfg(content):
    """Locate the first ``ytcfg.set({...});`` call in *content* and parse it.

    Args:
        content: Text to search with ``r_yt_cfg``.

    Returns:
        The decoded JSON value that starts where the regex capture starts.

    Raises:
        Exception: If ``r_yt_cfg`` does not match *content*.
        json.JSONDecodeError: If the text at the matched position is not
            valid JSON.
    """
    match = r_yt_cfg.search(content)
    if match is None:
        raise Exception("Could not match ytcfg in content")
    # The regex capture is non-greedy, so a "});" inside a string value would
    # end it early and hand truncated JSON to the parser. Use the match only
    # to find where the object starts and let the JSON decoder find its end.
    yt_cfg, _end = json.JSONDecoder().raw_decode(content, match.start(1))
    return yt_cfg
def eu_consent_cookie():
    """Build a cookie dict holding a single ``CONSENT`` entry.

    The value is a fixed prefix followed by a random integer between 100 and
    999 (inclusive), so each call may return a different value.
    NOTE(review): judging by the name, this is presumably sent to skip a
    consent page -- confirm against the callers.

    Returns:
        A dict with one key, ``"CONSENT"``.
    """
    random_suffix = random.randint(100, 999)
    consent_value = "YES+cb.20210509-17-p0.en+F+{}".format(random_suffix)
    return {"CONSENT": consent_value}