mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2026-03-07 13:01:37 +00:00
Retrieve the first 20 comments of a video on /api/v1/comments/:videoid
Took some inspiration from https://github.com/nlitsme/youtube_tool (for the x-youtube-client-X headers). This is not a complete reimplementation of the Invidious API: continuation is not implemented (so more than the first 20 comments, and comment replies, cannot be retrieved), and like counts and reply counts are also missing.
This commit is contained in:
parent
1ea86101fd
commit
3f57d50893
3 changed files with 62 additions and 1 deletions
47
extractors/comments.py
Normal file
47
extractors/comments.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import json
import urllib.parse

import cherrypy
import requests

from tools.converters import *
from tools.extractors import extract_yt_initial_data, extract_yt_cfg, eu_consent_cookie
|
||||
|
||||
def extract_comments(id, **kwargs):
	"""Retrieve the first batch (~20) of comments for a video.

	Scrapes the watch page to obtain the comment continuation token, the
	click-tracking params, and the XSRF token, then posts to the
	comment_service_ajax endpoint and reshapes the response.

	id -- YouTube video ID. (Shadows the builtin `id`; kept as-is because
	      the name is part of this module's public interface.)
	kwargs -- accepted for interface compatibility; unused here.

	Returns a dict with "videoId" and "comments", or an error dict (with
	HTTP status 500 set via cherrypy) when the XSRF token cannot be found.
	Raises requests.HTTPError if the watch page request fails.
	"""
	# Session is closed when done — requests.session() was a deprecated
	# alias and the original leaked the connection pool.
	with requests.Session() as session:
		session.headers.update({"accept-language": "en-US,en;q=0.9"})
		# Bypass the EU consent interstitial, which would otherwise replace
		# the watch page (and hide ytInitialData).
		session.cookies.set("CONSENT", eu_consent_cookie().get("CONSENT"))

		with session.get("https://www.youtube.com/watch?v={}".format(id)) as r:
			r.raise_for_status()
			page = r.content.decode("utf8")  # decode once, reused below

		yt_initial_data = extract_yt_initial_data(page)
		# contents[2] is where the comment section renderer lives on a
		# standard watch page — TODO confirm this index is stable.
		item = yt_initial_data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][2]["itemSectionRenderer"]
		next_continuation = item["continuations"][0]["nextContinuationData"]
		continuation = next_continuation["continuation"]
		itct = next_continuation["clickTrackingParams"]

		xsrf_token = extract_yt_cfg(page).get("XSRF_TOKEN", None)
		if not xsrf_token:
			cherrypy.response.status = 500
			return {
				"error": "NewLeaf was unable to obtain XSRF_TOKEN from ytcfg.",
				"identifier": "XSRF_TOKEN_NOT_FOUND"
			}

		# ctoken and continuation carry the same token; itct must be
		# URL-escaped since clickTrackingParams is not URL-safe.
		url = "https://www.youtube.com/comment_service_ajax?action_get_comments=1&pbj=1&ctoken={}&continuation={}&type=next&itct={}".format(continuation, continuation, urllib.parse.quote_plus(itct))
		# The x-youtube-client-* headers are required for the ajax endpoint
		# to answer (see nlitsme/youtube_tool).
		with session.post(url, headers={"x-youtube-client-name": "1", "x-youtube-client-version": "2.20210422.04.00"}, data={"session_token": xsrf_token}) as rr:
			data = json.loads(rr.content.decode("utf8"))

	return {
		"videoId": id,
		"comments": [
			_render_comment(c)
			for c in data["response"]["continuationContents"]["itemSectionContinuation"]["contents"]
		]
	}

def _render_comment(c):
	"""Reshape one raw commentThreadRenderer item into the API comment dict."""
	renderer = c["commentThreadRenderer"]["comment"]["commentRenderer"]
	# Join the runs once; both publishedText and the isEdited check use it.
	published = "".join([x["text"] for x in renderer["publishedTimeText"]["runs"]])
	content = "".join([x["text"] for x in renderer["contentText"]["runs"]])
	return {
		"author": renderer["authorText"]["simpleText"],
		"authorThumbnails": list(renderer["authorThumbnail"]["thumbnails"]),
		"authorId": renderer["authorEndpoint"]["browseEndpoint"]["browseId"],
		"authorUrl": renderer["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"],
		# YouTube appends " (edited)" to the published-time runs for edited comments.
		"isEdited": " (edited)" in published,
		"content": content,
		"contentHtml": escape_html_textcontent(content),
		"publishedText": published,
		# "likeCount": int(renderer["voteCount"]["simpleText"].replace(",", ""))
		"commentId": renderer["commentId"],
		"authorIsChannelOwner": renderer["authorIsChannelOwner"],
		# "replies": {
		# 	"replyCount": renderer["replyCount"]
		# }
	}
|
||||
Loading…
Add table
Add a link
Reference in a new issue