1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2025-01-05 03:26:58 +00:00

Support loading shortcodes of a single image

This commit is contained in:
Cadence Ember 2021-11-05 17:01:46 +13:00
parent d660c84941
commit 91022aa5da
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17
7 changed files with 136 additions and 22 deletions

21
package-lock.json generated
View File

@ -1107,6 +1107,11 @@
"picomatch": "^2.0.4"
}
},
"apollojs": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/apollojs/-/apollojs-1.3.0.tgz",
"integrity": "sha1-X3sAME2XQOKnvltSx8CAfVH5JV4="
},
"append-transform": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/append-transform/-/append-transform-2.0.0.tgz",
@ -1948,6 +1953,22 @@
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
"dev": true
},
"fast-html-parser": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/fast-html-parser/-/fast-html-parser-1.0.1.tgz",
"integrity": "sha1-TsyWg7i7ea/hGlCAe3hT55JWzqI=",
"requires": {
"apollojs": "^1.3.0",
"entities": "^1.1.1"
},
"dependencies": {
"entities": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
"integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
}
}
},
"fast-json-stable-stringify": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",

View File

@ -18,6 +18,7 @@
"dependencies": {
"better-sqlite3": "^7.4.4",
"cookie": "^0.4.1",
"fast-html-parser": "^1.0.1",
"feed": "git+https://git.sr.ht/~cadence/nodejs-feed#3dde82f8296d7a6f5659323e497e0c684f03ab71",
"get-stream": "^6.0.1",
"gm": "^1.23.1",

View File

@ -4,6 +4,7 @@ const switcher = require("./utils/torswitcher")
const {extractSharedData} = require("./utils/body")
const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory")
const fhp = require("fast-html-parser")
const db = require("./db")
require("./testimports")(constants, request, extractSharedData, UserRequestCache, RequestHistory, db)
@ -398,6 +399,7 @@ async function getOrFetchShortcode(shortcode) {
const {result, fromCache} = await fetchShortcodeData(shortcode)
const entry = getOrCreateShortcode(shortcode)
entry.applyN3(result)
entry.fullyUpdated = true // we already called fetchShortcodeData, which fetches the greatest amount of data possible. it's no use trying to fetch that again with .update().
return {post: entry, fromCache}
}
}
@ -412,11 +414,92 @@ function fetchShortcodeData(shortcode) {
return switcher.request("post_graphql", `https://www.instagram.com/p/${shortcode}/embed/captioned/`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
}).then(res => res.text()).then(text => {
const textData = text.match(/window\.__additionalDataLoaded\('extra',(.*)\);<\/script>/)[1]
let data = JSON.parse(textData)
let data = null
const match = text.match(/window\.__additionalDataLoaded\('extra',(.*)\);<\/script>/)
if (match) {
const textData = match[1]
data = JSON.parse(textData)
}
if (data == null) {
// the thing doesn't exist
throw constants.symbols.NOT_FOUND
// we have to actually parse the HTML to get the data
const root = fhp.parse(text)
// Check if post really exists
if (root.querySelector(".EmbedIsBroken")) {
throw constants.symbols.NOT_FOUND
}
// find embed
const e_embed = root.querySelector(".Embed")
// find avatar
const e_avatar = root.querySelector(".Avatar")
const e_avatarImage = e_avatar.querySelector("img")
// find username
const e_usernameText = root.querySelector(".UsernameText")
const e_viewProfile = root.querySelector(".ViewProfileButton")
// find verified
const e_verified = root.querySelector(".VerifiedSprite")
// find media
const e_media = root.querySelector(".EmbeddedMediaImage")
// find caption
const e_caption = root.querySelector(".Caption")
// extract owner
const owner = {
id: e_embed.attributes["data-owner-id"],
is_verified: !!e_verified,
profile_pic_url: e_avatarImage.attributes.src,
username: e_viewProfile.attributes.href.replace(new RegExp(`^https:\/\/www\.instagram\.com\/(${constants.external.username_regex}).*$`, "s"), "$1")
}
// extract media type
let mediaType = e_embed.attributes["data-media-type"]
const videoData = {}
if (mediaType === "GraphVideo") {
Object.assign(videoData, {
video_url: null,
video_view_count: null
})
} else {
mediaType = "GraphImage"
}
// extract display resources
const display_resources = e_media.attributes.srcset.split(",").map(source => {
source = source.trim()
const [url, widthString] = source.split(" ")
const width = +widthString.match(/\d+/)[0]
return {
src: url,
config_width: width,
config_height: width // best guess!
}
})
// extract caption text
const captionText = e_caption.childNodes.slice(4, -3).map(node => { // slice removes unneeded starting and ending whitespace and user handles
if (node.tagName === "br") {
return "\n"
} else {
return node.text
}
}).join("")
return {
__typename: mediaType,
id: e_embed.attributes["data-media-id"],
display_url: e_media.attributes.src,
display_resources,
is_video: mediaType === "GraphVideo",
shortcode,
accessibility_caption: e_media.attributes.alt,
...videoData,
owner,
edge_media_to_caption: {
edges: [
{
node: {
text: captionText
}
}
]
}
}
} else {
data = data.shortcode_media
history.report("post", true)

View File

@ -5,10 +5,10 @@ const {compile} = require("pug")
require("../testimports")(collectors)
const rssImageTemplate = compile(`
img(src=constants.website_origin+entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions.width height=entry.data.dimensions.height)
img(src=constants.website_origin+entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height)
`)
const rssVideoTemplate = compile(`
video(src=constants.website_origin+entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions.width height=entry.data.dimensions.height)
video(src=constants.website_origin+entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height)
`)
class TimelineChild extends TimelineBaseMethods {

View File

@ -19,6 +19,7 @@ each child in children
class TimelineEntry extends TimelineBaseMethods {
constructor() {
super()
this.fullyUpdated = false
/** @type {import("../types").TimelineEntryAll} some properties may not be available yet! */
// @ts-ignore
this.data = {}
@ -38,12 +39,16 @@ class TimelineEntry extends TimelineBaseMethods {
}
async update() {
return collectors.fetchShortcodeData(this.data.shortcode).then(data => {
this.applyN3(data.result)
}).catch(error => {
console.error("TimelineEntry could not self-update; trying to continue anyway...")
console.error("E:", error)
})
if (!this.fullyUpdated) {
return collectors.fetchShortcodeData(this.data.shortcode).then(data => {
this.applyN3(data.result)
}).catch(error => {
console.error("TimelineEntry could not self-update; trying to continue anyway...")
console.error("E:", error)
}).finally(() => {
this.fullyUpdated = true
})
}
}
/**
@ -88,6 +93,7 @@ class TimelineEntry extends TimelineBaseMethods {
* All mutations should act exactly once and have no effect on already mutated data.
*/
fixData() {
this.hasDate = !!this.data.taken_at_timestamp
this.date = new Date(this.data.taken_at_timestamp*1000)
}
@ -237,7 +243,7 @@ class TimelineEntry extends TimelineBaseMethods {
let fromCache = true
const clone = await (async () => {
// Do we just already have the extended owner?
if (this.data.owner.full_name) { // this property is on extended owner and not basic owner
if (this.data.owner.profile_pic_url) { // this property is on extended owner and not basic owner
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
@ -246,7 +252,7 @@ class TimelineEntry extends TimelineBaseMethods {
else if (collectors.userRequestCache.getByID(this.data.owner.id)) {
/** @type {import("./User")} */
const user = collectors.userRequestCache.getByID(this.data.owner.id)
if (user.data.full_name !== undefined) {
if (user.data.profile_pic_url !== undefined) {
this.data.owner = {
id: user.data.id,
username: user.data.username,

View File

@ -38,15 +38,16 @@ mixin post(post, headerWithNavigation)
- let caption = post.children[0].data.accessibility_caption
if caption
p.description= caption
p.description
span!= ll.pug_post_timestamp({post})
if post.hasDate
p.description
span!= ll.pug_post_timestamp({post})
section.images-gallery
for entry in post.children
if entry.isVideo()
video(src=entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions.width height=entry.data.dimensions.height).sized-video
video(src=entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height).sized-video
else
img(src=entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions.width height=entry.data.dimensions.height).sized-image
img(src=entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height).sized-image
if willDisplayAltInGallery
- let caption = entry.data.accessibility_caption
if caption

View File

@ -33,13 +33,15 @@ html
if firstEntry.isVideo()
meta(property="og:video" content=`${website_origin}${firstEntry.getVideoUrlP()}`)
meta(property="og:video:type" content="video/mp4")
meta(property="og:video:width" content=firstEntry.data.dimensions.width)
meta(property="og:video:height" content=firstEntry.data.dimensions.height)
if firstEntry.data.dimensions
meta(property="og:video:width" content=firstEntry.data.dimensions.width)
meta(property="og:video:height" content=firstEntry.data.dimensions.height)
meta(property="og:video:alt" content=firstEntry.getAlt())
else
meta(property="og:image" content=`${website_origin}${firstEntry.getDisplayUrlP()}`)
meta(property="og:image:width" content=firstEntry.data.dimensions.width)
meta(property="og:image:height" content=firstEntry.data.dimensions.height)
if firstEntry.data.dimensions
meta(property="og:image:width" content=firstEntry.data.dimensions.width)
meta(property="og:image:height" content=firstEntry.data.dimensions.height)
meta(property="og:image:type" content="image/jpeg")
meta(property="og:image:alt" content=firstEntry.getAlt())
meta(property="og:site_name" content="Bibliogram")