From 44c8e96a945ec70eecdc88b2a4735dd7b3b4d43f Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 29 Jul 2020 21:51:41 +1200 Subject: [PATCH] Cache enhancements: - Use quota for /p/ requests - Correctly detect owner.full_name to save unneeded requests out - Unify quota reached pages - Unify post presentation into function that fetches prerequisites - Add getByID method to userRequestCache --- src/lib/cache.js | 11 +++ src/lib/collectors.js | 8 +- src/lib/structures/TimelineEntry.js | 110 ++++++++++++++---------- src/site/api/routes.js | 70 ++++++++------- src/site/html/static/js/post_overlay.js | 7 ++ src/site/html/static/js/quota.js | 5 ++ src/site/pug/quota_reached.pug | 14 +++ 7 files changed, 145 insertions(+), 80 deletions(-) create mode 100644 src/site/pug/quota_reached.pug diff --git a/src/lib/cache.js b/src/lib/cache.js index fbdb280..6d6a8dd 100644 --- a/src/lib/cache.js +++ b/src/lib/cache.js @@ -143,6 +143,8 @@ class UserRequestCache extends TtlCache { super(ttl) /** @type {Map} */ this.cache + /** @type {Map} */ + this.idCache = new Map() } /** @@ -155,6 +157,7 @@ class UserRequestCache extends TtlCache { // Preserve html failure status if now requesting as reel const htmlFailed = isReel && existing && existing.htmlFailed this.cache.set(key, {data, isReel, isFailedPromise: false, htmlFailed, reelFailed: false, time: Date.now()}) + if (data && data.data && data.data.id) this.idCache.set(data.data.id, key) // this if statement is bad } /** @@ -200,6 +203,14 @@ class UserRequestCache extends TtlCache { this.set(key, willFetchReel, pending) return pending } + + getByID(id) { + const key = this.idCache.get(id) + if (key == null) return null + const data = this.getWithoutClean(key) + if (data == null) return null + return data + } } module.exports.TtlCache = TtlCache diff --git a/src/lib/collectors.js b/src/lib/collectors.js index 6e23fd1..0c493a1 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -380,12 +380,12 @@ function getOrCreateShortcode(shortcode) { async function getOrFetchShortcode(shortcode) { if (timelineEntryCache.has(shortcode)) { - return timelineEntryCache.get(shortcode) + return {post: timelineEntryCache.get(shortcode), fromCache: true} } else { - const data = await fetchShortcodeData(shortcode) + const {result, fromCache} = await fetchShortcodeData(shortcode) const entry = getOrCreateShortcode(shortcode) - entry.applyN3(data.result) - return entry + entry.applyN3(result) + return {post: entry, fromCache} } } diff --git a/src/lib/structures/TimelineEntry.js b/src/lib/structures/TimelineEntry.js index 4593fc5..ba49963 100644 --- a/src/lib/structures/TimelineEntry.js +++ b/src/lib/structures/TimelineEntry.js @@ -196,72 +196,88 @@ class TimelineEntry extends TimelineBaseMethods { } async fetchChildren() { - // Cached children? - if (this.children) return this.children - // Not a gallery? Convert self to a child and return. - if (this.getType() !== constants.symbols.TYPE_GALLERY) { - return this.children = [new TimelineChild(this.data)] - } - /** @type {import("../types").Edges|import("../types").Edges} */ - // @ts-ignore - const children = this.data.edge_sidecar_to_children - // It's a gallery, so we may need to fetch its children - // We need to fetch children if one of them is a video, because N1 has no video_url. - if (!children || !children.edges.length || children.edges.some(edge => edge.node.is_video && !edge.node.video_url)) { - await this.update() - } - // Create children - return this.children = this.data.edge_sidecar_to_children.edges.map(e => new TimelineChild(e.node)) + let fromCache = true + await (async () => { + // Cached children? + if (this.children) return + // Not a gallery? Convert self to a child and return. + if (this.getType() !== constants.symbols.TYPE_GALLERY) { + this.children = [new TimelineChild(this.data)] + return + } + /** @type {import("../types").Edges|import("../types").Edges} */ + // @ts-ignore + const children = this.data.edge_sidecar_to_children + // It's a gallery, so we may need to fetch its children + // We need to fetch children if one of them is a video, because N1 has no video_url. + if (!children || !children.edges.length || children.edges.some(edge => edge.node.is_video && !edge.node.video_url)) { + fromCache = false + await this.update() + } + // Create children + this.children = this.data.edge_sidecar_to_children.edges.map(e => new TimelineChild(e.node)) + })() + return {fromCache, children: this.children} } /** * Returns a proxied profile pic URL (P) - * @returns {Promise} + * @returns {Promise<{owner: import("../types").ExtendedOwner, fromCache: boolean}>} */ async fetchExtendedOwnerP() { - // Do we just already have the extended owner? - if (this.data.owner.full_name) { // this property is on extended owner and not basic owner - const clone = proxyExtendedOwner(this.data.owner) - this.ownerPfpCacheP = clone.profile_pic_url - return clone - } - // The owner may be in the user cache, so copy from that. - // This could be implemented better. - else if (collectors.userRequestCache.hasNotPromise("user/"+this.data.owner.username)) { - /** @type {import("./User")} */ - const user = collectors.userRequestCache.getWithoutClean("user/"+this.data.owner.username) - if (user.data.full_name) { - this.data.owner = { - id: user.data.id, - username: user.data.username, - is_verified: user.data.is_verified, - full_name: user.data.full_name, - profile_pic_url: user.data.profile_pic_url // _hd is also available here. - } + let fromCache = true + const clone = await (async () => { + // Do we just already have the extended owner? + if (this.data.owner.full_name) { // this property is on extended owner and not basic owner const clone = proxyExtendedOwner(this.data.owner) this.ownerPfpCacheP = clone.profile_pic_url return clone } - // That didn't work, so just fall through... - } - // We'll have to re-request ourselves. - await this.update() - const clone = proxyExtendedOwner(this.data.owner) - this.ownerPfpCacheP = clone.profile_pic_url - return clone + // The owner may be in the user cache, so copy from that. + else if (collectors.userRequestCache.getByID(this.data.owner.id)) { + /** @type {import("./User")} */ + const user = collectors.userRequestCache.getByID(this.data.owner.id) + if (user.data.full_name !== undefined) { + this.data.owner = { + id: user.data.id, + username: user.data.username, + is_verified: user.data.is_verified, + full_name: user.data.full_name, + profile_pic_url: user.data.profile_pic_url // _hd is also available here. + } + const clone = proxyExtendedOwner(this.data.owner) + this.ownerPfpCacheP = clone.profile_pic_url + return clone + } + // That didn't work, so just fall through... + } + // We'll have to re-request ourselves. + fromCache = false + await this.update() + const clone = proxyExtendedOwner(this.data.owner) + this.ownerPfpCacheP = clone.profile_pic_url + return clone + })() + return {owner: clone, fromCache} } fetchVideoURL() { - if (!this.isVideo()) return Promise.resolve(null) - else if (this.data.video_url) return Promise.resolve(this.getVideoUrlP()) - else return this.update().then(() => this.getVideoUrlP()) + if (!this.isVideo()) { + return Promise.resolve({fromCache: true, videoURL: null}) + } else if (this.data.video_url) { + return Promise.resolve({fromCache: true, videoURL: this.getVideoUrlP()}) + } else { + return this.update().then(() => { + return {fromCache: false, videoURL: this.getVideoUrlP()} + }) + } } /** * @returns {Promise} */ async fetchFeedData() { - const children = await this.fetchChildren() + const {children} = await this.fetchChildren() return { title: this.getCaptionIntroduction() || `New post from @${this.getBasicOwner().username}`, description: rssDescriptionTemplate({ diff --git a/src/site/api/routes.js b/src/site/api/routes.js index f512b2b..bc48b86 100644 --- a/src/site/api/routes.js +++ b/src/site/api/routes.js @@ -13,6 +13,24 @@ function getPageTitle(post) { return (post.getCaptionIntroduction() || `Post from @${post.getBasicOwner().username}`) + " | Bibliogram" } +function getPostAndQuota(req, shortcode) { + if (quota.remaining(req) === 0) { + throw constants.symbols.QUOTA_REACHED + } + + return getOrFetchShortcode(shortcode).then(async ({post, fromCache: fromCache1}) => { + const {fromCache: fromCache2} = await post.fetchChildren() + const {fromCache: fromCache3} = await post.fetchExtendedOwnerP() // serial await is okay since intermediate fetch result is cached + const {fromCache: fromCache4} = await post.fetchVideoURL() // if post is not a video, function will just return, so this is fine + + // I'd _love_ to be able to put these in an array, but I can't destructure directly into one, so this is easier. + const quotaUsed = (fromCache1 && fromCache2 && fromCache3 && fromCache4) ? 0 : 1 // if any of them is false then one request was needed to get the post. + const remaining = quota.add(req, quotaUsed) + + return {post, remaining} + }) +} + module.exports = [ { route: "/", methods: ["GET"], code: async ({req}) => { @@ -141,16 +159,7 @@ module.exports = [ } else if (error === constants.symbols.extractor_results.AGE_RESTRICTED) { return render(403, "pug/age_gated.pug", {settings}) } else if (error === constants.symbols.QUOTA_REACHED) { - return render(429, "pug/friendlyerror.pug", { - title: "Quota reached", - statusCode: 429, - message: "Quota reached", - explanation: - "Each person has a limited number of requests to Bibliogram." - +"\nYou have reached that limit." - +"\nWait a while to for your counter to reset.\n", - withInstancesLink: true - }) + return render(429, "pug/quota_reached.pug") } else { throw error } @@ -212,7 +221,7 @@ module.exports = [ } else if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) { return render(503, "pug/fragments/timeline_loading_blocked.pug") } else if (error === constants.symbols.QUOTA_REACHED) { - return render(429, "pug/fragments/timeline_quota_reached.pug") + return render(429, "pug/fragments/quota_reached.pug") } else { throw error } @@ -220,25 +229,26 @@ module.exports = [ } }, { - route: `/fragment/post/(${constants.external.shortcode_regex})`, methods: ["GET"], code: ({req, fill}) => { + route: `/fragment/post/(${constants.external.shortcode_regex})`, methods: ["GET"], code: async ({req, fill}) => { const shortcode = fill[0] - return getOrFetchShortcode(shortcode).then(async post => { - await post.fetchChildren() - await post.fetchExtendedOwnerP() // serial await is okay since intermediate fetch result is cached - if (post.isVideo()) await post.fetchVideoURL() - const settings = getSettings(req) + const settings = getSettings(req) + + try { + const {post, remaining} = await getPostAndQuota(req, shortcode) return { statusCode: 200, contentType: "application/json", content: { title: getPageTitle(post), - html: pugCache.get("pug/fragments/post.pug").web({lang, post, settings, getStaticURL}) + html: pugCache.get("pug/fragments/post.pug").web({lang, post, settings, getStaticURL}), + quota: remaining } } - }).catch(error => { - if (error === constants.symbols.NOT_FOUND || constants.symbols.RATE_LIMITED) { + } catch (error) { + if (error === constants.symbols.NOT_FOUND || constants.symbols.RATE_LIMITED || error === constants.symbols.QUOTA_REACHED) { + const statusCode = error === constants.symbols.QUOTA_REACHED ? 429 : 503 return { - statusCode: 503, + statusCode, contentType: "application/json", content: { redirectTo: `/p/${shortcode}` @@ -247,7 +257,7 @@ module.exports = [ } else { throw error } - }) + } } }, { @@ -268,19 +278,19 @@ module.exports = [ } }, { - route: `/p/(${constants.external.shortcode_regex})`, methods: ["GET"], code: ({req, fill}) => { + route: `/p/(${constants.external.shortcode_regex})`, methods: ["GET"], code: async ({req, fill}) => { + const shortcode = fill[0] const settings = getSettings(req) - return getOrFetchShortcode(fill[0]).then(async post => { - await post.fetchChildren() - await post.fetchExtendedOwnerP() // serial await is okay since intermediate fetch result is cached - if (post.isVideo()) await post.fetchVideoURL() + + try { + const {post} = await getPostAndQuota(req, shortcode) return render(200, "pug/post.pug", { title: getPageTitle(post), post, website_origin: constants.website_origin, settings }) - }).catch(error => { + } catch (error) { if (error === constants.symbols.NOT_FOUND) { return render(404, "pug/friendlyerror.pug", { statusCode: 404, @@ -291,10 +301,12 @@ module.exports = [ }) } else if (error === constants.symbols.RATE_LIMITED) { return render(503, "pug/blocked_graphql.pug") + } else if (error === constants.symbols.QUOTA_REACHED) { + return render(429, "pug/quota_reached.pug") } else { throw error } - }) + } } } ] diff --git a/src/site/html/static/js/post_overlay.js b/src/site/html/static/js/post_overlay.js index e5182e2..380336f 100644 --- a/src/site/html/static/js/post_overlay.js +++ b/src/site/html/static/js/post_overlay.js @@ -1,5 +1,6 @@ import {q, ElemJS} from "./elemjs/elemjs.js" import {timeline} from "./post_series.js" +import {quota} from "./quota.js" /** @type {PostOverlay[]} */ const postOverlays = [] @@ -139,6 +140,12 @@ function loadPostOverlay(shortcode, stateChangeType) { window.location.assign(root.redirectTo) return } + + if (root.quota) { + quota.set(root.quota) + delete root.quota // don't apply the old quota next time the post is opened + } + shortcodeDataMap.set(shortcode, root) if (overlay.available) { const {title, html} = root diff --git a/src/site/html/static/js/quota.js b/src/site/html/static/js/quota.js index 68edd1b..8fe30f6 100644 --- a/src/site/html/static/js/quota.js +++ b/src/site/html/static/js/quota.js @@ -6,6 +6,11 @@ class Quota extends ElemJS { this.value = +this.element.textContent } + set(value) { + this.value = value + this.text(this.value) + } + change(difference) { this.value += difference this.value = Math.max(0, this.value) diff --git a/src/site/pug/quota_reached.pug b/src/site/pug/quota_reached.pug new file mode 100644 index 0000000..ac12497 --- /dev/null +++ b/src/site/pug/quota_reached.pug @@ -0,0 +1,14 @@ +include includes/error.pug + +doctype html +html + head + title= `Quota reached | Bibliogram` + include includes/head + body.error-page + +error(429, "Quota reached", true) + | Each person has a limited number of requests to Bibliogram. + | You have reached that limit. You cannot load any more data on this instance. + | Your quota will reset automatically after some time has passed. + | + |