From 6e136dc77afd9d27b40b0308b8ec178da4fc7636 Mon Sep 17 00:00:00 2001 From: Cadence Fish Date: Mon, 3 Feb 2020 03:53:37 +1300 Subject: [PATCH] Add preferRSS setting --- src/lib/cache.js | 57 ++++++++++++++++++++++++++++- src/lib/collectors.js | 27 ++++++++------ src/lib/constants.js | 2 +- src/lib/structures/TimelineEntry.js | 4 +- src/site/api/feed.js | 8 ++-- src/site/api/routes.js | 10 ++--- src/site/repl.js | 2 +- 7 files changed, 84 insertions(+), 26 deletions(-) diff --git a/src/lib/cache.js b/src/lib/cache.js index 3f9f149..cf5a26c 100644 --- a/src/lib/cache.js +++ b/src/lib/cache.js @@ -85,6 +85,10 @@ class TtlCache { } } +/** + * @extends TtlCache + * @template T + */ class RequestCache extends TtlCache { /** * @param {number} ttl time to keep each resource in milliseconds @@ -97,7 +101,6 @@ class RequestCache extends TtlCache { * @param {string} key * @param {() => Promise} callback * @returns {Promise} - * @template T */ getOrFetch(key, callback) { this.cleanKey(key) @@ -116,7 +119,6 @@ class RequestCache extends TtlCache { * @param {string} key * @param {() => Promise} callback * @returns {Promise} - * @template T */ getOrFetchPromise(key, callback) { return this.getOrFetch(key, callback).then(result => { @@ -126,5 +128,56 @@ class RequestCache extends TtlCache { } } +/** + * @template T + */ +class UserRequestCache extends TtlCache { + constructor(ttl) { + super(ttl) + /** @type {Map} */ + this.cache + } + + /** + * @param {string} key + * @param {boolean} isReel + * @param {any} [data] + */ + set(key, isReel, data) { + const existing = this.cache.get(key) + // Preserve html failure status if now requesting as reel + const htmlFailed = isReel && existing && existing.htmlFailed + this.cache.set(key, {data, isReel, isFailedPromise: false, htmlFailed, time: Date.now()}) + } + + /** + * @param {string} key + * @param {boolean} willFetchReel + * @param {boolean} isHtmlPreferred + * @param {() => Promise} callback + * @returns {Promise} + */ + 
getOrFetch(key, willFetchReel, isHtmlPreferred, callback) { + this.cleanKey(key) + if (this.cache.has(key)) { + const existing = this.cache.get(key) + if ((!existing.isReel || !isHtmlPreferred || existing.htmlFailed) && !existing.isFailedPromise) return Promise.resolve(existing.data) + } + const pending = callback().then(result => { + if (this.getWithoutClean(key) === pending) { // if nothing has replaced the current cache in the meantime + this.set(key, willFetchReel, result) + } + return result + }).catch(error => { + this.cache.get(key).htmlFailed = true + this.cache.get(key).isFailedPromise = true + throw error + }) + this.set(key, willFetchReel, pending) + return pending + } +} + module.exports.TtlCache = TtlCache module.exports.RequestCache = RequestCache +module.exports.UserRequestCache = UserRequestCache diff --git a/src/lib/collectors.js b/src/lib/collectors.js index c791313..67e74e1 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -2,20 +2,26 @@ const constants = require("./constants") const {request} = require("./utils/request") const switcher = require("./utils/torswitcher") const {extractSharedData} = require("./utils/body") -const {TtlCache, RequestCache} = require("./cache") +const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const RequestHistory = require("./structures/RequestHistory") const db = require("./db") -require("./testimports")(constants, request, extractSharedData, RequestCache, RequestHistory) +require("./testimports")(constants, request, extractSharedData, UserRequestCache, RequestHistory) const requestCache = new RequestCache(constants.caching.resource_cache_time) +const userRequestCache = new UserRequestCache(constants.caching.resource_cache_time) /** @type {import("./cache").TtlCache} */ const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time) const history = new RequestHistory(["user", "timeline", "post", "reel"]) -async function fetchUser(username) { - if 
(constants.allow_user_from_reel === "never") { +async function fetchUser(username, isRSS) { + let mode = constants.allow_user_from_reel + if (mode === "preferForRSS") { + if (isRSS) mode = "prefer" + else mode = "fallback" + } + if (mode === "never") { return fetchUserFromHTML(username) - } else if (constants.allow_user_from_reel === "prefer") { + } else if (mode === "prefer") { const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username) if (userID) return fetchUserFromCombined(userID, username) else return fetchUserFromHTML(username) @@ -24,7 +30,6 @@ async function fetchUser(username) { if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) { const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username) if (userID) { - requestCache.cache.delete("user/"+username) return fetchUserFromCombined(userID, username) } } @@ -34,7 +39,7 @@ async function fetchUser(username) { } function fetchUserFromHTML(username) { - return requestCache.getOrFetch("user/"+username, () => { + return userRequestCache.getOrFetch("user/"+username, false, true, () => { return switcher.request("user_html", `https://www.instagram.com/${username}/`, async res => { if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN if (res.status === 429) throw constants.symbols.RATE_LIMITED @@ -74,7 +79,7 @@ function fetchUserFromCombined(userID, username) { user_id: userID, include_reel: true })) - return requestCache.getOrFetch("user/"+username, () => { + return userRequestCache.getOrFetch("user/"+username, true, false, () => { return switcher.request("reel_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { if (res.status === 429) throw constants.symbols.RATE_LIMITED return res @@ -192,8 +197,8 @@ function fetchShortcodeData(shortcode) { .run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: 
JSON.stringify(data)}) } // if we have the owner but only a reelUser, update it. this code is gross. - if (requestCache.hasNotPromise("user/"+data.owner.username)) { - const user = requestCache.getWithoutClean("user/"+data.owner.username) + if (userRequestCache.hasNotPromise("user/"+data.owner.username)) { + const user = userRequestCache.getWithoutClean("user/"+data.owner.username) if (user.fromReel) { user.data.full_name = data.owner.full_name user.data.is_verified = data.owner.is_verified @@ -214,7 +219,7 @@ module.exports.fetchUser = fetchUser module.exports.fetchTimelinePage = fetchTimelinePage module.exports.getOrCreateShortcode = getOrCreateShortcode module.exports.fetchShortcodeData = fetchShortcodeData -module.exports.requestCache = requestCache +module.exports.userRequestCache = userRequestCache module.exports.timelineEntryCache = timelineEntryCache module.exports.getOrFetchShortcode = getOrFetchShortcode module.exports.history = history diff --git a/src/lib/constants.js b/src/lib/constants.js index 3c04bde..b02e46a 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -20,7 +20,7 @@ let constants = { } }, - allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer". + allow_user_from_reel: "preferForRSS", // one of: "never", "fallback", "prefer", "preferForRSS" settings: { rss_enabled: true diff --git a/src/lib/structures/TimelineEntry.js b/src/lib/structures/TimelineEntry.js index 847ea88..6839e54 100644 --- a/src/lib/structures/TimelineEntry.js +++ b/src/lib/structures/TimelineEntry.js @@ -180,9 +180,9 @@ class TimelineEntry extends TimelineBaseMethods { } // The owner may be in the user cache, so copy from that. // This could be implemented better. 
- else if (collectors.requestCache.hasNotPromise("user/"+this.data.owner.username)) { + else if (collectors.userRequestCache.hasNotPromise("user/"+this.data.owner.username)) { /** @type {import("./User")} */ - const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username) + const user = collectors.userRequestCache.getWithoutClean("user/"+this.data.owner.username) if (user.data.full_name) { this.data.owner = { id: user.data.id, diff --git a/src/site/api/feed.js b/src/site/api/feed.js index 3dc4472..76a5d2b 100644 --- a/src/site/api/feed.js +++ b/src/site/api/feed.js @@ -1,12 +1,12 @@ const constants = require("../../lib/constants") -const {fetchUser, requestCache} = require("../../lib/collectors") +const {fetchUser, userRequestCache} = require("../../lib/collectors") const {render} = require("pinski/plugins") const {pugCache} = require("../passthrough") module.exports = [ {route: `/u/(${constants.external.username_regex})/rss.xml`, methods: ["GET"], code: ({fill}) => { if (constants.settings.rss_enabled) { - return fetchUser(fill[0]).then(async user => { + return fetchUser(fill[0], true).then(async user => { const content = await user.timeline.fetchFeed() const xml = content.xml() return { @@ -27,10 +27,10 @@ module.exports = [ statusCode: 503, contentType: "text/html", headers: { - "Retry-After": requestCache.getTtl("user/"+fill[0], 1000) + "Retry-After": userRequestCache.getTtl("user/"+fill[0], 1000) }, content: pugCache.get("pug/blocked.pug").web({ - expiresMinutes: requestCache.getTtl("user/"+fill[0], 1000*60) + expiresMinutes: userRequestCache.getTtl("user/"+fill[0], 1000*60) }) } } else { diff --git a/src/site/api/routes.js b/src/site/api/routes.js index e0f1fca..f480f66 100644 --- a/src/site/api/routes.js +++ b/src/site/api/routes.js @@ -1,5 +1,5 @@ const constants = require("../../lib/constants") -const {fetchUser, getOrFetchShortcode, requestCache, history} = require("../../lib/collectors") +const {fetchUser, getOrFetchShortcode, 
userRequestCache, history} = require("../../lib/collectors") const {render, redirect} = require("pinski/plugins") const {pugCache} = require("../passthrough") @@ -34,7 +34,7 @@ module.exports = [ { route: `/u/(${constants.external.username_regex})`, methods: ["GET"], code: ({url, fill}) => { const params = url.searchParams - return fetchUser(fill[0]).then(async user => { + return fetchUser(fill[0], false).then(async user => { const page = +params.get("page") if (typeof page === "number" && !isNaN(page) && page >= 1) { await user.timeline.fetchUpToPage(page - 1) @@ -53,10 +53,10 @@ module.exports = [ statusCode: 503, contentType: "text/html", headers: { - "Retry-After": requestCache.getTtl("user/"+fill[0], 1000) + "Retry-After": userRequestCache.getTtl("user/"+fill[0], 1000) }, content: pugCache.get("pug/blocked.pug").web({ - expiresMinutes: requestCache.getTtl("user/"+fill[0], 1000*60) + expiresMinutes: userRequestCache.getTtl("user/"+fill[0], 1000*60) }) } } else { @@ -67,7 +67,7 @@ module.exports = [ }, { route: `/fragment/user/(${constants.external.username_regex})/(\\d+)`, methods: ["GET"], code: async ({url, fill}) => { - return fetchUser(fill[0]).then(async user => { + return fetchUser(fill[0], false).then(async user => { const pageNumber = +fill[1] const pageIndex = pageNumber - 1 await user.timeline.fetchUpToPage(pageIndex) diff --git a/src/site/repl.js b/src/site/repl.js index fa8d747..4e249fe 100644 --- a/src/site/repl.js +++ b/src/site/repl.js @@ -1,5 +1,5 @@ const {instance, pugCache, wss} = require("./passthrough") -const {requestCache, timelineEntryCache, history} = require("../lib/collectors") +const {userRequestCache, timelineEntryCache, history} = require("../lib/collectors") const constants = require("../lib/constants") const util = require("util") const repl = require("repl")