diff --git a/src/lib/cache.js b/src/lib/cache.js index 8cdb10c..3f9f149 100644 --- a/src/lib/cache.js +++ b/src/lib/cache.js @@ -34,6 +34,14 @@ class TtlCache { return this.cache.has(key) } + hasNotPromise(key) { + const has = this.has(key) + if (!has) return false + const value = this.get(key) + if (value instanceof Promise || (value.constructor && value.constructor.name === "Promise")) return false + return true + } + /** * @param {string} key */ diff --git a/src/lib/collectors.js b/src/lib/collectors.js index 5f07753..055f6fe 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -12,28 +12,87 @@ const requestCache = new RequestCache(constants.caching.resource_cache_time) const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time) const history = new RequestHistory(["user", "timeline", "post"]) -function fetchUser(username) { - return requestCache.getOrFetch("user/"+username, () => { - return request(`https://www.instagram.com/${username}/`).then(res => { - if (res.status === 302) { - history.report("user", false) - throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN - } else if (res.status === 404) { - throw constants.symbols.NOT_FOUND - } else return res.text().then(text => { - // require down here or have to deal with require loop. require cache will take care of it anyway. - // User -> Timeline -> TimelineImage -> collectors -/> User - const User = require("./structures/User") - const sharedData = extractSharedData(text) - const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) - history.report("user", true) - if (constants.caching.db_user_id) { - db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)") - .run({username: user.data.username, user_id: user.data.id}) - } - return user - }) +async function fetchUser(username) { + if (constants.allow_user_from_reel === "never") { + return fetchUserFromHTML(username) + } else if (constants.allow_user_from_reel === "prefer") { + const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username) + if (userID) return fetchUserFromCombined(userID, username) + else return fetchUserFromHTML(username) + } else { // === "fallback" + return fetchUserFromHTML(username).catch(error => { + if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) { + const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username) + if (userID) return fetchUserFromCombined(userID, username) + } + throw error }) + } +} + +function fetchUserFromHTML(username) { + return requestCache.getOrFetch("user/"+username, () => { + return switcher.request("user_html", `https://www.instagram.com/${username}/`, async res => { + if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN + if (res.status === 429) throw constants.symbols.RATE_LIMITED + return res + }).then(res => { + if (res.status === 404) { + throw constants.symbols.NOT_FOUND + } else { + return res.text().then(text => { + // require down here or have to deal with require loop. require cache will take care of it anyway. + // User -> Timeline -> TimelineEntry -> collectors -/> User + const User = require("./structures/User") + const sharedData = extractSharedData(text) + const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) + history.report("user", true) + if (constants.caching.db_user_id) { + db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)") + .run({username: user.data.username, user_id: user.data.id}) + } + return user + }) + } + }).catch(error => { + if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) { + history.report("user", false) + } + throw error + }) + }) +} + +function fetchUserFromCombined(userID, username) { + // Fetch basic user information + const p = new URLSearchParams() + p.set("query_hash", constants.external.reel_query_hash) + p.set("variables", JSON.stringify({ + user_id: userID, + include_reel: true + })) + return requestCache.getOrFetch("user/"+username, () => { + return switcher.request("reel_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { + if (res.status === 429) throw constants.symbols.RATE_LIMITED + return res + }).then(res => res.json()).then(root => { + const result = root.data.user + if (!result) throw constants.symbols.NOT_FOUND + // require down here or have to deal with require loop. require cache will take care of it anyway. + // ReelUser -> Timeline -> TimelineEntry -> collectors -/> User + const ReelUser = require("./structures/ReelUser") + const user = new ReelUser(result.reel.user) + return user + }).catch(error => { + throw error + }) + }).then(async user => { + // Add first timeline page + if (!user.timeline.pages[0]) { + const page = await fetchTimelinePage(userID, "") + user.timeline.addPage(page) + } + return user }) } @@ -50,8 +109,8 @@ function fetchTimelinePage(userID, after) { first: constants.external.timeline_fetch_first, after: after })) - return requestCache.getOrFetchPromise("page/"+after, () => { - return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { + return requestCache.getOrFetchPromise(`page/${userID}/${after}`, () => { + return switcher.request("timeline_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { if (res.status === 429) throw constants.symbols.RATE_LIMITED return res }).then(res => res.json()).then(root => { @@ -77,7 +136,7 @@ function getOrCreateShortcode(shortcode) { return timelineEntryCache.get(shortcode) } else { // require down here or have to deal with require loop. require cache will take care of it anyway. - // TimelineImage -> collectors -/> TimelineImage + // TimelineEntry -> collectors -/> TimelineEntry const TimelineEntry = require("./structures/TimelineEntry") const result = new TimelineEntry() timelineEntryCache.set(shortcode, result) @@ -108,7 +167,7 @@ function fetchShortcodeData(shortcode) { p.set("query_hash", constants.external.shortcode_query_hash) p.set("variables", JSON.stringify({shortcode})) return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => { - return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { + return switcher.request("post_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { if (res.status === 429) throw constants.symbols.RATE_LIMITED return res }).then(res => res.json()).then(root => { @@ -123,6 +182,14 @@ function fetchShortcodeData(shortcode) { db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)") .run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)}) } + // if we have the owner but only a reelUser, update it. this code is gross. + if (requestCache.hasNotPromise("user/"+data.owner.username)) { + const user = requestCache.getWithoutClean("user/"+data.owner.username) + if (user.fromReel) { + user.data.full_name = data.owner.full_name + user.data.is_verified = data.owner.is_verified + } + } return data } }).catch(error => { diff --git a/src/lib/constants.js b/src/lib/constants.js index 32dafe6..3c04bde 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -7,10 +7,21 @@ let constants = { // Things that server owners _should_ change! website_origin: "http://localhost:10407", - use_tor: false, // Whether to enable Tor support at all - tor_password: null, // No effect without `use_tor = true`. If `null`, node will run its own Tor process instead. // Things that server owners _could_ change if they want to. + tor: { + enabled: false, // If false, everything else in this block has no effect. + password: null, // If `null`, Bibliogram will run its own Tor process instead. + for: { + user_html: false, // User HTML page seems to have less forgiving rates, and Tor always fails, so it's disabled by default. + timeline_graphql: true, + post_graphql: true, + reel_graphql: true + } + }, + + allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer". + settings: { rss_enabled: true }, @@ -25,7 +36,7 @@ let constants = { // Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream. external: { - user_query_hash: "c9100bf9110dd6361671f113dd02e7d6", + reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6", timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08", timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30 shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90", diff --git a/src/lib/structures/ReelUser.js b/src/lib/structures/ReelUser.js new file mode 100644 index 0000000..48aa559 --- /dev/null +++ b/src/lib/structures/ReelUser.js @@ -0,0 +1,32 @@ +const constants = require("../constants") +const {proxyImage} = require("../utils/proxyurl") +const Timeline = require("./Timeline") +require("../testimports")(constants, Timeline) + +class ReelUser { + /** + * @param {import("../types").GraphUser} data + */ + constructor(data) { + this.data = data + this.fromReel = true + this.following = 0 + this.followedBy = 0 + this.posts = 0 + this.timeline = new Timeline(this) + this.cachedAt = Date.now() + this.proxyProfilePicture = proxyImage(this.data.profile_pic_url) + } + + getTtl(scale = 1) { + const expiresAt = this.cachedAt + constants.caching.resource_cache_time + const ttl = expiresAt - Date.now() + return Math.ceil(Math.max(ttl, 0) / scale) + } + + export() { + return this.data + } +} + +module.exports = ReelUser diff --git a/src/lib/structures/Timeline.js b/src/lib/structures/Timeline.js index 392ef2a..1ff7f98 100644 --- a/src/lib/structures/Timeline.js +++ b/src/lib/structures/Timeline.js @@ -19,14 +19,15 @@ function transformEdges(edges) { class Timeline { /** - * @param {import("./User")} user + * @param {import("./User")|import("./ReelUser")} user */ constructor(user) { this.user = user /** @type {import("./TimelineEntry")[][]} */ this.pages = [] - this.addPage(this.user.data.edge_owner_to_timeline_media) - this.page_info = this.user.data.edge_owner_to_timeline_media.page_info + if (this.user.data.edge_owner_to_timeline_media) { + this.addPage(this.user.data.edge_owner_to_timeline_media) + } } hasNextPage() { diff --git a/src/lib/structures/TimelineEntry.js b/src/lib/structures/TimelineEntry.js index 26fba3d..847ea88 100644 --- a/src/lib/structures/TimelineEntry.js +++ b/src/lib/structures/TimelineEntry.js @@ -180,27 +180,28 @@ class TimelineEntry extends TimelineBaseMethods { } // The owner may be in the user cache, so copy from that. // This could be implemented better. - else if (collectors.requestCache.hasWithoutClean("user/"+this.data.owner.username)) { + else if (collectors.requestCache.hasNotPromise("user/"+this.data.owner.username)) { /** @type {import("./User")} */ const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username) - this.data.owner = { - id: user.data.id, - username: user.data.username, - is_verified: user.data.is_verified, - full_name: user.data.full_name, - profile_pic_url: user.data.profile_pic_url // _hd is also available here. + if (user.data.full_name) { + this.data.owner = { + id: user.data.id, + username: user.data.username, + is_verified: user.data.is_verified, + full_name: user.data.full_name, + profile_pic_url: user.data.profile_pic_url // _hd is also available here. + } + const clone = proxyExtendedOwner(this.data.owner) + this.ownerPfpCacheP = clone.profile_pic_url + return clone } - const clone = proxyExtendedOwner(this.data.owner) - this.ownerPfpCacheP = clone.profile_pic_url - return clone + // That didn't work, so just fall through... } // We'll have to re-request ourselves. - else { - await this.update() - const clone = proxyExtendedOwner(this.data.owner) - this.ownerPfpCacheP = clone.profile_pic_url - return clone - } + await this.update() + const clone = proxyExtendedOwner(this.data.owner) + this.ownerPfpCacheP = clone.profile_pic_url + return clone } fetchVideoURL() { diff --git a/src/lib/types.js b/src/lib/types.js index e55074a..cd72491 100644 --- a/src/lib/types.js +++ b/src/lib/types.js @@ -399,7 +399,7 @@ * @property {GraphEdgeCount} edge_followed_by * @property {any} edge_media_collections todo: doc * @property {GraphEdgeCount} edge_mutual_followed_by - * @property {PagedEdges} edge_owner_to_timeline_media + * @property {PagedEdges} edge_owner_to_timeline_media * @property {any} edge_saved_media todo: doc * @property {string | null} external_url * @property {string | null} external_url_linkshimmed diff --git a/src/lib/utils/tor.js b/src/lib/utils/tor.js index 12c5372..f64750f 100644 --- a/src/lib/utils/tor.js +++ b/src/lib/utils/tor.js @@ -44,12 +44,12 @@ module.exports = new Promise(resolve => { /** @type {import("@deadcanaries/granax/lib/controller")} */ // @ts-ignore let tor - if (constants.tor_password == null) { + if (constants.tor.password == null) { // @ts-ignore tor = new granax() } else { tor = new granax.TorController(connect(9051), {authOnConnect: false}) - tor.authenticate(`"${constants.tor_password}"`, err => { + tor.authenticate(`"${constants.tor.password}"`, err => { if (err) console.log("Tor auth error:", err) }) } diff --git a/src/lib/utils/torswitcher.js b/src/lib/utils/torswitcher.js index e1d9348..ab8a840 100644 --- a/src/lib/utils/torswitcher.js +++ b/src/lib/utils/torswitcher.js @@ -21,8 +21,8 @@ class TorSwitcher { * @returns {Promise} * @template T the return value of the test function */ - request(url, test) { - if (this.torManager) { + request(type, url, test) { + if (this.torManager && constants.tor.for[type]) { return this.torManager.request(url, test) } else { return request(url).then(res => test(res)) @@ -32,7 +32,7 @@ class TorSwitcher { const switcher = new TorSwitcher() -if (constants.use_tor) { +if (constants.tor.enabled) { require("./tor").then(torManager => { if (torManager) switcher.setManager(torManager) }) diff --git a/src/site/pug/user.pug b/src/site/pug/user.pug index fd7e41a..2b5119d 100644 --- a/src/site/pug/user.pug +++ b/src/site/pug/user.pug @@ -10,27 +10,33 @@ html head meta(charset="utf-8") meta(name="viewport" content="width=device-width, initial-scale=1") - title - = `${user.data.full_name} (@${user.data.username}) | Bibliogram` + if user.data.full_name + title= `${user.data.full_name} (@${user.data.username}) | Bibliogram` + else + title= `@${user.data.username} | Bibliogram` link(rel="stylesheet" type="text/css" href="/static/css/main.css") script(src="/static/js/pagination.js" type="module") body .main-divider header.profile-overview .profile-sticky - img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name}'s profile picture.`).pfp + img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp //- Instagram only uses the above URL, but an HD version is also available. The alt text is pathetic, I know. I don't have much to work with. - h1.full-name= user.data.full_name - h2.username= `@${user.data.username}` - p.bio= user.data.biography - if user.data.external_url - p.website - a(href=user.data.external_url)= user.data.external_url - div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts - div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following - div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by + if user.data.full_name + h1.full-name= user.data.full_name + h2.username= `@${user.data.username}` + else + h1.full-name= `@${user.data.username}` + if !user.fromReel + p.bio= user.data.biography + if user.data.external_url + p.website + a(href=user.data.external_url)= user.data.external_url + div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts + div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following + div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by div.links if constants.settings.rss_enabled a(rel="alternate" type="application/rss+xml" href=`/u/${user.data.username}/rss.xml`) RSS