1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2024-11-26 09:37:28 +00:00

Add alternative method to fetch user

This commit is contained in:
Cadence Fish 2020-02-03 02:24:14 +13:00
parent 96fa4758c0
commit 272f4b6e3b
No known key found for this signature in database
GPG Key ID: 81015DF9AA8607E1
10 changed files with 191 additions and 65 deletions

View File

@ -34,6 +34,14 @@ class TtlCache {
return this.cache.has(key) return this.cache.has(key)
} }
hasNotPromise(key) {
const has = this.has(key)
if (!has) return false
const value = this.get(key)
if (value instanceof Promise || (value.constructor && value.constructor.name === "Promise")) return false
return true
}
/** /**
* @param {string} key * @param {string} key
*/ */

View File

@ -12,17 +12,37 @@ const requestCache = new RequestCache(constants.caching.resource_cache_time)
const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time) const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time)
const history = new RequestHistory(["user", "timeline", "post"]) const history = new RequestHistory(["user", "timeline", "post"])
/**
 * Fetch a user, picking the data source according to `constants.allow_user_from_reel`:
 * - "never": always use the HTML profile page.
 * - "prefer": use the combined reel fetch when a user ID for this username is stored in the
 *   Users table, otherwise use the HTML profile page.
 * - anything else (treated as "fallback"): use the HTML profile page, and only if that fails with
 *   INSTAGRAM_DEMANDS_LOGIN or RATE_LIMITED try the combined reel fetch with a stored user ID.
 * @param {string} username
 * @returns a promise for the user; rejects with the original error when no alternative applies
 */
async function fetchUser(username) {
	const mode = constants.allow_user_from_reel
	// The reel query needs a user ID, which is only known if it was stored in the database earlier.
	const lookUpStoredID = () => db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username)

	if (mode === "never") return fetchUserFromHTML(username)

	if (mode === "prefer") {
		const storedID = lookUpStoredID()
		return storedID ? fetchUserFromCombined(storedID, username) : fetchUserFromHTML(username)
	}

	// === "fallback"
	return fetchUserFromHTML(username).catch(error => {
		const blocked = error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED
		if (blocked) {
			const storedID = lookUpStoredID()
			if (storedID) return fetchUserFromCombined(storedID, username)
		}
		// Either the failure was of another kind, or there is no stored ID to retry with.
		throw error
	})
}
function fetchUserFromHTML(username) {
return requestCache.getOrFetch("user/"+username, () => { return requestCache.getOrFetch("user/"+username, () => {
return request(`https://www.instagram.com/${username}/`).then(res => { return switcher.request("user_html", `https://www.instagram.com/${username}/`, async res => {
if (res.status === 302) { if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
history.report("user", false) if (res.status === 429) throw constants.symbols.RATE_LIMITED
throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN return res
} else if (res.status === 404) { }).then(res => {
if (res.status === 404) {
throw constants.symbols.NOT_FOUND throw constants.symbols.NOT_FOUND
} else return res.text().then(text => { } else {
return res.text().then(text => {
// require down here or have to deal with require loop. require cache will take care of it anyway. // require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineImage -> collectors -/> User // User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User") const User = require("./structures/User")
const sharedData = extractSharedData(text) const sharedData = extractSharedData(text)
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
@ -33,10 +53,49 @@ function fetchUser(username) {
} }
return user return user
}) })
}
}).catch(error => {
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
history.report("user", false)
}
throw error
}) })
}) })
} }
function fetchUserFromCombined(userID, username) {
// Fetch basic user information
const p = new URLSearchParams()
p.set("query_hash", constants.external.reel_query_hash)
p.set("variables", JSON.stringify({
user_id: userID,
include_reel: true
}))
return requestCache.getOrFetch("user/"+username, () => {
return switcher.request("reel_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
const result = root.data.user
if (!result) throw constants.symbols.NOT_FOUND
// require down here or have to deal with require loop. require cache will take care of it anyway.
// ReelUser -> Timeline -> TimelineEntry -> collectors -/> User
const ReelUser = require("./structures/ReelUser")
const user = new ReelUser(result.reel.user)
return user
}).catch(error => {
throw error
})
}).then(async user => {
// Add first timeline page
if (!user.timeline.pages[0]) {
const page = await fetchTimelinePage(userID, "")
user.timeline.addPage(page)
}
return user
})
}
/** /**
* @param {string} userID * @param {string} userID
* @param {string} after * @param {string} after
@ -50,8 +109,8 @@ function fetchTimelinePage(userID, after) {
first: constants.external.timeline_fetch_first, first: constants.external.timeline_fetch_first,
after: after after: after
})) }))
return requestCache.getOrFetchPromise("page/"+after, () => { return requestCache.getOrFetchPromise(`page/${userID}/${after}`, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { return switcher.request("timeline_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res return res
}).then(res => res.json()).then(root => { }).then(res => res.json()).then(root => {
@ -77,7 +136,7 @@ function getOrCreateShortcode(shortcode) {
return timelineEntryCache.get(shortcode) return timelineEntryCache.get(shortcode)
} else { } else {
// require down here or have to deal with require loop. require cache will take care of it anyway. // require down here or have to deal with require loop. require cache will take care of it anyway.
// TimelineImage -> collectors -/> TimelineImage // TimelineEntry -> collectors -/> TimelineEntry
const TimelineEntry = require("./structures/TimelineEntry") const TimelineEntry = require("./structures/TimelineEntry")
const result = new TimelineEntry() const result = new TimelineEntry()
timelineEntryCache.set(shortcode, result) timelineEntryCache.set(shortcode, result)
@ -108,7 +167,7 @@ function fetchShortcodeData(shortcode) {
p.set("query_hash", constants.external.shortcode_query_hash) p.set("query_hash", constants.external.shortcode_query_hash)
p.set("variables", JSON.stringify({shortcode})) p.set("variables", JSON.stringify({shortcode}))
return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => { return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { return switcher.request("post_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res return res
}).then(res => res.json()).then(root => { }).then(res => res.json()).then(root => {
@ -123,6 +182,14 @@ function fetchShortcodeData(shortcode) {
db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)") db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)")
.run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)}) .run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)})
} }
// if we have the owner but only a reelUser, update it. this code is gross.
if (requestCache.hasNotPromise("user/"+data.owner.username)) {
const user = requestCache.getWithoutClean("user/"+data.owner.username)
if (user.fromReel) {
user.data.full_name = data.owner.full_name
user.data.is_verified = data.owner.is_verified
}
}
return data return data
} }
}).catch(error => { }).catch(error => {

View File

@ -7,10 +7,21 @@
let constants = { let constants = {
// Things that server owners _should_ change! // Things that server owners _should_ change!
website_origin: "http://localhost:10407", website_origin: "http://localhost:10407",
use_tor: false, // Whether to enable Tor support at all
tor_password: null, // No effect without `use_tor = true`. If `null`, node will run its own Tor process instead.
// Things that server owners _could_ change if they want to. // Things that server owners _could_ change if they want to.
tor: {
enabled: false, // If false, everything else in this block has no effect.
password: null, // If `null`, Bibliogram will run its own Tor process instead.
for: {
user_html: false, // User HTML page seems to have less forgiving rates, and Tor always fails, so it's disabled by default.
timeline_graphql: true,
post_graphql: true,
reel_graphql: true
}
},
allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer".
settings: { settings: {
rss_enabled: true rss_enabled: true
}, },
@ -25,7 +36,7 @@ let constants = {
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream. // Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
external: { external: {
user_query_hash: "c9100bf9110dd6361671f113dd02e7d6", reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08", timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30 timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90", shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",

View File

@ -0,0 +1,32 @@
const constants = require("../constants")
const {proxyImage} = require("../utils/proxyurl")
const Timeline = require("./Timeline")
require("../testimports")(constants, Timeline)
/**
 * A user built from reel query data rather than from the full profile page.
 * Marked with `fromReel = true`; the follower, following and post counts are fixed at 0
 * because this class does not read them from `data`.
 */
class ReelUser {
	/**
	 * @param {import("../types").GraphUser} data
	 */
	constructor(data) {
		// These fields are assigned before the Timeline is created, since it receives this object.
		Object.assign(this, {
			data,
			fromReel: true,
			following: 0,
			followedBy: 0,
			posts: 0
		})
		this.timeline = new Timeline(this)
		this.cachedAt = Date.now()
		this.proxyProfilePicture = proxyImage(this.data.profile_pic_url)
	}

	/**
	 * Time left until `cachedAt + constants.caching.resource_cache_time`, never negative.
	 * @param {number} [scale] divisor applied to the remaining time before rounding up
	 * @returns {number}
	 */
	getTtl(scale = 1) {
		const remaining = this.cachedAt + constants.caching.resource_cache_time - Date.now()
		const clamped = Math.max(remaining, 0)
		return Math.ceil(clamped / scale)
	}

	/**
	 * @returns the raw data object this user was constructed with
	 */
	export() {
		return this.data
	}
}
module.exports = ReelUser

View File

@ -19,14 +19,15 @@ function transformEdges(edges) {
class Timeline { class Timeline {
/** /**
* @param {import("./User")} user * @param {import("./User")|import("./ReelUser")} user
*/ */
constructor(user) { constructor(user) {
this.user = user this.user = user
/** @type {import("./TimelineEntry")[][]} */ /** @type {import("./TimelineEntry")[][]} */
this.pages = [] this.pages = []
if (this.user.data.edge_owner_to_timeline_media) {
this.addPage(this.user.data.edge_owner_to_timeline_media) this.addPage(this.user.data.edge_owner_to_timeline_media)
this.page_info = this.user.data.edge_owner_to_timeline_media.page_info }
} }
hasNextPage() { hasNextPage() {

View File

@ -180,9 +180,10 @@ class TimelineEntry extends TimelineBaseMethods {
} }
// The owner may be in the user cache, so copy from that. // The owner may be in the user cache, so copy from that.
// This could be implemented better. // This could be implemented better.
else if (collectors.requestCache.hasWithoutClean("user/"+this.data.owner.username)) { else if (collectors.requestCache.hasNotPromise("user/"+this.data.owner.username)) {
/** @type {import("./User")} */ /** @type {import("./User")} */
const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username) const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username)
if (user.data.full_name) {
this.data.owner = { this.data.owner = {
id: user.data.id, id: user.data.id,
username: user.data.username, username: user.data.username,
@ -194,14 +195,14 @@ class TimelineEntry extends TimelineBaseMethods {
this.ownerPfpCacheP = clone.profile_pic_url this.ownerPfpCacheP = clone.profile_pic_url
return clone return clone
} }
// That didn't work, so just fall through...
}
// We'll have to re-request ourselves. // We'll have to re-request ourselves.
else {
await this.update() await this.update()
const clone = proxyExtendedOwner(this.data.owner) const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url this.ownerPfpCacheP = clone.profile_pic_url
return clone return clone
} }
}
fetchVideoURL() { fetchVideoURL() {
if (!this.isVideo()) return Promise.resolve(null) if (!this.isVideo()) return Promise.resolve(null)

View File

@ -399,7 +399,7 @@
* @property {GraphEdgeCount} edge_followed_by * @property {GraphEdgeCount} edge_followed_by
* @property {any} edge_media_collections todo: doc * @property {any} edge_media_collections todo: doc
* @property {GraphEdgeCount} edge_mutual_followed_by * @property {GraphEdgeCount} edge_mutual_followed_by
* @property {PagedEdges<GraphImage>} edge_owner_to_timeline_media * @property {PagedEdges<TimelineEntryN1>} edge_owner_to_timeline_media
* @property {any} edge_saved_media todo: doc * @property {any} edge_saved_media todo: doc
* @property {string | null} external_url * @property {string | null} external_url
* @property {string | null} external_url_linkshimmed * @property {string | null} external_url_linkshimmed

View File

@ -44,12 +44,12 @@ module.exports = new Promise(resolve => {
/** @type {import("@deadcanaries/granax/lib/controller")} */ /** @type {import("@deadcanaries/granax/lib/controller")} */
// @ts-ignore // @ts-ignore
let tor let tor
if (constants.tor_password == null) { if (constants.tor.password == null) {
// @ts-ignore // @ts-ignore
tor = new granax() tor = new granax()
} else { } else {
tor = new granax.TorController(connect(9051), {authOnConnect: false}) tor = new granax.TorController(connect(9051), {authOnConnect: false})
tor.authenticate(`"${constants.tor_password}"`, err => { tor.authenticate(`"${constants.tor.password}"`, err => {
if (err) console.log("Tor auth error:", err) if (err) console.log("Tor auth error:", err)
}) })
} }

View File

@ -21,8 +21,8 @@ class TorSwitcher {
* @returns {Promise<T>} * @returns {Promise<T>}
* @template T the return value of the test function * @template T the return value of the test function
*/ */
request(url, test) { request(type, url, test) {
if (this.torManager) { if (this.torManager && constants.tor.for[type]) {
return this.torManager.request(url, test) return this.torManager.request(url, test)
} else { } else {
return request(url).then(res => test(res)) return request(url).then(res => test(res))
@ -32,7 +32,7 @@ class TorSwitcher {
const switcher = new TorSwitcher() const switcher = new TorSwitcher()
if (constants.use_tor) { if (constants.tor.enabled) {
require("./tor").then(torManager => { require("./tor").then(torManager => {
if (torManager) switcher.setManager(torManager) if (torManager) switcher.setManager(torManager)
}) })

View File

@ -10,20 +10,26 @@ html
head head
meta(charset="utf-8") meta(charset="utf-8")
meta(name="viewport" content="width=device-width, initial-scale=1") meta(name="viewport" content="width=device-width, initial-scale=1")
title if user.data.full_name
= `${user.data.full_name} (@${user.data.username}) | Bibliogram` title= `${user.data.full_name} (@${user.data.username}) | Bibliogram`
else
title= `@${user.data.username} | Bibliogram`
link(rel="stylesheet" type="text/css" href="/static/css/main.css") link(rel="stylesheet" type="text/css" href="/static/css/main.css")
script(src="/static/js/pagination.js" type="module") script(src="/static/js/pagination.js" type="module")
body body
.main-divider .main-divider
header.profile-overview header.profile-overview
.profile-sticky .profile-sticky
img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name}'s profile picture.`).pfp img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp
//- //-
Instagram only uses the above URL, but an HD version is also available. Instagram only uses the above URL, but an HD version is also available.
The alt text is pathetic, I know. I don't have much to work with. The alt text is pathetic, I know. I don't have much to work with.
if user.data.full_name
h1.full-name= user.data.full_name h1.full-name= user.data.full_name
h2.username= `@${user.data.username}` h2.username= `@${user.data.username}`
else
h1.full-name= `@${user.data.username}`
if !user.fromReel
p.bio= user.data.biography p.bio= user.data.biography
if user.data.external_url if user.data.external_url
p.website p.website