1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2024-11-23 00:27:30 +00:00

Add alternative method to fetch user

This commit is contained in:
Cadence Fish 2020-02-03 02:24:14 +13:00
parent 96fa4758c0
commit 272f4b6e3b
No known key found for this signature in database
GPG Key ID: 81015DF9AA8607E1
10 changed files with 191 additions and 65 deletions

View File

@ -34,6 +34,14 @@ class TtlCache {
return this.cache.has(key) return this.cache.has(key)
} }
hasNotPromise(key) {
const has = this.has(key)
if (!has) return false
const value = this.get(key)
if (value instanceof Promise || (value.constructor && value.constructor.name === "Promise")) return false
return true
}
/** /**
* @param {string} key * @param {string} key
*/ */

View File

@ -12,28 +12,87 @@ const requestCache = new RequestCache(constants.caching.resource_cache_time)
const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time) const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time)
const history = new RequestHistory(["user", "timeline", "post"]) const history = new RequestHistory(["user", "timeline", "post"])
function fetchUser(username) { async function fetchUser(username) {
return requestCache.getOrFetch("user/"+username, () => { if (constants.allow_user_from_reel === "never") {
return request(`https://www.instagram.com/${username}/`).then(res => { return fetchUserFromHTML(username)
if (res.status === 302) { } else if (constants.allow_user_from_reel === "prefer") {
history.report("user", false) const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username)
throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN if (userID) return fetchUserFromCombined(userID, username)
} else if (res.status === 404) { else return fetchUserFromHTML(username)
throw constants.symbols.NOT_FOUND } else { // === "fallback"
} else return res.text().then(text => { return fetchUserFromHTML(username).catch(error => {
// require down here or have to deal with require loop. require cache will take care of it anyway. if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
// User -> Timeline -> TimelineImage -> collectors -/> User const userID = db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username)
const User = require("./structures/User") if (userID) return fetchUserFromCombined(userID, username)
const sharedData = extractSharedData(text) }
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) throw error
history.report("user", true)
if (constants.caching.db_user_id) {
db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)")
.run({username: user.data.username, user_id: user.data.id})
}
return user
})
}) })
}
}
/**
 * Load a user by requesting their profile page and reading the shared data embedded in the HTML.
 * The result goes through `requestCache` under the key `user/<username>`.
 *
 * Rejects with `constants.symbols.INSTAGRAM_DEMANDS_LOGIN` on a 302 response,
 * `constants.symbols.RATE_LIMITED` on a 429 response, and `constants.symbols.NOT_FOUND` on a 404 response.
 * @param {string} username
 */
function fetchUserFromHTML(username) {
	const cacheKey = "user/" + username
	return requestCache.getOrFetch(cacheKey, () => {
		// Passed to the switcher as the response test: these statuses mean the request was blocked.
		const rejectIfBlocked = async response => {
			switch (response.status) {
				case 302:
					throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
				case 429:
					throw constants.symbols.RATE_LIMITED
				default:
					return response
			}
		}

		// Turn the page HTML into a User, record the success, and remember the username -> id mapping.
		const buildUser = html => {
			// require down here or have to deal with require loop. require cache will take care of it anyway.
			// User -> Timeline -> TimelineEntry -> collectors -/> User
			const User = require("./structures/User")
			const profile = extractSharedData(html).entry_data.ProfilePage[0].graphql.user
			const created = new User(profile)
			history.report("user", true)
			if (constants.caching.db_user_id) {
				const insert = db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)")
				insert.run({username: created.data.username, user_id: created.data.id})
			}
			return created
		}

		// Only login demands and rate limits count as failures in the history; every error is passed on.
		const reportIfBlocked = error => {
			const blocked = error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED
			if (blocked) history.report("user", false)
			throw error
		}

		return switcher.request("user_html", `https://www.instagram.com/${username}/`, rejectIfBlocked)
			.then(response => {
				if (response.status === 404) throw constants.symbols.NOT_FOUND
				return response.text().then(buildUser)
			})
			.catch(reportIfBlocked)
	})
}
/**
 * Load a user through the reel GraphQL query instead of the profile HTML page,
 * then make sure the first timeline page is attached.
 * The user object goes through `requestCache` under the key `user/<username>`.
 *
 * Rejects with `constants.symbols.RATE_LIMITED` on a 429 response and with
 * `constants.symbols.NOT_FOUND` when the response contains no `data.user`.
 * @param {string} userID sent as `user_id` in the query variables
 * @param {string} username only used to build the cache key
 */
function fetchUserFromCombined(userID, username) {
	// Fetch basic user information
	const p = new URLSearchParams()
	p.set("query_hash", constants.external.reel_query_hash)
	p.set("variables", JSON.stringify({
		user_id: userID,
		include_reel: true
	}))
	return requestCache.getOrFetch("user/"+username, () => {
		return switcher.request("reel_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
			if (res.status === 429) throw constants.symbols.RATE_LIMITED
			return res
		}).then(res => res.json()).then(root => {
			const result = root.data.user
			if (!result) throw constants.symbols.NOT_FOUND
			// require down here or have to deal with require loop. require cache will take care of it anyway.
			// ReelUser -> Timeline -> TimelineEntry -> collectors -/> User
			const ReelUser = require("./structures/ReelUser")
			return new ReelUser(result.reel.user)
		})
		// No catch here: a handler that only rethrows changes nothing, so errors simply propagate.
	}).then(async user => {
		// Add first timeline page. The reel response carries no timeline data, so it is requested separately,
		// and only when the (possibly cached) user does not have a first page yet.
		if (!user.timeline.pages[0]) {
			const page = await fetchTimelinePage(userID, "")
			user.timeline.addPage(page)
		}
		return user
	})
}
@ -50,8 +109,8 @@ function fetchTimelinePage(userID, after) {
first: constants.external.timeline_fetch_first, first: constants.external.timeline_fetch_first,
after: after after: after
})) }))
return requestCache.getOrFetchPromise("page/"+after, () => { return requestCache.getOrFetchPromise(`page/${userID}/${after}`, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { return switcher.request("timeline_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res return res
}).then(res => res.json()).then(root => { }).then(res => res.json()).then(root => {
@ -77,7 +136,7 @@ function getOrCreateShortcode(shortcode) {
return timelineEntryCache.get(shortcode) return timelineEntryCache.get(shortcode)
} else { } else {
// require down here or have to deal with require loop. require cache will take care of it anyway. // require down here or have to deal with require loop. require cache will take care of it anyway.
// TimelineImage -> collectors -/> TimelineImage // TimelineEntry -> collectors -/> TimelineEntry
const TimelineEntry = require("./structures/TimelineEntry") const TimelineEntry = require("./structures/TimelineEntry")
const result = new TimelineEntry() const result = new TimelineEntry()
timelineEntryCache.set(shortcode, result) timelineEntryCache.set(shortcode, result)
@ -108,7 +167,7 @@ function fetchShortcodeData(shortcode) {
p.set("query_hash", constants.external.shortcode_query_hash) p.set("query_hash", constants.external.shortcode_query_hash)
p.set("variables", JSON.stringify({shortcode})) p.set("variables", JSON.stringify({shortcode}))
return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => { return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => { return switcher.request("post_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res return res
}).then(res => res.json()).then(root => { }).then(res => res.json()).then(root => {
@ -123,6 +182,14 @@ function fetchShortcodeData(shortcode) {
db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)") db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)")
.run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)}) .run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)})
} }
// if we have the owner but only a reelUser, update it. this code is gross.
if (requestCache.hasNotPromise("user/"+data.owner.username)) {
const user = requestCache.getWithoutClean("user/"+data.owner.username)
if (user.fromReel) {
user.data.full_name = data.owner.full_name
user.data.is_verified = data.owner.is_verified
}
}
return data return data
} }
}).catch(error => { }).catch(error => {

View File

@ -7,10 +7,21 @@
let constants = { let constants = {
// Things that server owners _should_ change! // Things that server owners _should_ change!
website_origin: "http://localhost:10407", website_origin: "http://localhost:10407",
use_tor: false, // Whether to enable Tor support at all
tor_password: null, // No effect without `use_tor = true`. If `null`, node will run its own Tor process instead.
// Things that server owners _could_ change if they want to. // Things that server owners _could_ change if they want to.
tor: {
enabled: false, // If false, everything else in this block has no effect.
password: null, // If `null`, Bibliogram will run its own Tor process instead.
for: {
user_html: false, // User HTML page seems to have less forgiving rates, and Tor always fails, so it's disabled by default.
timeline_graphql: true,
post_graphql: true,
reel_graphql: true
}
},
allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer".
settings: { settings: {
rss_enabled: true rss_enabled: true
}, },
@ -25,7 +36,7 @@ let constants = {
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream. // Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
external: { external: {
user_query_hash: "c9100bf9110dd6361671f113dd02e7d6", reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08", timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30 timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90", shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",

View File

@ -0,0 +1,32 @@
const constants = require("../constants")
const {proxyImage} = require("../utils/proxyurl")
const Timeline = require("./Timeline")
require("../testimports")(constants, Timeline)
/**
 * A user object with the same outward fields the templates and collectors read
 * (`data`, `timeline`, counters, `proxyProfilePicture`), flagged with `fromReel = true`.
 * The follower, following and post counters are always initialised to 0.
 */
class ReelUser {
	/**
	 * @param {import("../types").GraphUser} data
	 */
	constructor(data) {
		this.data = data
		// Flag and counters are set before the Timeline is created, which receives this instance.
		Object.assign(this, {
			fromReel: true,
			following: 0,
			followedBy: 0,
			posts: 0
		})
		this.timeline = new Timeline(this)
		this.cachedAt = Date.now()
		this.proxyProfilePicture = proxyImage(this.data.profile_pic_url)
	}

	/**
	 * Time left until this object is older than `constants.caching.resource_cache_time`.
	 * @param {number} [scale] divisor applied to the remaining time before rounding up, e.g. 1000 to get seconds from milliseconds
	 * @returns {number} remaining time divided by `scale`, rounded up, never below 0
	 */
	getTtl(scale = 1) {
		const remaining = this.cachedAt + constants.caching.resource_cache_time - Date.now()
		return Math.ceil(Math.max(remaining, 0) / scale)
	}

	/**
	 * @returns the raw data object this user was constructed with
	 */
	export() {
		return this.data
	}
}
module.exports = ReelUser

View File

@ -19,14 +19,15 @@ function transformEdges(edges) {
class Timeline { class Timeline {
/** /**
* @param {import("./User")} user * @param {import("./User")|import("./ReelUser")} user
*/ */
constructor(user) { constructor(user) {
this.user = user this.user = user
/** @type {import("./TimelineEntry")[][]} */ /** @type {import("./TimelineEntry")[][]} */
this.pages = [] this.pages = []
this.addPage(this.user.data.edge_owner_to_timeline_media) if (this.user.data.edge_owner_to_timeline_media) {
this.page_info = this.user.data.edge_owner_to_timeline_media.page_info this.addPage(this.user.data.edge_owner_to_timeline_media)
}
} }
hasNextPage() { hasNextPage() {

View File

@ -180,27 +180,28 @@ class TimelineEntry extends TimelineBaseMethods {
} }
// The owner may be in the user cache, so copy from that. // The owner may be in the user cache, so copy from that.
// This could be implemented better. // This could be implemented better.
else if (collectors.requestCache.hasWithoutClean("user/"+this.data.owner.username)) { else if (collectors.requestCache.hasNotPromise("user/"+this.data.owner.username)) {
/** @type {import("./User")} */ /** @type {import("./User")} */
const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username) const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username)
this.data.owner = { if (user.data.full_name) {
id: user.data.id, this.data.owner = {
username: user.data.username, id: user.data.id,
is_verified: user.data.is_verified, username: user.data.username,
full_name: user.data.full_name, is_verified: user.data.is_verified,
profile_pic_url: user.data.profile_pic_url // _hd is also available here. full_name: user.data.full_name,
profile_pic_url: user.data.profile_pic_url // _hd is also available here.
}
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
} }
const clone = proxyExtendedOwner(this.data.owner) // That didn't work, so just fall through...
this.ownerPfpCacheP = clone.profile_pic_url
return clone
} }
// We'll have to re-request ourselves. // We'll have to re-request ourselves.
else { await this.update()
await this.update() const clone = proxyExtendedOwner(this.data.owner)
const clone = proxyExtendedOwner(this.data.owner) this.ownerPfpCacheP = clone.profile_pic_url
this.ownerPfpCacheP = clone.profile_pic_url return clone
return clone
}
} }
fetchVideoURL() { fetchVideoURL() {

View File

@ -399,7 +399,7 @@
* @property {GraphEdgeCount} edge_followed_by * @property {GraphEdgeCount} edge_followed_by
* @property {any} edge_media_collections todo: doc * @property {any} edge_media_collections todo: doc
* @property {GraphEdgeCount} edge_mutual_followed_by * @property {GraphEdgeCount} edge_mutual_followed_by
* @property {PagedEdges<GraphImage>} edge_owner_to_timeline_media * @property {PagedEdges<TimelineEntryN1>} edge_owner_to_timeline_media
* @property {any} edge_saved_media todo: doc * @property {any} edge_saved_media todo: doc
* @property {string | null} external_url * @property {string | null} external_url
* @property {string | null} external_url_linkshimmed * @property {string | null} external_url_linkshimmed

View File

@ -44,12 +44,12 @@ module.exports = new Promise(resolve => {
/** @type {import("@deadcanaries/granax/lib/controller")} */ /** @type {import("@deadcanaries/granax/lib/controller")} */
// @ts-ignore // @ts-ignore
let tor let tor
if (constants.tor_password == null) { if (constants.tor.password == null) {
// @ts-ignore // @ts-ignore
tor = new granax() tor = new granax()
} else { } else {
tor = new granax.TorController(connect(9051), {authOnConnect: false}) tor = new granax.TorController(connect(9051), {authOnConnect: false})
tor.authenticate(`"${constants.tor_password}"`, err => { tor.authenticate(`"${constants.tor.password}"`, err => {
if (err) console.log("Tor auth error:", err) if (err) console.log("Tor auth error:", err)
}) })
} }

View File

@ -21,8 +21,8 @@ class TorSwitcher {
* @returns {Promise<T>} * @returns {Promise<T>}
* @template T the return value of the test function * @template T the return value of the test function
*/ */
request(url, test) { request(type, url, test) {
if (this.torManager) { if (this.torManager && constants.tor.for[type]) {
return this.torManager.request(url, test) return this.torManager.request(url, test)
} else { } else {
return request(url).then(res => test(res)) return request(url).then(res => test(res))
@ -32,7 +32,7 @@ class TorSwitcher {
const switcher = new TorSwitcher() const switcher = new TorSwitcher()
if (constants.use_tor) { if (constants.tor.enabled) {
require("./tor").then(torManager => { require("./tor").then(torManager => {
if (torManager) switcher.setManager(torManager) if (torManager) switcher.setManager(torManager)
}) })

View File

@ -10,27 +10,33 @@ html
head head
meta(charset="utf-8") meta(charset="utf-8")
meta(name="viewport" content="width=device-width, initial-scale=1") meta(name="viewport" content="width=device-width, initial-scale=1")
title if user.data.full_name
= `${user.data.full_name} (@${user.data.username}) | Bibliogram` title= `${user.data.full_name} (@${user.data.username}) | Bibliogram`
else
title= `@${user.data.username} | Bibliogram`
link(rel="stylesheet" type="text/css" href="/static/css/main.css") link(rel="stylesheet" type="text/css" href="/static/css/main.css")
script(src="/static/js/pagination.js" type="module") script(src="/static/js/pagination.js" type="module")
body body
.main-divider .main-divider
header.profile-overview header.profile-overview
.profile-sticky .profile-sticky
img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name}'s profile picture.`).pfp img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp
//- //-
Instagram only uses the above URL, but an HD version is also available. Instagram only uses the above URL, but an HD version is also available.
The alt text is pathetic, I know. I don't have much to work with. The alt text is pathetic, I know. I don't have much to work with.
h1.full-name= user.data.full_name if user.data.full_name
h2.username= `@${user.data.username}` h1.full-name= user.data.full_name
p.bio= user.data.biography h2.username= `@${user.data.username}`
if user.data.external_url else
p.website h1.full-name= `@${user.data.username}`
a(href=user.data.external_url)= user.data.external_url if !user.fromReel
div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts p.bio= user.data.biography
div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following if user.data.external_url
div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by p.website
a(href=user.data.external_url)= user.data.external_url
div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts
div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following
div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by
div.links div.links
if constants.settings.rss_enabled if constants.settings.rss_enabled
a(rel="alternate" type="application/rss+xml" href=`/u/${user.data.username}/rss.xml`) RSS a(rel="alternate" type="application/rss+xml" href=`/u/${user.data.username}/rss.xml`) RSS