Add alternative method to fetch user

This commit is contained in:
Cadence Fish 2020-02-03 02:24:14 +13:00
parent 96fa4758c0
commit 272f4b6e3b
No known key found for this signature in database
GPG Key ID: 81015DF9AA8607E1
10 changed files with 191 additions and 65 deletions

View File

@ -34,6 +34,14 @@ class TtlCache {
return this.cache.has(key)
}
hasNotPromise(key) {
const has = this.has(key)
if (!has) return false
const value = this.get(key)
if (value instanceof Promise || (value.constructor && value.constructor.name === "Promise")) return false
return true
}
/**
* @param {string} key
*/

View File

@ -12,28 +12,87 @@ const requestCache = new RequestCache(constants.caching.resource_cache_time)
const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time)
const history = new RequestHistory(["user", "timeline", "post"])
function fetchUser(username) {
return requestCache.getOrFetch("user/"+username, () => {
return request(`https://www.instagram.com/${username}/`).then(res => {
if (res.status === 302) {
history.report("user", false)
throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
} else if (res.status === 404) {
throw constants.symbols.NOT_FOUND
} else return res.text().then(text => {
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineImage -> collectors -/> User
const User = require("./structures/User")
const sharedData = extractSharedData(text)
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
history.report("user", true)
if (constants.caching.db_user_id) {
db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)")
.run({username: user.data.username, user_id: user.data.id})
}
return user
})
/**
 * Fetch a user, choosing the data source according to
 * `constants.allow_user_from_reel`:
 * - "never": always use the profile HTML page.
 * - "prefer": use the reel/combined method when the user ID is already in the
 *   Users table, otherwise use the HTML page.
 * - anything else (i.e. "fallback"): use the HTML page, and only if that
 *   fails with INSTAGRAM_DEMANDS_LOGIN or RATE_LIMITED and the user ID is
 *   known, retry with the reel/combined method.
 * @param {string} username
 * @returns {Promise<any>} resolves with whatever the chosen fetcher resolves with
 */
async function fetchUser(username) {
	const mode = constants.allow_user_from_reel
	// The reel method needs a numeric user ID, which is only available from the database.
	const lookupUserID = () => db.prepare("SELECT user_id FROM Users WHERE username = ?").pluck().get(username)

	if (mode === "never") return fetchUserFromHTML(username)

	if (mode === "prefer") {
		const knownID = lookupUserID()
		return knownID ? fetchUserFromCombined(knownID, username) : fetchUserFromHTML(username)
	}

	// mode === "fallback"
	try {
		return await fetchUserFromHTML(username)
	} catch (error) {
		const {INSTAGRAM_DEMANDS_LOGIN, RATE_LIMITED} = constants.symbols
		const isBlocked = error === INSTAGRAM_DEMANDS_LOGIN || error === RATE_LIMITED
		if (isBlocked) {
			const knownID = lookupUserID()
			if (knownID) return fetchUserFromCombined(knownID, username)
		}
		// Not a blocking error, or no stored ID to fall back on.
		throw error
	}
}
function fetchUserFromHTML(username) {
return requestCache.getOrFetch("user/"+username, () => {
return switcher.request("user_html", `https://www.instagram.com/${username}/`, async res => {
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => {
if (res.status === 404) {
throw constants.symbols.NOT_FOUND
} else {
return res.text().then(text => {
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const sharedData = extractSharedData(text)
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
history.report("user", true)
if (constants.caching.db_user_id) {
db.prepare("INSERT OR IGNORE INTO Users (username, user_id) VALUES (@username, @user_id)")
.run({username: user.data.username, user_id: user.data.id})
}
return user
})
}
}).catch(error => {
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
history.report("user", false)
}
throw error
})
})
}
function fetchUserFromCombined(userID, username) {
// Fetch basic user information
const p = new URLSearchParams()
p.set("query_hash", constants.external.reel_query_hash)
p.set("variables", JSON.stringify({
user_id: userID,
include_reel: true
}))
return requestCache.getOrFetch("user/"+username, () => {
return switcher.request("reel_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
const result = root.data.user
if (!result) throw constants.symbols.NOT_FOUND
// require down here or have to deal with require loop. require cache will take care of it anyway.
// ReelUser -> Timeline -> TimelineEntry -> collectors -/> User
const ReelUser = require("./structures/ReelUser")
const user = new ReelUser(result.reel.user)
return user
}).catch(error => {
throw error
})
}).then(async user => {
// Add first timeline page
if (!user.timeline.pages[0]) {
const page = await fetchTimelinePage(userID, "")
user.timeline.addPage(page)
}
return user
})
}
@ -50,8 +109,8 @@ function fetchTimelinePage(userID, after) {
first: constants.external.timeline_fetch_first,
after: after
}))
return requestCache.getOrFetchPromise("page/"+after, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
return requestCache.getOrFetchPromise(`page/${userID}/${after}`, () => {
return switcher.request("timeline_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
@ -77,7 +136,7 @@ function getOrCreateShortcode(shortcode) {
return timelineEntryCache.get(shortcode)
} else {
// require down here or have to deal with require loop. require cache will take care of it anyway.
// TimelineImage -> collectors -/> TimelineImage
// TimelineEntry -> collectors -/> TimelineEntry
const TimelineEntry = require("./structures/TimelineEntry")
const result = new TimelineEntry()
timelineEntryCache.set(shortcode, result)
@ -108,7 +167,7 @@ function fetchShortcodeData(shortcode) {
p.set("query_hash", constants.external.shortcode_query_hash)
p.set("variables", JSON.stringify({shortcode}))
return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
return switcher.request("post_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
@ -123,6 +182,14 @@ function fetchShortcodeData(shortcode) {
db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)")
.run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)})
}
// if we have the owner but only a reelUser, update it. this code is gross.
if (requestCache.hasNotPromise("user/"+data.owner.username)) {
const user = requestCache.getWithoutClean("user/"+data.owner.username)
if (user.fromReel) {
user.data.full_name = data.owner.full_name
user.data.is_verified = data.owner.is_verified
}
}
return data
}
}).catch(error => {

View File

@ -7,10 +7,21 @@
let constants = {
// Things that server owners _should_ change!
website_origin: "http://localhost:10407",
use_tor: false, // Whether to enable Tor support at all
tor_password: null, // No effect without `use_tor = true`. If `null`, node will run its own Tor process instead.
// Things that server owners _could_ change if they want to.
tor: {
enabled: false, // If false, everything else in this block has no effect.
password: null, // If `null`, Bibliogram will run its own Tor process instead.
for: {
user_html: false, // User HTML page seems to have less forgiving rates, and Tor always fails, so it's disabled by default.
timeline_graphql: true,
post_graphql: true,
reel_graphql: true
}
},
allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer".
settings: {
rss_enabled: true
},
@ -25,7 +36,7 @@ let constants = {
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
external: {
user_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",

View File

@ -0,0 +1,32 @@
const constants = require("../constants")
const {proxyImage} = require("../utils/proxyurl")
const Timeline = require("./Timeline")
require("../testimports")(constants, Timeline)
/**
 * A user built from reel query data. Instances are flagged with
 * `fromReel = true` and start with all profile counters at zero.
 */
class ReelUser {
	/**
	 * @param {import("../types").GraphUser} data
	 */
	constructor(data) {
		this.data = data
		this.fromReel = true

		// Counters are initialised to zero rather than read from `data`.
		this.following = 0
		this.followedBy = 0
		this.posts = 0

		// Timeline is handed this instance, so `data` must already be set at this point.
		this.timeline = new Timeline(this)
		this.cachedAt = Date.now()
		this.proxyProfilePicture = proxyImage(this.data.profile_pic_url)
	}

	/**
	 * Time left before this user outlives `constants.caching.resource_cache_time`,
	 * measured from `cachedAt`. Never negative.
	 * @param {number} [scale] divisor applied to the remaining milliseconds; the result is rounded up
	 * @returns {number}
	 */
	getTtl(scale = 1) {
		const lifetime = constants.caching.resource_cache_time
		const remaining = this.cachedAt + lifetime - Date.now()
		const clamped = Math.max(remaining, 0)
		return Math.ceil(clamped / scale)
	}

	/**
	 * @returns the raw data object this user was constructed with
	 */
	export() {
		return this.data
	}
}
module.exports = ReelUser

View File

@ -19,14 +19,15 @@ function transformEdges(edges) {
class Timeline {
/**
* @param {import("./User")} user
* @param {import("./User")|import("./ReelUser")} user
*/
constructor(user) {
this.user = user
/** @type {import("./TimelineEntry")[][]} */
this.pages = []
this.addPage(this.user.data.edge_owner_to_timeline_media)
this.page_info = this.user.data.edge_owner_to_timeline_media.page_info
if (this.user.data.edge_owner_to_timeline_media) {
this.addPage(this.user.data.edge_owner_to_timeline_media)
}
}
hasNextPage() {

View File

@ -180,27 +180,28 @@ class TimelineEntry extends TimelineBaseMethods {
}
// The owner may be in the user cache, so copy from that.
// This could be implemented better.
else if (collectors.requestCache.hasWithoutClean("user/"+this.data.owner.username)) {
else if (collectors.requestCache.hasNotPromise("user/"+this.data.owner.username)) {
/** @type {import("./User")} */
const user = collectors.requestCache.getWithoutClean("user/"+this.data.owner.username)
this.data.owner = {
id: user.data.id,
username: user.data.username,
is_verified: user.data.is_verified,
full_name: user.data.full_name,
profile_pic_url: user.data.profile_pic_url // _hd is also available here.
if (user.data.full_name) {
this.data.owner = {
id: user.data.id,
username: user.data.username,
is_verified: user.data.is_verified,
full_name: user.data.full_name,
profile_pic_url: user.data.profile_pic_url // _hd is also available here.
}
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
}
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
// That didn't work, so just fall through...
}
// We'll have to re-request ourselves.
else {
await this.update()
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
}
await this.update()
const clone = proxyExtendedOwner(this.data.owner)
this.ownerPfpCacheP = clone.profile_pic_url
return clone
}
fetchVideoURL() {

View File

@ -399,7 +399,7 @@
* @property {GraphEdgeCount} edge_followed_by
* @property {any} edge_media_collections todo: doc
* @property {GraphEdgeCount} edge_mutual_followed_by
* @property {PagedEdges<GraphImage>} edge_owner_to_timeline_media
* @property {PagedEdges<TimelineEntryN1>} edge_owner_to_timeline_media
* @property {any} edge_saved_media todo: doc
* @property {string | null} external_url
* @property {string | null} external_url_linkshimmed

View File

@ -44,12 +44,12 @@ module.exports = new Promise(resolve => {
/** @type {import("@deadcanaries/granax/lib/controller")} */
// @ts-ignore
let tor
if (constants.tor_password == null) {
if (constants.tor.password == null) {
// @ts-ignore
tor = new granax()
} else {
tor = new granax.TorController(connect(9051), {authOnConnect: false})
tor.authenticate(`"${constants.tor_password}"`, err => {
tor.authenticate(`"${constants.tor.password}"`, err => {
if (err) console.log("Tor auth error:", err)
})
}

View File

@ -21,8 +21,8 @@ class TorSwitcher {
* @returns {Promise<T>}
* @template T the return value of the test function
*/
request(url, test) {
if (this.torManager) {
request(type, url, test) {
if (this.torManager && constants.tor.for[type]) {
return this.torManager.request(url, test)
} else {
return request(url).then(res => test(res))
@ -32,7 +32,7 @@ class TorSwitcher {
const switcher = new TorSwitcher()
if (constants.use_tor) {
if (constants.tor.enabled) {
require("./tor").then(torManager => {
if (torManager) switcher.setManager(torManager)
})

View File

@ -10,27 +10,33 @@ html
head
meta(charset="utf-8")
meta(name="viewport" content="width=device-width, initial-scale=1")
title
= `${user.data.full_name} (@${user.data.username}) | Bibliogram`
if user.data.full_name
title= `${user.data.full_name} (@${user.data.username}) | Bibliogram`
else
title= `@${user.data.username} | Bibliogram`
link(rel="stylesheet" type="text/css" href="/static/css/main.css")
script(src="/static/js/pagination.js" type="module")
body
.main-divider
header.profile-overview
.profile-sticky
img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name}'s profile picture.`).pfp
img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp
//-
Instagram only uses the above URL, but an HD version is also available.
The alt text is pathetic, I know. I don't have much to work with.
h1.full-name= user.data.full_name
h2.username= `@${user.data.username}`
p.bio= user.data.biography
if user.data.external_url
p.website
a(href=user.data.external_url)= user.data.external_url
div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts
div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following
div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by
if user.data.full_name
h1.full-name= user.data.full_name
h2.username= `@${user.data.username}`
else
h1.full-name= `@${user.data.username}`
if !user.fromReel
p.bio= user.data.biography
if user.data.external_url
p.website
a(href=user.data.external_url)= user.data.external_url
div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts
div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following
div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by
div.links
if constants.settings.rss_enabled
a(rel="alternate" type="application/rss+xml" href=`/u/${user.data.username}/rss.xml`) RSS