mirror of
https://git.sr.ht/~cadence/bibliogram
synced 2025-01-06 20:16:58 +00:00
Replace all profile fetching methods with IWeb
The previous HTML method is gone due to a page restructure. That method was able to consistently bypass Instagram's blocking. The IWeb method allows a few hundred uses per some period of time (X) for self-hosters, and a couple dozen uses per the same period for servers. These limits will likely change in the future. There is no known way to bypass Instagram's IWeb blocking; feel free to look for one. Timeline pages beyond the first are still blocked. The "next page" button now defaults to not loading automatically when scrolled into view, since it will basically never work anyway. Users running personal instances may be able to get a couple of uses out of it.
This commit is contained in:
parent
d2665ce538
commit
c2d7aca1cb
@ -28,65 +28,18 @@ async function fetchUser(username, context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mode = constants.allow_user_from_reel
|
let mode = constants.allow_user_from_reel
|
||||||
if (mode === "preferForRSS") {
|
if (mode === "iweb") {
|
||||||
if (context === constants.symbols.fetch_context.RSS) mode = "prefer"
|
return fetchUserFromIWeb(username)
|
||||||
else mode = "onlyPreferSaved"
|
|
||||||
}
|
}
|
||||||
if (context === constants.symbols.fetch_context.ASSISTANT) {
|
|
||||||
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
|
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
|
||||||
if (saved && saved.updated_version >= 2) {
|
|
||||||
return fetchUserFromSaved(saved)
|
|
||||||
} else {
|
|
||||||
return fetchUserFromHTML(username)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mode === "never") {
|
|
||||||
return fetchUserFromHTML(username)
|
|
||||||
}
|
|
||||||
if (mode === "prefer") {
|
|
||||||
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
|
|
||||||
if (saved && saved.updated_version >= 2) {
|
|
||||||
return fetchUserFromSaved(saved)
|
|
||||||
} else if (saved && saved.updated_version === 1) {
|
|
||||||
return fetchUserFromCombined(saved.user_id, saved.username)
|
|
||||||
} else {
|
|
||||||
return fetchUserFromHTML(username)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mode === "onlyPreferSaved") {
|
|
||||||
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
|
|
||||||
if (saved && saved.updated_version >= 2) {
|
|
||||||
return fetchUserFromSaved(saved)
|
|
||||||
} else {
|
|
||||||
mode = "fallback"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mode === "fallback") {
|
|
||||||
return fetchUserFromHTML(username).catch(error => {
|
|
||||||
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
|
|
||||||
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
|
|
||||||
if (saved && saved.updated_version === 1) {
|
|
||||||
return fetchUserFromCombined(saved.user_id, username)
|
|
||||||
} else if (saved && saved.updated_version >= 2) {
|
|
||||||
return fetchUserFromSaved(saved)
|
|
||||||
} else if (assistantSwitcher.enabled()) {
|
|
||||||
return assistantSwitcher.requestUser(username).catch(error => {
|
|
||||||
if (error === constants.symbols.NO_ASSISTANTS_AVAILABLE) throw constants.symbols.RATE_LIMITED
|
|
||||||
else throw error
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw error
|
|
||||||
})
|
|
||||||
}
|
|
||||||
throw new Error(`Selected fetch mode ${mode} was unmatched.`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} username
|
* @param {string} username
|
||||||
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
|
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
|
||||||
*/
|
*/
|
||||||
function fetchUserFromHTML(username) {
|
function fetchUserFromIWeb(username) {
|
||||||
const blockedCacheConfig = constants.caching.self_blocked_status.user_html
|
const blockedCacheConfig = constants.caching.self_blocked_status.user_html
|
||||||
if (blockedCacheConfig) {
|
if (blockedCacheConfig) {
|
||||||
if (history.store.has("user")) {
|
if (history.store.has("user")) {
|
||||||
@ -99,56 +52,44 @@ function fetchUserFromHTML(username) {
|
|||||||
let quotaUsed = 0
|
let quotaUsed = 0
|
||||||
return userRequestCache.getOrFetch("user/"+username, false, true, () => {
|
return userRequestCache.getOrFetch("user/"+username, false, true, () => {
|
||||||
quotaUsed++
|
quotaUsed++
|
||||||
return switcher.request("user_html", `https://www.instagram.com/${username}/feed/`, async res => {
|
const params = new URLSearchParams({username})
|
||||||
|
return switcher.request("user_html", `https://i.instagram.com/api/v1/users/web_profile_info/?${params}`, async res => {
|
||||||
if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
|
if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
|
||||||
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
|
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
|
||||||
if (res.status === 429) throw constants.symbols.RATE_LIMITED
|
if (res.status === 429) throw constants.symbols.RATE_LIMITED
|
||||||
return res
|
return res
|
||||||
}).then(async g => {
|
}).then(async g => {
|
||||||
const res = await g.response()
|
const res = await g.response()
|
||||||
if (res.status === 404) {
|
const json = await g.json()
|
||||||
throw constants.symbols.NOT_FOUND
|
// require down here or have to deal with require loop. require cache will take care of it anyway.
|
||||||
} else {
|
// User -> Timeline -> TimelineEntry -> collectors -/> User
|
||||||
const text = await g.text()
|
const User = require("./structures/User")
|
||||||
// require down here or have to deal with require loop. require cache will take care of it anyway.
|
const user = new User(json.data.user)
|
||||||
// User -> Timeline -> TimelineEntry -> collectors -/> User
|
history.report("user", true)
|
||||||
const User = require("./structures/User")
|
// sure, cache the user info. why not.
|
||||||
const result = extractSharedData(text)
|
if (constants.caching.db_user_id) {
|
||||||
if (result.status === constants.symbols.extractor_results.SUCCESS) {
|
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
|
||||||
const sharedData = result.value
|
db.prepare(
|
||||||
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
|
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
||||||
history.report("user", true)
|
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
||||||
if (constants.caching.db_user_id) {
|
).run({
|
||||||
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
|
username: user.data.username,
|
||||||
db.prepare(
|
user_id: user.data.id,
|
||||||
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
|
||||||
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
updated: Date.now(),
|
||||||
).run({
|
updated_version: constants.database_version,
|
||||||
username: user.data.username,
|
biography: user.data.biography || null,
|
||||||
user_id: user.data.id,
|
post_count: user.posts || 0,
|
||||||
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
|
following_count: user.following || 0,
|
||||||
updated: Date.now(),
|
followed_by_count: user.followedBy || 0,
|
||||||
updated_version: constants.database_version,
|
external_url: user.data.external_url || null,
|
||||||
biography: user.data.biography || null,
|
full_name: user.data.full_name || null,
|
||||||
post_count: user.posts || 0,
|
is_private: +user.data.is_private,
|
||||||
following_count: user.following || 0,
|
is_verified: +user.data.is_verified,
|
||||||
followed_by_count: user.followedBy || 0,
|
profile_pic_url: user.data.profile_pic_url
|
||||||
external_url: user.data.external_url || null,
|
})
|
||||||
full_name: user.data.full_name || null,
|
|
||||||
is_private: +user.data.is_private,
|
|
||||||
is_verified: +user.data.is_verified,
|
|
||||||
profile_pic_url: user.data.profile_pic_url
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return user
|
|
||||||
} else if (result.status === constants.symbols.extractor_results.AGE_RESTRICTED) {
|
|
||||||
// I don't like this code.
|
|
||||||
history.report("user", true)
|
|
||||||
throw constants.symbols.extractor_results.AGE_RESTRICTED
|
|
||||||
} else {
|
|
||||||
throw result.status
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return user
|
||||||
}).catch(error => {
|
}).catch(error => {
|
||||||
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
|
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
|
||||||
history.report("user", false, error)
|
history.report("user", false, error)
|
||||||
@ -239,36 +180,6 @@ function fetchUserFromCombined(userID, username) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
function fetchUserFromSaved(saved) {
|
|
||||||
let quotaUsed = 0
|
|
||||||
return userRequestCache.getOrFetch("user/"+saved.username, false, true, async () => {
|
|
||||||
// require down here or have to deal with require loop. require cache will take care of it anyway.
|
|
||||||
// ReelUser -> Timeline -> TimelineEntry -> collectors -/> ReelUser
|
|
||||||
const ReelUser = require("./structures/ReelUser")
|
|
||||||
const user = new ReelUser({
|
|
||||||
username: saved.username,
|
|
||||||
id: saved.user_id,
|
|
||||||
biography: saved.biography,
|
|
||||||
edge_follow: {count: saved.following_count},
|
|
||||||
edge_followed_by: {count: saved.followed_by_count},
|
|
||||||
external_url: saved.external_url,
|
|
||||||
full_name: saved.full_name,
|
|
||||||
is_private: !!saved.is_private,
|
|
||||||
is_verified: !!saved.is_verified,
|
|
||||||
profile_pic_url: saved.profile_pic_url
|
|
||||||
})
|
|
||||||
// Add first timeline page
|
|
||||||
if (!user.timeline.pages[0]) {
|
|
||||||
const {result: page, fromCache} = await fetchTimelinePage(user.data.id, "")
|
|
||||||
if (!fromCache) quotaUsed++
|
|
||||||
user.timeline.addPage(page)
|
|
||||||
}
|
|
||||||
return user
|
|
||||||
}).then(user => {
|
|
||||||
return {user, quotaUsed}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} userID
|
* @param {string} userID
|
||||||
* @param {string} after
|
* @param {string} after
|
||||||
@ -548,6 +459,5 @@ module.exports.timelineEntryCache = timelineEntryCache
|
|||||||
module.exports.getOrFetchShortcode = getOrFetchShortcode
|
module.exports.getOrFetchShortcode = getOrFetchShortcode
|
||||||
module.exports.updateProfilePictureFromReel = updateProfilePictureFromReel
|
module.exports.updateProfilePictureFromReel = updateProfilePictureFromReel
|
||||||
module.exports.history = history
|
module.exports.history = history
|
||||||
module.exports.fetchUserFromSaved = fetchUserFromSaved
|
|
||||||
module.exports.assistantSwitcher = assistantSwitcher
|
module.exports.assistantSwitcher = assistantSwitcher
|
||||||
module.exports.verifyUserPair = verifyUserPair
|
module.exports.verifyUserPair = verifyUserPair
|
||||||
|
@ -41,7 +41,7 @@ let constants = {
|
|||||||
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
|
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
|
||||||
does_not_track: false,
|
does_not_track: false,
|
||||||
|
|
||||||
allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer", "onlyPreferSaved", "preferForRSS"
|
allow_user_from_reel: "iweb", // legacy. this must be "iweb" now.
|
||||||
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
|
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
|
||||||
image: true,
|
image: true,
|
||||||
video: true,
|
video: true,
|
||||||
@ -155,7 +155,7 @@ let constants = {
|
|||||||
replaceEmptyWithDefault: false
|
replaceEmptyWithDefault: false
|
||||||
},{
|
},{
|
||||||
name: "infinite_scroll",
|
name: "infinite_scroll",
|
||||||
default: "normal",
|
default: "off",
|
||||||
boolean: false,
|
boolean: false,
|
||||||
replaceEmptyWithDefault: true
|
replaceEmptyWithDefault: true
|
||||||
},{
|
},{
|
||||||
@ -239,7 +239,7 @@ let constants = {
|
|||||||
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
|
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
|
||||||
external: {
|
external: {
|
||||||
reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
|
reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
|
||||||
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
|
timeline_query_hash: "69cba40317214236af40e7efa697781d",
|
||||||
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
|
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
|
||||||
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",
|
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",
|
||||||
igtv_query_hash: "bc78b344a68ed16dd5d7f264681c4c76",
|
igtv_query_hash: "bc78b344a68ed16dd5d7f264681c4c76",
|
||||||
|
@ -4,7 +4,12 @@ const SavedRequestManager = require("./saved_requests/manager")
|
|||||||
|
|
||||||
const constants = require("../constants")
|
const constants = require("../constants")
|
||||||
|
|
||||||
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
|
const userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0"
|
||||||
|
|
||||||
|
const headers = {
|
||||||
|
"User-Agent": userAgent,
|
||||||
|
"X-IG-App-ID": 936619743392459 // needed for profile iweb to work
|
||||||
|
}
|
||||||
|
|
||||||
const backendStatusLineMap = new Map([
|
const backendStatusLineMap = new Map([
|
||||||
["node-fetch", "NF "],
|
["node-fetch", "NF "],
|
||||||
@ -23,16 +28,12 @@ function request(url, options = {}, settings = {}) {
|
|||||||
|
|
||||||
if (constants.request_backend === "node-fetch") {
|
if (constants.request_backend === "node-fetch") {
|
||||||
return new NodeFetch(url, Object.assign({
|
return new NodeFetch(url, Object.assign({
|
||||||
headers: {
|
headers,
|
||||||
"User-Agent": userAgent
|
|
||||||
},
|
|
||||||
redirect: "manual"
|
redirect: "manual"
|
||||||
}, options))
|
}, options))
|
||||||
} else if (constants.request_backend === "got") {
|
} else if (constants.request_backend === "got") {
|
||||||
return new Got(url, Object.assign({
|
return new Got(url, Object.assign({
|
||||||
headers: {
|
headers,
|
||||||
"User-Agent": userAgent
|
|
||||||
},
|
|
||||||
followRedirect: false,
|
followRedirect: false,
|
||||||
throwHttpErrors: false
|
throwHttpErrors: false
|
||||||
}, options))
|
}, options))
|
||||||
|
@ -47,11 +47,14 @@ class NextPage extends FreezeWidth {
|
|||||||
this.controller = controller
|
this.controller = controller
|
||||||
this.clicked = false
|
this.clicked = false
|
||||||
this.nextPageNumber = +this.element.getAttribute("data-page")
|
this.nextPageNumber = +this.element.getAttribute("data-page")
|
||||||
|
this.auto = this.element.getAttribute("data-auto")
|
||||||
this.attribute("href", "javascript:void(0)")
|
this.attribute("href", "javascript:void(0)")
|
||||||
this.event("click", event => this.onClick(event))
|
this.event("click", event => this.onClick(event))
|
||||||
|
|
||||||
this.observer = new IntersectionObserver(entries => this.onIntersect(entries), {rootMargin: "0px", threshold: intersectionThreshold})
|
this.observer = new IntersectionObserver(entries => this.onIntersect(entries), {rootMargin: "0px", threshold: intersectionThreshold})
|
||||||
this.observer.observe(this.element)
|
if (this.auto !== "off") {
|
||||||
|
this.observer.observe(this.element)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
onClick(event) {
|
onClick(event) {
|
||||||
|
@ -10,6 +10,7 @@ mixin next_page_button(user, selectedTimeline, url, type)
|
|||||||
data-username=(user.data.username)
|
data-username=(user.data.username)
|
||||||
data-type=type
|
data-type=type
|
||||||
data-loading-text=ll.next_page_button_loading
|
data-loading-text=ll.next_page_button_loading
|
||||||
|
data-auto=settings.infinite_scroll
|
||||||
)#next-page.next-page= ll.next_page_button
|
)#next-page.next-page= ll.next_page_button
|
||||||
else
|
else
|
||||||
div
|
div
|
||||||
|
@ -102,9 +102,8 @@ html(dir=ll.meta_direction, lang=settings.language)
|
|||||||
|
|
||||||
+checkbox("spa", ll.fast_navigation, ll.t_enabled, false)
|
+checkbox("spa", ll.fast_navigation, ll.t_enabled, false)
|
||||||
|
|
||||||
+select("infinite_scroll", ll.infinite_scroll, true, [
|
+select("infinite_scroll", ll.infinite_scroll, false, [
|
||||||
{value: "normal", text: ll.t_normal},
|
{value: "normal", text: ll.t_normal},
|
||||||
{value: "eager", text: ll.t_eager},
|
|
||||||
{value: "off", text: ll.t_manual}
|
{value: "off", text: ll.t_manual}
|
||||||
])
|
])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user