Compare commits

...

3 Commits

Author SHA1 Message Date
Cadence Ember a9c7923b2c
Sure, let's cache self_blocked_status again! 2022-07-25 01:59:06 +12:00
Cadence Ember f20554ddfb
Disable Tor by default.
Feel free to experiment and see if it helps if you enable it.
2022-07-25 01:57:44 +12:00
Cadence Ember c2d7aca1cb
Replace all profile fetching methods with IWeb
The previous HTML method is gone due to a page restructure. It was
able to consistently bypass Instagram's blocking.

The IWeb method has a few hundred uses per X time for selfhosters, and
a couple dozen uses per X time for servers. This will likely change in
the future. There is no known way to bypass Instagram's IWeb blocking.

Feel free to look for a way.

Further timeline pages are still blocked. The "next page" button
defaults to not automatically loading when scrolled, since it will
basically never work anyway. Users running personal instances may be
able to get a couple of uses out of it.
2022-07-25 01:57:44 +12:00
6 changed files with 59 additions and 145 deletions

View File

@ -28,65 +28,18 @@ async function fetchUser(username, context) {
}
let mode = constants.allow_user_from_reel
if (mode === "preferForRSS") {
if (context === constants.symbols.fetch_context.RSS) mode = "prefer"
else mode = "onlyPreferSaved"
if (mode === "iweb") {
return fetchUserFromIWeb(username)
}
if (context === constants.symbols.fetch_context.ASSISTANT) {
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
if (saved && saved.updated_version >= 2) {
return fetchUserFromSaved(saved)
} else {
return fetchUserFromHTML(username)
}
}
if (mode === "never") {
return fetchUserFromHTML(username)
}
if (mode === "prefer") {
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
if (saved && saved.updated_version >= 2) {
return fetchUserFromSaved(saved)
} else if (saved && saved.updated_version === 1) {
return fetchUserFromCombined(saved.user_id, saved.username)
} else {
return fetchUserFromHTML(username)
}
}
if (mode === "onlyPreferSaved") {
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
if (saved && saved.updated_version >= 2) {
return fetchUserFromSaved(saved)
} else {
mode = "fallback"
}
}
if (mode === "fallback") {
return fetchUserFromHTML(username).catch(error => {
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
const saved = db.prepare("SELECT username, user_id, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url FROM Users WHERE username = ?").get(username)
if (saved && saved.updated_version === 1) {
return fetchUserFromCombined(saved.user_id, username)
} else if (saved && saved.updated_version >= 2) {
return fetchUserFromSaved(saved)
} else if (assistantSwitcher.enabled()) {
return assistantSwitcher.requestUser(username).catch(error => {
if (error === constants.symbols.NO_ASSISTANTS_AVAILABLE) throw constants.symbols.RATE_LIMITED
else throw error
})
}
}
throw error
})
}
throw new Error(`Selected fetch mode ${mode} was unmatched.`)
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
}
/**
* @param {string} username
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
*/
function fetchUserFromHTML(username) {
function fetchUserFromIWeb(username) {
const blockedCacheConfig = constants.caching.self_blocked_status.user_html
if (blockedCacheConfig) {
if (history.store.has("user")) {
@ -99,56 +52,44 @@ function fetchUserFromHTML(username) {
let quotaUsed = 0
return userRequestCache.getOrFetch("user/"+username, false, true, () => {
quotaUsed++
return switcher.request("user_html", `https://www.instagram.com/${username}/feed/`, async res => {
const params = new URLSearchParams({username})
return switcher.request("user_html", `https://i.instagram.com/api/v1/users/web_profile_info/?${params}`, async res => {
if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(async g => {
const res = await g.response()
if (res.status === 404) {
throw constants.symbols.NOT_FOUND
} else {
const text = await g.text()
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const result = extractSharedData(text)
if (result.status === constants.symbols.extractor_results.SUCCESS) {
const sharedData = result.value
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
history.report("user", true)
if (constants.caching.db_user_id) {
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
db.prepare(
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
).run({
username: user.data.username,
user_id: user.data.id,
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
updated: Date.now(),
updated_version: constants.database_version,
biography: user.data.biography || null,
post_count: user.posts || 0,
following_count: user.following || 0,
followed_by_count: user.followedBy || 0,
external_url: user.data.external_url || null,
full_name: user.data.full_name || null,
is_private: +user.data.is_private,
is_verified: +user.data.is_verified,
profile_pic_url: user.data.profile_pic_url
})
}
return user
} else if (result.status === constants.symbols.extractor_results.AGE_RESTRICTED) {
// I don't like this code.
history.report("user", true)
throw constants.symbols.extractor_results.AGE_RESTRICTED
} else {
throw result.status
}
const json = await g.json()
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const user = new User(json.data.user)
history.report("user", true)
// sure, cache the user info. why not.
if (constants.caching.db_user_id) {
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
db.prepare(
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
).run({
username: user.data.username,
user_id: user.data.id,
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
updated: Date.now(),
updated_version: constants.database_version,
biography: user.data.biography || null,
post_count: user.posts || 0,
following_count: user.following || 0,
followed_by_count: user.followedBy || 0,
external_url: user.data.external_url || null,
full_name: user.data.full_name || null,
is_private: +user.data.is_private,
is_verified: +user.data.is_verified,
profile_pic_url: user.data.profile_pic_url
})
}
return user
}).catch(error => {
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
history.report("user", false, error)
@ -239,36 +180,6 @@ function fetchUserFromCombined(userID, username) {
})
}
/**
 * Build a user from a saved database row instead of requesting the profile.
 * The work goes through `userRequestCache` under the key `"user/" + saved.username`.
 * If the built user has no first timeline page, one is fetched and added;
 * `quotaUsed` is incremented only when that page did not come from cache.
 * @param {any} saved row providing username, user_id, biography, following_count, followed_by_count, external_url, full_name, is_private, is_verified and profile_pic_url
 * @returns {Promise<{user: import("./structures/ReelUser"), quotaUsed: number}>}
 */
function fetchUserFromSaved(saved) {
	let quotaUsed = 0

	const buildFromRow = async () => {
		// Required lazily so that loading this module does not create a require loop; the require cache keeps repeat calls cheap.
		// ReelUser -> Timeline -> TimelineEntry -> collectors -/> ReelUser
		const ReelUser = require("./structures/ReelUser")
		const reelData = {
			username: saved.username,
			id: saved.user_id,
			biography: saved.biography,
			edge_follow: {count: saved.following_count},
			edge_followed_by: {count: saved.followed_by_count},
			external_url: saved.external_url,
			full_name: saved.full_name,
			is_private: !!saved.is_private,
			is_verified: !!saved.is_verified,
			profile_pic_url: saved.profile_pic_url
		}
		const user = new ReelUser(reelData)

		// Make sure the first timeline page is present.
		const hasFirstPage = Boolean(user.timeline.pages[0])
		if (!hasFirstPage) {
			const fetched = await fetchTimelinePage(user.data.id, "")
			const page = fetched.result
			if (!fetched.fromCache) quotaUsed++
			user.timeline.addPage(page)
		}
		return user
	}

	return userRequestCache
		.getOrFetch("user/"+saved.username, false, true, buildFromRow)
		.then(user => ({user, quotaUsed}))
}
/**
* @param {string} userID
* @param {string} after
@ -548,6 +459,5 @@ module.exports.timelineEntryCache = timelineEntryCache
module.exports.getOrFetchShortcode = getOrFetchShortcode
module.exports.updateProfilePictureFromReel = updateProfilePictureFromReel
module.exports.history = history
module.exports.fetchUserFromSaved = fetchUserFromSaved
module.exports.assistantSwitcher = assistantSwitcher
module.exports.verifyUserPair = verifyUserPair

View File

@ -26,11 +26,11 @@ let constants = {
// Things that server owners _could_ change if they want to.
tor: {
enabled: true, // If false, everything else in this block has no effect.
enabled: false, // If false, everything else in this block has no effect.
password: null, // If `null`, Bibliogram will run its own Tor process instead.
port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.)
for: {
user_html: true,
user_html: false,
timeline_graphql: false,
post_graphql: false,
reel_graphql: false
@ -41,7 +41,7 @@ let constants = {
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
does_not_track: false,
allow_user_from_reel: "fallback", // one of: "never", "fallback", "prefer", "onlyPreferSaved", "preferForRSS"
allow_user_from_reel: "iweb", // legacy. this must be "iweb" now.
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
image: true,
video: true,
@ -155,7 +155,7 @@ let constants = {
replaceEmptyWithDefault: false
},{
name: "infinite_scroll",
default: "normal",
default: "off",
boolean: false,
replaceEmptyWithDefault: true
},{
@ -223,12 +223,12 @@ let constants = {
csrf_time: 60*60*1000,
self_blocked_status: {
user_html: {
enabled: false,
time: 15*60*1000
enabled: true,
time: 2*60*60*1000
},
timeline_graphql: {
enabled: false,
time: 24*60*60*1000
enabled: true,
time: 2*60*60*1000
}
},
db_user_id: true,
@ -239,7 +239,7 @@ let constants = {
// Instagram uses this stuff. This shouldn't be changed, except to fix a bug that hasn't yet been fixed upstream.
external: {
reel_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
timeline_query_hash: "69cba40317214236af40e7efa697781d",
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",
igtv_query_hash: "bc78b344a68ed16dd5d7f264681c4c76",

View File

@ -4,7 +4,12 @@ const SavedRequestManager = require("./saved_requests/manager")
const constants = require("../constants")
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
const userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0"
// Default headers handed to the request backends; a caller's `options.headers` replaces them.
const headers = {
	"User-Agent": userAgent,
	// needed for profile iweb to work.
	// Kept as a string: header values are text on the wire, so this does not rely on
	// each backend implicitly converting a number.
	"X-IG-App-ID": "936619743392459"
}
const backendStatusLineMap = new Map([
["node-fetch", "NF "],
@ -23,16 +28,12 @@ function request(url, options = {}, settings = {}) {
if (constants.request_backend === "node-fetch") {
return new NodeFetch(url, Object.assign({
headers: {
"User-Agent": userAgent
},
headers,
redirect: "manual"
}, options))
} else if (constants.request_backend === "got") {
return new Got(url, Object.assign({
headers: {
"User-Agent": userAgent
},
headers,
followRedirect: false,
throwHttpErrors: false
}, options))

View File

@ -47,11 +47,14 @@ class NextPage extends FreezeWidth {
this.controller = controller
this.clicked = false
this.nextPageNumber = +this.element.getAttribute("data-page")
this.auto = this.element.getAttribute("data-auto")
this.attribute("href", "javascript:void(0)")
this.event("click", event => this.onClick(event))
this.observer = new IntersectionObserver(entries => this.onIntersect(entries), {rootMargin: "0px", threshold: intersectionThreshold})
this.observer.observe(this.element)
if (this.auto !== "off") {
this.observer.observe(this.element)
}
}
onClick(event) {

View File

@ -10,6 +10,7 @@ mixin next_page_button(user, selectedTimeline, url, type)
data-username=(user.data.username)
data-type=type
data-loading-text=ll.next_page_button_loading
data-auto=settings.infinite_scroll
)#next-page.next-page= ll.next_page_button
else
div

View File

@ -102,9 +102,8 @@ html(dir=ll.meta_direction, lang=settings.language)
+checkbox("spa", ll.fast_navigation, ll.t_enabled, false)
+select("infinite_scroll", ll.infinite_scroll, true, [
+select("infinite_scroll", ll.infinite_scroll, false, [
{value: "normal", text: ll.t_normal},
{value: "eager", text: ll.t_eager},
{value: "off", text: ll.t_manual}
])