mirror of
https://git.sr.ht/~cadence/bibliogram
synced 2025-01-06 20:16:58 +00:00
Restore HTML request method with preload extractor
This commit is contained in:
parent
48c6e4a8a6
commit
ab58306cee
@ -1,12 +1,12 @@
|
|||||||
const constants = require("./constants")
|
const constants = require("./constants")
|
||||||
const {request} = require("./utils/request")
|
const {request} = require("./utils/request")
|
||||||
const switcher = require("./utils/torswitcher")
|
const switcher = require("./utils/torswitcher")
|
||||||
const {extractSharedData} = require("./utils/body")
|
const {extractPreloader} = require("./utils/body")
|
||||||
const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
|
const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
|
||||||
const RequestHistory = require("./structures/RequestHistory")
|
const RequestHistory = require("./structures/RequestHistory")
|
||||||
const fhp = require("fast-html-parser")
|
const fhp = require("fast-html-parser")
|
||||||
const db = require("./db")
|
const db = require("./db")
|
||||||
require("./testimports")(constants, request, extractSharedData, UserRequestCache, RequestHistory, db)
|
require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db)
|
||||||
|
|
||||||
const requestCache = new RequestCache(constants.caching.resource_cache_time)
|
const requestCache = new RequestCache(constants.caching.resource_cache_time)
|
||||||
/** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */
|
/** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */
|
||||||
@ -30,11 +30,84 @@ async function fetchUser(username, context) {
|
|||||||
let mode = constants.allow_user_from_reel
|
let mode = constants.allow_user_from_reel
|
||||||
if (mode === "iweb") {
|
if (mode === "iweb") {
|
||||||
return fetchUserFromIWeb(username)
|
return fetchUserFromIWeb(username)
|
||||||
|
} else if (mode === "html") {
|
||||||
|
return fetchUserFromHTML(username)
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
|
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} username
|
||||||
|
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
|
||||||
|
*/
|
||||||
|
function fetchUserFromHTML(username) {
|
||||||
|
const blockedCacheConfig = constants.caching.self_blocked_status.user_html
|
||||||
|
if (blockedCacheConfig) {
|
||||||
|
if (history.store.has("user")) {
|
||||||
|
const entry = history.store.get("user")
|
||||||
|
if (!entry.lastRequestSuccessful && Date.now() < entry.lastRequestAt + blockedCacheConfig.time) {
|
||||||
|
return Promise.reject(entry.kind || constants.symbols.RATE_LIMITED)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let quotaUsed = 0
|
||||||
|
return userRequestCache.getOrFetch("user/"+username, false, true, () => {
|
||||||
|
quotaUsed++
|
||||||
|
return switcher.request("user_html", `https://www.instagram.com/${username}/feed/`, async res => {
|
||||||
|
if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
|
||||||
|
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
|
||||||
|
if (res.status === 429) throw constants.symbols.RATE_LIMITED
|
||||||
|
return res
|
||||||
|
}).then(async g => {
|
||||||
|
const res = await g.response()
|
||||||
|
if (res.status === 404) {
|
||||||
|
throw constants.symbols.NOT_FOUND
|
||||||
|
} else {
|
||||||
|
const text = await g.text()
|
||||||
|
// require down here or have to deal with require loop. require cache will take care of it anyway.
|
||||||
|
// User -> Timeline -> TimelineEntry -> collectors -/> User
|
||||||
|
const User = require("./structures/User")
|
||||||
|
const preloader = extractPreloader(text)
|
||||||
|
const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/")
|
||||||
|
if (!profileInfoResponse) {
|
||||||
|
throw new Error("No profile info in the preloader.")
|
||||||
|
}
|
||||||
|
const user = new User(JSON.parse(profileInfoResponse.result.response).data.user)
|
||||||
|
history.report("user", true)
|
||||||
|
if (constants.caching.db_user_id) {
|
||||||
|
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
|
||||||
|
db.prepare(
|
||||||
|
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
||||||
|
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
||||||
|
).run({
|
||||||
|
username: user.data.username,
|
||||||
|
user_id: user.data.id,
|
||||||
|
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
|
||||||
|
updated: Date.now(),
|
||||||
|
updated_version: constants.database_version,
|
||||||
|
biography: user.data.biography || null,
|
||||||
|
post_count: user.posts || 0,
|
||||||
|
following_count: user.following || 0,
|
||||||
|
followed_by_count: user.followedBy || 0,
|
||||||
|
external_url: user.data.external_url || null,
|
||||||
|
full_name: user.data.full_name || null,
|
||||||
|
is_private: +user.data.is_private,
|
||||||
|
is_verified: +user.data.is_verified,
|
||||||
|
profile_pic_url: user.data.profile_pic_url
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return user
|
||||||
|
}
|
||||||
|
}).catch(error => {
|
||||||
|
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
|
||||||
|
history.report("user", false, error)
|
||||||
|
}
|
||||||
|
throw error
|
||||||
|
})
|
||||||
|
}).then(user => ({user, quotaUsed}))
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} username
|
* @param {string} username
|
||||||
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
|
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
|
||||||
|
@ -41,7 +41,7 @@ let constants = {
|
|||||||
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
|
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
|
||||||
does_not_track: false,
|
does_not_track: false,
|
||||||
|
|
||||||
allow_user_from_reel: "iweb", // legacy. this must be "iweb" now.
|
allow_user_from_reel: "html", // "iweb" or "html", whichever one works for you
|
||||||
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
|
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
|
||||||
image: true,
|
image: true,
|
||||||
video: true,
|
video: true,
|
||||||
@ -223,7 +223,7 @@ let constants = {
|
|||||||
csrf_time: 60*60*1000,
|
csrf_time: 60*60*1000,
|
||||||
self_blocked_status: {
|
self_blocked_status: {
|
||||||
user_html: {
|
user_html: {
|
||||||
enabled: true,
|
enabled: false, // enable this if you're using iweb method AND a high traffic instance
|
||||||
time: 60*60*1000
|
time: 60*60*1000
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -29,6 +29,23 @@ function extractSharedData(text) {
|
|||||||
return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData}
|
return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collect the preloader entries embedded in a page.
 *
 * For every occurrence of the `PolarisQueryPreloaderCache` require marker, the
 * next `{"complete":` object is read up to its `,"status_code":` field, closed
 * with `}}`, and parsed as JSON. Fragments that are not valid JSON are skipped
 * so that one truncated entry does not discard all the others.
 *
 * @param {string} text
 * @returns {any[]} the parsed entries, in page order (empty if none were found)
 */
function extractPreloader(text) {
	const entries = []
	const parser = new Parser(text)
	while (parser.seek('{"require":[["PolarisQueryPreloaderCache"', {moveToMatch: true, useEnd: true}) !== -1) {
		if (parser.seek('{"complete":', {moveToMatch: true, useEnd: false}) === -1) {
			continue
		}
		// The fragment is cut before "status_code", so the two objects left open must be closed by hand.
		const details = parser.get({split: ',"status_code":'}) + "}}"
		try {
			entries.push(JSON.parse(details))
		} catch (error) {
			// Skip only malformed fragments; anything else is unexpected and must surface.
			if (!(error instanceof SyntaxError)) throw error
		}
	}
	return entries
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} text
|
* @param {string} text
|
||||||
*/
|
*/
|
||||||
@ -45,3 +62,4 @@ function getRestrictedAge(text) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
module.exports.extractSharedData = extractSharedData
|
module.exports.extractSharedData = extractSharedData
|
||||||
|
module.exports.extractPreloader = extractPreloader
|
||||||
|
@ -8,7 +8,8 @@ const userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:102.0) Gecko/20100
|
|||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
"User-Agent": userAgent,
|
"User-Agent": userAgent,
|
||||||
"X-IG-App-ID": 936619743392459 // needed for profile iweb to work
|
"X-IG-App-ID": "936619743392459", // needed for profile iweb to work
|
||||||
|
"Sec-Fetch-Mode": "navigate", // needed for profile html to work
|
||||||
}
|
}
|
||||||
|
|
||||||
const backendStatusLineMap = new Map([
|
const backendStatusLineMap = new Map([
|
||||||
|
Loading…
Reference in New Issue
Block a user