1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2025-01-06 20:16:58 +00:00

Restore HTML request method with preload extractor

This commit is contained in:
Cadence Ember 2022-07-28 23:31:05 +12:00
parent 48c6e4a8a6
commit ab58306cee
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17
4 changed files with 97 additions and 5 deletions

View File

@ -1,12 +1,12 @@
const constants = require("./constants") const constants = require("./constants")
const {request} = require("./utils/request") const {request} = require("./utils/request")
const switcher = require("./utils/torswitcher") const switcher = require("./utils/torswitcher")
const {extractSharedData} = require("./utils/body") const {extractPreloader} = require("./utils/body")
const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory") const RequestHistory = require("./structures/RequestHistory")
const fhp = require("fast-html-parser") const fhp = require("fast-html-parser")
const db = require("./db") const db = require("./db")
require("./testimports")(constants, request, extractSharedData, UserRequestCache, RequestHistory, db) require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db)
const requestCache = new RequestCache(constants.caching.resource_cache_time) const requestCache = new RequestCache(constants.caching.resource_cache_time)
/** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */ /** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */
@ -30,11 +30,84 @@ async function fetchUser(username, context) {
let mode = constants.allow_user_from_reel let mode = constants.allow_user_from_reel
if (mode === "iweb") { if (mode === "iweb") {
return fetchUserFromIWeb(username) return fetchUserFromIWeb(username)
} else if (mode === "html") {
return fetchUserFromHTML(username)
} }
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`) throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
} }
/**
 * Fetch a user by requesting their HTML profile page and reading the
 * `/api/v1/users/web_profile_info/` response that the page preloads.
 *
 * If the `self_blocked_status.user_html` config is enabled and the last "user"
 * request recorded in `history` failed less than `time` milliseconds ago, no
 * request is made and the returned promise rejects with the recorded failure
 * kind (or `constants.symbols.RATE_LIMITED` if no kind was recorded).
 *
 * Rejects with `constants.symbols.ENDPOINT_OVERRIDDEN` (HTTP 301),
 * `INSTAGRAM_DEMANDS_LOGIN` (302), `RATE_LIMITED` (429), `NOT_FOUND` (404), or
 * an `Error` if the preloader has no profile info entry.
 *
 * @param {string} username
 * @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}
 * `quotaUsed` counts how many times the fetch callback was actually invoked.
 */
function fetchUserFromHTML(username) {
	const blockedCacheConfig = constants.caching.self_blocked_status.user_html
	// The config object itself is always truthy, so its `enabled` flag has to be checked explicitly;
	// otherwise the short-circuit below would still run on instances that turned it off.
	if (blockedCacheConfig && blockedCacheConfig.enabled && history.store.has("user")) {
		const entry = history.store.get("user")
		if (!entry.lastRequestSuccessful && Date.now() < entry.lastRequestAt + blockedCacheConfig.time) {
			return Promise.reject(entry.kind || constants.symbols.RATE_LIMITED)
		}
	}
	let quotaUsed = 0
	// NOTE(review): the meaning of the `false, true` flags is defined by UserRequestCache.getOrFetch; confirm there.
	return userRequestCache.getOrFetch("user/"+username, false, true, () => {
		quotaUsed++
		return switcher.request("user_html", `https://www.instagram.com/${username}/feed/`, async res => {
			// These statuses are thrown as symbols so the catch handler below and callers can tell them apart.
			if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
			if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
			if (res.status === 429) throw constants.symbols.RATE_LIMITED
			return res
		}).then(async g => {
			const res = await g.response()
			if (res.status === 404) {
				throw constants.symbols.NOT_FOUND
			} else {
				const text = await g.text()
				// require down here or have to deal with require loop. require cache will take care of it anyway.
				// User -> Timeline -> TimelineEntry -> collectors -/> User
				const User = require("./structures/User")
				const preloader = extractPreloader(text)
				const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/")
				if (!profileInfoResponse) {
					throw new Error("No profile info in the preloader.")
				}
				// The preloaded response body is itself a JSON string, so it needs a second parse.
				const user = new User(JSON.parse(profileInfoResponse.result.response).data.user)
				history.report("user", true)
				if (constants.caching.db_user_id) {
					// Keep the original `created` time only if the existing row was written by the current database version.
					const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
					db.prepare(
						"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
						+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
					).run({
						username: user.data.username,
						user_id: user.data.id,
						created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
						updated: Date.now(),
						updated_version: constants.database_version,
						biography: user.data.biography || null,
						post_count: user.posts || 0,
						following_count: user.following || 0,
						followed_by_count: user.followedBy || 0,
						external_url: user.data.external_url || null,
						full_name: user.data.full_name || null,
						// unary plus turns the booleans into 0/1 for storage
						is_private: +user.data.is_private,
						is_verified: +user.data.is_verified,
						profile_pic_url: user.data.profile_pic_url
					})
				}
				return user
			}
		}).catch(error => {
			// Only login demands and rate limits are recorded as failed requests; every error is rethrown unchanged.
			if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
				history.report("user", false, error)
			}
			throw error
		})
	}).then(user => ({user, quotaUsed}))
}
/** /**
* @param {string} username * @param {string} username
* @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>} * @returns {Promise<{user: import("./structures/User"), quotaUsed: number}>}

View File

@ -41,7 +41,7 @@ let constants = {
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain. // change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
does_not_track: false, does_not_track: false,
allow_user_from_reel: "iweb", // legacy. this must be "iweb" now. allow_user_from_reel: "html", // "iweb" or "html", whichever one works for you
proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains. proxy_media: { // Whether to proxy media (images, videos, thumbnails) through Bibliogram. This is strongly recommended to protect user privacy. If proxy is turned off, some browser content blockers may break all images since they are served from Facebook domains.
image: true, image: true,
video: true, video: true,
@ -223,7 +223,7 @@ let constants = {
csrf_time: 60*60*1000, csrf_time: 60*60*1000,
self_blocked_status: { self_blocked_status: {
user_html: { user_html: {
enabled: true, enabled: false, // enable this if you're using iweb method AND a high traffic instance
time: 60*60*1000 time: 60*60*1000
}, },
}, },

View File

@ -29,6 +29,23 @@ function extractSharedData(text) {
return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData}
} }
/**
 * Collect the entries found after each `PolarisQueryPreloaderCache` marker in a page.
 *
 * For every occurrence of the marker, the text from the next `{"complete":`
 * up to `,"status_code":` is taken, closed with `}}` and parsed as JSON.
 * A `SyntaxError` from `JSON.parse` propagates to the caller.
 *
 * @param {string} text page source to scan
 * @returns {any} array of parsed entries, empty when no marker is found
 */
function extractPreloader(text) {
	const cacheMarker = '{"require":[["PolarisQueryPreloaderCache"'
	const entryMarker = '{"complete":'
	const found = []
	const scanner = new Parser(text)
	for (;;) {
		// Jump past the next preloader marker; stop once there are none left.
		const markerAt = scanner.seek(cacheMarker, {moveToMatch: true, useEnd: true})
		if (markerAt === -1) break
		// Position on the start of the entry object, if this marker has one.
		const entryAt = scanner.seek(entryMarker, {moveToMatch: true, useEnd: false})
		if (entryAt === -1) continue
		// The text is cut off before the status code, so the braces left open by the cut are re-added.
		const truncated = scanner.get({split: ',"status_code":'})
		found.push(JSON.parse(truncated + "}}"))
	}
	return found
}
/** /**
* @param {string} text * @param {string} text
*/ */
@ -45,3 +62,4 @@ function getRestrictedAge(text) {
} }
module.exports.extractSharedData = extractSharedData module.exports.extractSharedData = extractSharedData
module.exports.extractPreloader = extractPreloader

View File

@ -8,7 +8,8 @@ const userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:102.0) Gecko/20100
const headers = { const headers = {
"User-Agent": userAgent, "User-Agent": userAgent,
"X-IG-App-ID": 936619743392459 // needed for profile iweb to work "X-IG-App-ID": "936619743392459", // needed for profile iweb to work
"Sec-Fetch-Mode": "navigate", // needed for profile html to work
} }
const backendStatusLineMap = new Map([ const backendStatusLineMap = new Map([