From fd65ef664610a9d78d4f5f06bce7d358bb3e5b7d Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Fri, 2 Sep 2022 00:31:02 +1200 Subject: [PATCH] Feeble attempt to support the new multi-format This doesn't work, but it's a start. Somebody else can continue the effort. --- src/lib/collectors.js | 14 +++++--------- src/lib/constants.js | 2 +- src/lib/utils/body.js | 29 +++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/lib/collectors.js b/src/lib/collectors.js index c0e6342..82062ec 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -1,12 +1,12 @@ const constants = require("./constants") const {request} = require("./utils/request") const switcher = require("./utils/torswitcher") -const {extractPreloader} = require("./utils/body") +const {selectExtractor} = require("./utils/body") const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const RequestHistory = require("./structures/RequestHistory") const fhp = require("fast-html-parser") const db = require("./db") -require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db) +require("./testimports")(constants, request, selectExtractor, UserRequestCache, RequestHistory, db) const requestCache = new RequestCache(constants.caching.resource_cache_time) /** @type {import("./cache").UserRequestCache} */ @@ -34,7 +34,7 @@ async function fetchUser(username, context) { return fetchUserFromHTML(username) } - throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`) + throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please ask them to use the default fetch mode by omitting that setting.`) } /** @@ -68,12 +68,8 @@ function fetchUserFromHTML(username) { // require down here or have to deal with require loop. require cache will take care of it anyway. // User -> Timeline -> TimelineEntry -> collectors -/> User const User = require("./structures/User") - const preloader = extractPreloader(text) - const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/") - if (!profileInfoResponse) { - throw new Error("No profile info in the preloader.") - } - const user = new User(JSON.parse(profileInfoResponse.result.response).data.user) + const userData = selectExtractor(text) + const user = new User(userData) history.report("user", true) if (constants.caching.db_user_id) { const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) diff --git a/src/lib/constants.js b/src/lib/constants.js index 5675713..4d549a1 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -30,7 +30,7 @@ let constants = { password: null, // If `null`, Bibliogram will run its own Tor process instead. port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.) for: { - user_html: false, + user_html: true, timeline_graphql: false, post_graphql: false, reel_graphql: false diff --git a/src/lib/utils/body.js b/src/lib/utils/body.js index 819057c..3d2005c 100644 --- a/src/lib/utils/body.js +++ b/src/lib/utils/body.js @@ -1,6 +1,16 @@ const constants = require("../constants") const {Parser} = require("./parser/parser") +function selectExtractor(text) { + if (text.includes("window._sharedData = ")) { + return extractSharedData(text) + } else if (text.includes("PolarisQueryPreloaderCache")) { + return extractPreloader(text) + } else { + throw constants.symbols.extractor_results.NO_SHARED_DATA + } +} + /** * @param {string} text * @returns {{status: symbol, value: any}} @@ -12,21 +22,22 @@ function extractSharedData(text) { // Maybe the profile is age restricted? const age = getRestrictedAge(text) if (age !== null) { // Correct. - return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age} + throw constants.symbols.extractor_results.AGE_RESTRICTED } - return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null} + throw constants.symbols.extractor_results.NO_SHARED_DATA } parser.store() const end = parser.seek(";") parser.restore() const sharedDataString = parser.slice(end - parser.cursor) const sharedData = JSON.parse(sharedDataString) + console.log(sharedData) // check for alternate form of age restrictions if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) { - // lazy fix; ideally extracting the age should be done here, but for the web ui it doesn't matter - return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: null} + // ideally extracting the age should be done here, but for the web ui it doesn't matter + throw constants.symbols.extractor_results.AGE_RESTRICTED } - return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} + return sharedData.entry_data.ProfilePage[0].graphql.user } /** @@ -43,7 +54,12 @@ function extractPreloader(text) { entries.push(data) } } - return entries + // entries now has the things + const profileInfoResponse = entries.find(x => x.request.url === "/api/v1/users/web_profile_info/") + if (!profileInfoResponse) { + throw new Error("No profile info in the preloader.") + } + return JSON.parse(profileInfoResponse.result.response).data.user } /** @@ -61,5 +77,6 @@ function getRestrictedAge(text) { return +match[1] // the age } +module.exports.selectExtractor = selectExtractor module.exports.extractSharedData = extractSharedData module.exports.extractPreloader = extractPreloader