diff --git a/src/lib/collectors.js b/src/lib/collectors.js index 82062ec..c0e6342 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -1,12 +1,12 @@ const constants = require("./constants") const {request} = require("./utils/request") const switcher = require("./utils/torswitcher") -const {selectExtractor} = require("./utils/body") +const {extractPreloader} = require("./utils/body") const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const RequestHistory = require("./structures/RequestHistory") const fhp = require("fast-html-parser") const db = require("./db") -require("./testimports")(constants, request, selectExtractor, UserRequestCache, RequestHistory, db) +require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db) const requestCache = new RequestCache(constants.caching.resource_cache_time) /** @type {import("./cache").UserRequestCache} */ @@ -34,7 +34,7 @@ async function fetchUser(username, context) { return fetchUserFromHTML(username) } - throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please ask them to use the default fetch mode by omitting that setting.`) + throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`) } /** @@ -68,8 +68,12 @@ function fetchUserFromHTML(username) { // require down here or have to deal with require loop. require cache will take care of it anyway. // User -> Timeline -> TimelineEntry -> collectors -/> User const User = require("./structures/User") - const userData = selectExtractor(text) - const user = new User(userData) + const preloader = extractPreloader(text) + const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/") + if (!profileInfoResponse) { + throw new Error("No profile info in the preloader.") + } + const user = new User(JSON.parse(profileInfoResponse.result.response).data.user) history.report("user", true) if (constants.caching.db_user_id) { const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) diff --git a/src/lib/constants.js b/src/lib/constants.js index 4d549a1..5675713 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -30,7 +30,7 @@ let constants = { password: null, // If `null`, Bibliogram will run its own Tor process instead. port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.) for: { - user_html: true, + user_html: false, timeline_graphql: false, post_graphql: false, reel_graphql: false diff --git a/src/lib/utils/body.js b/src/lib/utils/body.js index 3d2005c..819057c 100644 --- a/src/lib/utils/body.js +++ b/src/lib/utils/body.js @@ -1,16 +1,6 @@ const constants = require("../constants") const {Parser} = require("./parser/parser") -function selectExtractor(text) { - if (text.includes("window._sharedData = ")) { - return extractSharedData(text) - } else if (text.includes("PolarisQueryPreloaderCache")) { - return extractPreloader(text) - } else { - throw constants.symbols.extractor_results.NO_SHARED_DATA - } -} - /** * @param {string} text * @returns {{status: symbol, value: any}} @@ -22,22 +12,21 @@ function extractSharedData(text) { // Maybe the profile is age restricted? const age = getRestrictedAge(text) if (age !== null) { // Correct. - throw constants.symbols.extractor_results.AGE_RESTRICTED + return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age} } - throw constants.symbols.extractor_results.NO_SHARED_DATA + return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null} } parser.store() const end = parser.seek(";") parser.restore() const sharedDataString = parser.slice(end - parser.cursor) const sharedData = JSON.parse(sharedDataString) - console.log(sharedData) // check for alternate form of age restrictions if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) { - // ideally extracting the age should be done here, but for the web ui it doesn't matter - throw constants.symbols.extractor_results.AGE_RESTRICTED + // lazy fix; ideally extracting the age should be done here, but for the web ui it doesn't matter + return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: null} } - return sharedData.entry_data.ProfilePage[0].graphql.user + return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} } /** @@ -54,12 +43,7 @@ function extractPreloader(text) { entries.push(data) } } - // entries now has the things - const profileInfoResponse = entries.find(x => x.request.url === "/api/v1/users/web_profile_info/") - if (!profileInfoResponse) { - throw new Error("No profile info in the preloader.") - } - return JSON.parse(profileInfoResponse.result.response).data.user + return entries } /** @@ -77,6 +61,5 @@ function getRestrictedAge(text) { return +match[1] // the age } -module.exports.selectExtractor = selectExtractor module.exports.extractSharedData = extractSharedData module.exports.extractPreloader = extractPreloader diff --git a/src/lib/utils/tor.js b/src/lib/utils/tor.js index 900217d..517697f 100644 --- a/src/lib/utils/tor.js +++ b/src/lib/utils/tor.js @@ -4,8 +4,6 @@ const constants = require("../constants") const {request} = require("./request") const {RequestCache} = require("../cache") -let circuitIndex = 0 - class TorManager { /** * @param {import("@deadcanaries/granax/lib/controller")} tor @@ -22,16 +20,12 @@ class TorManager { let done = false let g while (!done) { - const circuitIndexUsed = circuitIndex g = await request(url, {agent: this.agent}, {log: true, statusLine: "TOR"}) try { await g.check(test) break } catch (e) { - if (circuitIndexUsed === circuitIndex) { - circuitIndex++ - await this.newCircuit() - } + await this.newCircuit() } } return g diff --git a/src/site/sass/includes/_main.sass b/src/site/sass/includes/_main.sass index 129d661..504b93d 100644 --- a/src/site/sass/includes/_main.sass +++ b/src/site/sass/includes/_main.sass @@ -15,7 +15,10 @@ $theme: () !default src: url(/static/fonts/cantarell-#{$weight}.otf) format("opentype") font-display: swap // prefer a fallback font until the font file is loaded -body, body.use-boring-font +body + font-family: "Bariol", sans-serif + +body.use-boring-font // this is useful so that cyrillic and other scripts don't look conspicuous compared to nearby latin letters. // the use-boring-font class is activated based on the page and heuristics of its contents font-family: "Cantarell", sans-serif