From c2df6d696b15a20fb68b5ad2356a43b0aebae29e Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 2 Aug 2022 19:58:52 +1200 Subject: [PATCH 1/3] Only build new circuits when really needed --- src/lib/utils/tor.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib/utils/tor.js b/src/lib/utils/tor.js index 517697f..900217d 100644 --- a/src/lib/utils/tor.js +++ b/src/lib/utils/tor.js @@ -4,6 +4,8 @@ const constants = require("../constants") const {request} = require("./request") const {RequestCache} = require("../cache") +let circuitIndex = 0 + class TorManager { /** * @param {import("@deadcanaries/granax/lib/controller")} tor @@ -20,12 +22,16 @@ class TorManager { let done = false let g while (!done) { + const circuitIndexUsed = circuitIndex g = await request(url, {agent: this.agent}, {log: true, statusLine: "TOR"}) try { await g.check(test) break } catch (e) { - await this.newCircuit() + if (circuitIndexUsed === circuitIndex) { + circuitIndex++ + await this.newCircuit() + } } } return g From 39c8d7102f2dcd6bb9dccbe8b2b9b52902efdd18 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Thu, 11 Aug 2022 15:38:09 +1200 Subject: [PATCH 2/3] Always use Cantarell font --- src/site/sass/includes/_main.sass | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/site/sass/includes/_main.sass b/src/site/sass/includes/_main.sass index 504b93d..129d661 100644 --- a/src/site/sass/includes/_main.sass +++ b/src/site/sass/includes/_main.sass @@ -15,10 +15,7 @@ $theme: () !default src: url(/static/fonts/cantarell-#{$weight}.otf) format("opentype") font-display: swap // prefer a fallback font until the font file is loaded -body - font-family: "Bariol", sans-serif - -body.use-boring-font +body, body.use-boring-font // this is useful so that cyrillic and other scripts don't look conspicuous compared to nearby latin letters. // the use-boring-font class is activated based on the page and heuristics of its contents font-family: "Cantarell", sans-serif From fd65ef664610a9d78d4f5f06bce7d358bb3e5b7d Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Fri, 2 Sep 2022 00:31:02 +1200 Subject: [PATCH 3/3] Feeble attempt to support the new multi-format This doesn't work, but it's a start. Somebody else can continue the effort. --- src/lib/collectors.js | 14 +++++--------- src/lib/constants.js | 2 +- src/lib/utils/body.js | 29 +++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/lib/collectors.js b/src/lib/collectors.js index c0e6342..82062ec 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -1,12 +1,12 @@ const constants = require("./constants") const {request} = require("./utils/request") const switcher = require("./utils/torswitcher") -const {extractPreloader} = require("./utils/body") +const {selectExtractor} = require("./utils/body") const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const RequestHistory = require("./structures/RequestHistory") const fhp = require("fast-html-parser") const db = require("./db") -require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db) +require("./testimports")(constants, request, selectExtractor, UserRequestCache, RequestHistory, db) const requestCache = new RequestCache(constants.caching.resource_cache_time) /** @type {import("./cache").UserRequestCache} */ @@ -34,7 +34,7 @@ async function fetchUser(username, context) { return fetchUserFromHTML(username) } - throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`) + throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please ask them to use the default fetch mode by omitting that setting.`) } /** @@ -68,12 +68,8 @@ function fetchUserFromHTML(username) { // require down here or have to deal with require loop. require cache will take care of it anyway. // User -> Timeline -> TimelineEntry -> collectors -/> User const User = require("./structures/User") - const preloader = extractPreloader(text) - const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/") - if (!profileInfoResponse) { - throw new Error("No profile info in the preloader.") - } - const user = new User(JSON.parse(profileInfoResponse.result.response).data.user) + const userData = selectExtractor(text) + const user = new User(userData) history.report("user", true) if (constants.caching.db_user_id) { const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) diff --git a/src/lib/constants.js b/src/lib/constants.js index 5675713..4d549a1 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -30,7 +30,7 @@ let constants = { password: null, // If `null`, Bibliogram will run its own Tor process instead. port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.) for: { - user_html: false, + user_html: true, timeline_graphql: false, post_graphql: false, reel_graphql: false diff --git a/src/lib/utils/body.js b/src/lib/utils/body.js index 819057c..3d2005c 100644 --- a/src/lib/utils/body.js +++ b/src/lib/utils/body.js @@ -1,6 +1,16 @@ const constants = require("../constants") const {Parser} = require("./parser/parser") +function selectExtractor(text) { + if (text.includes("window._sharedData = ")) { + return extractSharedData(text) + } else if (text.includes("PolarisQueryPreloaderCache")) { + return extractPreloader(text) + } else { + throw constants.symbols.extractor_results.NO_SHARED_DATA + } +} + /** * @param {string} text * @returns {{status: symbol, value: any}} @@ -12,21 +22,22 @@ function extractSharedData(text) { // Maybe the profile is age restricted? const age = getRestrictedAge(text) if (age !== null) { // Correct. - return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age} + throw constants.symbols.extractor_results.AGE_RESTRICTED } - return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null} + throw constants.symbols.extractor_results.NO_SHARED_DATA } parser.store() const end = parser.seek(";") parser.restore() const sharedDataString = parser.slice(end - parser.cursor) const sharedData = JSON.parse(sharedDataString) + console.log(sharedData) // check for alternate form of age restrictions if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) { - // lazy fix; ideally extracting the age should be done here, but for the web ui it doesn't matter - return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: null} + // ideally extracting the age should be done here, but for the web ui it doesn't matter + throw constants.symbols.extractor_results.AGE_RESTRICTED } - return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} + return sharedData.entry_data.ProfilePage[0].graphql.user } /** @@ -43,7 +54,12 @@ function extractPreloader(text) { entries.push(data) } } - return entries + // entries now has the things + const profileInfoResponse = entries.find(x => x.request.url === "/api/v1/users/web_profile_info/") + if (!profileInfoResponse) { + throw new Error("No profile info in the preloader.") + } + return JSON.parse(profileInfoResponse.result.response).data.user } /** @@ -61,5 +77,6 @@ function getRestrictedAge(text) { return +match[1] // the age } +module.exports.selectExtractor = selectExtractor module.exports.extractSharedData = extractSharedData module.exports.extractPreloader = extractPreloader