1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2024-11-23 16:37:30 +00:00

Compare commits

..

3 Commits

Author SHA1 Message Date
Cadence Ember
fd65ef6646
Feeble attempt to support the new multi-format
This doesn't work, but it's a start. Somebody else can continue the effort.
2022-09-02 00:31:02 +12:00
Cadence Ember
39c8d7102f
Always use Cantarell font 2022-08-11 15:38:09 +12:00
Cadence Ember
c2df6d696b
Only build new circuits when really needed 2022-08-02 19:58:52 +12:00
5 changed files with 37 additions and 21 deletions

View File

@ -1,12 +1,12 @@
const constants = require("./constants") const constants = require("./constants")
const {request} = require("./utils/request") const {request} = require("./utils/request")
const switcher = require("./utils/torswitcher") const switcher = require("./utils/torswitcher")
const {extractPreloader} = require("./utils/body") const {selectExtractor} = require("./utils/body")
const {TtlCache, RequestCache, UserRequestCache} = require("./cache") const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory") const RequestHistory = require("./structures/RequestHistory")
const fhp = require("fast-html-parser") const fhp = require("fast-html-parser")
const db = require("./db") const db = require("./db")
require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db) require("./testimports")(constants, request, selectExtractor, UserRequestCache, RequestHistory, db)
const requestCache = new RequestCache(constants.caching.resource_cache_time) const requestCache = new RequestCache(constants.caching.resource_cache_time)
/** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */ /** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */
@ -34,7 +34,7 @@ async function fetchUser(username, context) {
return fetchUserFromHTML(username) return fetchUserFromHTML(username)
} }
throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`) throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please ask them to use the default fetch mode by omitting that setting.`)
} }
/** /**
@ -68,12 +68,8 @@ function fetchUserFromHTML(username) {
// require down here or have to deal with require loop. require cache will take care of it anyway. // require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User // User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User") const User = require("./structures/User")
const preloader = extractPreloader(text) const userData = selectExtractor(text)
const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/") const user = new User(userData)
if (!profileInfoResponse) {
throw new Error("No profile info in the preloader.")
}
const user = new User(JSON.parse(profileInfoResponse.result.response).data.user)
history.report("user", true) history.report("user", true)
if (constants.caching.db_user_id) { if (constants.caching.db_user_id) {
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)

View File

@ -30,7 +30,7 @@ let constants = {
password: null, // If `null`, Bibliogram will run its own Tor process instead. password: null, // If `null`, Bibliogram will run its own Tor process instead.
port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.) port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.)
for: { for: {
user_html: false, user_html: true,
timeline_graphql: false, timeline_graphql: false,
post_graphql: false, post_graphql: false,
reel_graphql: false reel_graphql: false

View File

@ -1,6 +1,16 @@
const constants = require("../constants") const constants = require("../constants")
const {Parser} = require("./parser/parser") const {Parser} = require("./parser/parser")
/**
 * Pick the extractor that matches the markup present in a page and run it.
 *
 * The page is checked for the `window._sharedData = ` assignment first, so that
 * form wins if both markers are present. Otherwise the
 * `PolarisQueryPreloaderCache` marker selects the preloader extractor.
 *
 * @param {string} text HTML source of the page
 * @returns {any} whatever the chosen extractor returns for this page
 * @throws {symbol} constants.symbols.extractor_results.NO_SHARED_DATA when neither marker is found
 */
function selectExtractor(text) {
	const hasSharedData = text.includes("window._sharedData = ")
	if (hasSharedData) {
		return extractSharedData(text)
	}

	const hasPreloader = text.includes("PolarisQueryPreloaderCache")
	if (hasPreloader) {
		return extractPreloader(text)
	}

	// Neither known format is present, so report it with the shared result symbol.
	throw constants.symbols.extractor_results.NO_SHARED_DATA
}
/** /**
* @param {string} text * @param {string} text
* @returns {{status: symbol, value: any}} * @returns {{status: symbol, value: any}}
@ -12,21 +22,22 @@ function extractSharedData(text) {
// Maybe the profile is age restricted? // Maybe the profile is age restricted?
const age = getRestrictedAge(text) const age = getRestrictedAge(text)
if (age !== null) { // Correct. if (age !== null) { // Correct.
return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age} throw constants.symbols.extractor_results.AGE_RESTRICTED
} }
return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null} throw constants.symbols.extractor_results.NO_SHARED_DATA
} }
parser.store() parser.store()
const end = parser.seek(";</script>") const end = parser.seek(";</script>")
parser.restore() parser.restore()
const sharedDataString = parser.slice(end - parser.cursor) const sharedDataString = parser.slice(end - parser.cursor)
const sharedData = JSON.parse(sharedDataString) const sharedData = JSON.parse(sharedDataString)
console.log(sharedData)
// check for alternate form of age restrictions // check for alternate form of age restrictions
if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) { if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) {
// lazy fix; ideally extracting the age should be done here, but for the web ui it doesn't matter // ideally extracting the age should be done here, but for the web ui it doesn't matter
return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: null} throw constants.symbols.extractor_results.AGE_RESTRICTED
} }
return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} return sharedData.entry_data.ProfilePage[0].graphql.user
} }
/** /**
@ -43,7 +54,12 @@ function extractPreloader(text) {
entries.push(data) entries.push(data)
} }
} }
return entries // entries now has the things
const profileInfoResponse = entries.find(x => x.request.url === "/api/v1/users/web_profile_info/")
if (!profileInfoResponse) {
throw new Error("No profile info in the preloader.")
}
return JSON.parse(profileInfoResponse.result.response).data.user
} }
/** /**
@ -61,5 +77,6 @@ function getRestrictedAge(text) {
return +match[1] // the age return +match[1] // the age
} }
module.exports.selectExtractor = selectExtractor
module.exports.extractSharedData = extractSharedData module.exports.extractSharedData = extractSharedData
module.exports.extractPreloader = extractPreloader module.exports.extractPreloader = extractPreloader

View File

@ -4,6 +4,8 @@ const constants = require("../constants")
const {request} = require("./request") const {request} = require("./request")
const {RequestCache} = require("../cache") const {RequestCache} = require("../cache")
let circuitIndex = 0
class TorManager { class TorManager {
/** /**
* @param {import("@deadcanaries/granax/lib/controller")} tor * @param {import("@deadcanaries/granax/lib/controller")} tor
@ -20,12 +22,16 @@ class TorManager {
let done = false let done = false
let g let g
while (!done) { while (!done) {
const circuitIndexUsed = circuitIndex
g = await request(url, {agent: this.agent}, {log: true, statusLine: "TOR"}) g = await request(url, {agent: this.agent}, {log: true, statusLine: "TOR"})
try { try {
await g.check(test) await g.check(test)
break break
} catch (e) { } catch (e) {
await this.newCircuit() if (circuitIndexUsed === circuitIndex) {
circuitIndex++
await this.newCircuit()
}
} }
} }
return g return g

View File

@ -15,10 +15,7 @@ $theme: () !default
src: url(/static/fonts/cantarell-#{$weight}.otf) format("opentype") src: url(/static/fonts/cantarell-#{$weight}.otf) format("opentype")
font-display: swap // prefer a fallback font until the font file is loaded font-display: swap // prefer a fallback font until the font file is loaded
body body, body.use-boring-font
font-family: "Bariol", sans-serif
body.use-boring-font
// this is useful so that cyrillic and other scripts don't look conspicuous compared to nearby latin letters. // this is useful so that cyrillic and other scripts don't look conspicuous compared to nearby latin letters.
// the use-boring-font class is activated based on the page and heuristics of its contents // the use-boring-font class is activated based on the page and heuristics of its contents
font-family: "Cantarell", sans-serif font-family: "Cantarell", sans-serif