mirror of
https://git.sr.ht/~cadence/bibliogram
synced 2024-11-22 08:07:30 +00:00
Handle changed usernames in import script
This commit is contained in:
parent
d76c4fae25
commit
b4d21f5002
@ -9,15 +9,24 @@ const shouldGzip = process.argv.slice(2).includes("--gzip")
|
||||
const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
|
||||
const target = pj(__dirname, targetDir, filename)
|
||||
|
||||
/**
 * Write `message` to stdout, run `callback`, then finish the line with
 * "done." on success or "failed." when the callback throws or rejects.
 *
 * @template T
 * @param {string} message text printed before the work starts; no newline is appended to it
 * @param {() => T | Promise<T>} callback the work to perform; its result is awaited
 * @returns {Promise<T>} the value the callback produced
 * @throws rethrows, unchanged, whatever the callback threw or rejected with
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Terminate the pending line so the error report is not glued onto the progress message.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
|
||||
|
||||
// Export every row of the Users table to `target` as JSON, compressing the
// payload first when --gzip was passed. Each step runs exactly once, wrapped
// in progress() so the console shows which stage is currently executing.
;(async () => {
	// Serialise the whole table into a single UTF-8 buffer.
	let data = await progress("Preparing export data... ", () => {
		const users = db.prepare("SELECT * FROM Users").all()
		return Buffer.from(JSON.stringify(users), "utf8")
	})

	if (shouldGzip) {
		// `p(gzip)` is awaited, so `p` presumably promisifies the callback-style gzip — confirm against the imports.
		data = await progress("Compressing... ", () => p(gzip)(data))
	}

	await progress("Writing file... ", () => fs.writeFile(target, data))

	console.log(`Users exported to ${target}`)
})()
|
||||
|
@ -4,6 +4,12 @@ const pj = require("path").join
|
||||
const db = require("../src/lib/db")
|
||||
const {request} = require("../src/lib/utils/request")
|
||||
|
||||
/**
 * Write `message` to stdout, run `callback`, then finish the line with
 * "done." on success or "failed." when the callback throws or rejects.
 *
 * @template T
 * @param {string} message text printed before the work starts; no newline is appended to it
 * @param {() => T | Promise<T>} callback the work to perform; its result is awaited
 * @returns {Promise<T>} the value the callback produced
 * @throws rethrows, unchanged, whatever the callback threw or rejected with
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Terminate the pending line so the error report is not glued onto the progress message.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
|
||||
|
||||
;(async () => {
|
||||
const target = process.argv[2]
|
||||
@ -13,6 +19,7 @@ const {request} = require("../src/lib/utils/request")
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
// Resolve input to stream
|
||||
if (target.match(/^https?:\/\//)) {
|
||||
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
|
||||
const ref = await request(target)
|
||||
@ -33,34 +40,41 @@ const {request} = require("../src/lib/utils/request")
|
||||
}
|
||||
|
||||
// Read out the stream into a buffer
|
||||
process.stdout.write("Reading data... ")
|
||||
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
|
||||
const incomingUsers = await progress("Reading data... ", async () => {
|
||||
const buffers = []
|
||||
usersStream.on("data", chunk => buffers.push(chunk))
|
||||
await new Promise(resolve => usersStream.once("end", resolve))
|
||||
const usersString = Buffer.concat(buffers).toString("utf8")
|
||||
process.stdout.write("done.\n")
|
||||
return JSON.parse(usersString)
|
||||
})
|
||||
|
||||
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
|
||||
const incomingUsers = JSON.parse(usersString)
|
||||
|
||||
process.stdout.write("Noting existing users... ")
|
||||
// Note the existing users
|
||||
const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
|
||||
const existing = new Map()
|
||||
for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) {
|
||||
const existingUsernames = new Map()
|
||||
for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) {
|
||||
existing.set(row.user_id, row)
|
||||
existingUsernames.set(row.username, row.user_id)
|
||||
}
|
||||
process.stdout.write("done.\n")
|
||||
return [existing, existingUsernames]
|
||||
})
|
||||
|
||||
// Prepare queries
|
||||
const base =
|
||||
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
||||
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
||||
const preparedReplace = db.prepare("REPLACE "+base)
|
||||
const preparedInsert = db.prepare("INSERT "+base)
|
||||
const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?")
|
||||
|
||||
// Prepare counters
|
||||
let newCount = 0
|
||||
let overwrittenCount = 0
|
||||
let skippedCount = 0
|
||||
|
||||
process.stdout.write("Importing into database... ")
|
||||
// Import new data
|
||||
await progress("Importing into database... ", () => {
|
||||
db.transaction(() => {
|
||||
for (const user of incomingUsers) {
|
||||
if (existing.has(user.user_id)) {
|
||||
@ -72,12 +86,29 @@ const {request} = require("../src/lib/utils/request")
|
||||
skippedCount++
|
||||
}
|
||||
} else {
|
||||
if (existingUsernames.has(user.username)) {
|
||||
/*
|
||||
The new row's user ID has not been seen, but the new row's username is already used.
|
||||
So somebody changed username at some point. Which person has the username now?
|
||||
We'll look at timestamps and accept the later version.
|
||||
*/
|
||||
const existingRow = existing.get(existingUsernames.get(user.username))
|
||||
if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently
|
||||
preparedDeleteByUsername.run(user.username) // delete the existing copy
|
||||
existing.delete(user.user_id)
|
||||
existingUsernames.delete(user.username)
|
||||
// proceed on to insert the new row
|
||||
} else { // the existing copy has been updated more recently, so skip this import
|
||||
skippedCount++
|
||||
continue // ew
|
||||
}
|
||||
}
|
||||
preparedInsert.run(user)
|
||||
newCount++
|
||||
}
|
||||
}
|
||||
})()
|
||||
process.stdout.write("done.\n")
|
||||
})
|
||||
|
||||
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
|
||||
})()
|
||||
|
Loading…
Reference in New Issue
Block a user