Handle changed usernames in import script

This commit is contained in:
Cadence Ember 2020-07-13 02:08:26 +12:00
parent d76c4fae25
commit b4d21f5002
No known key found for this signature in database
GPG Key ID: 128B99B1B74A6412
2 changed files with 73 additions and 33 deletions

View File

@ -9,15 +9,24 @@ const shouldGzip = process.argv.slice(2).includes("--gzip")
const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
const target = pj(__dirname, targetDir, filename)
/**
 * Announce a task on stdout, run it, and report completion on the same line.
 *
 * The message is written without a trailing newline, so "done." lands right
 * after it once the task has finished. If the task throws or rejects, the
 * error propagates to the caller and "done." is never printed.
 *
 * @template T
 * @param {string} message text to print before the task starts
 * @param {() => (T | Promise<T>)} callback the task to run; may be sync or async
 * @returns {Promise<T>} whatever the task produced
 */
async function progress(message, callback) {
	const {stdout} = process
	stdout.write(message)
	const outcome = await callback()
	stdout.write("done.\n")
	return outcome
}
// Export entry point: read every row of the Users table, serialise the rows as
// UTF-8 JSON, optionally gzip the result when `shouldGzip` is set, and write it
// to `target`.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped, so each step appears twice: once as the bare statement and once
// wrapped in progress(). As written, `data` is declared twice with `let`, and
// the gzip and write steps would each run twice. Presumably only the
// progress()-wrapped variants are meant to remain — confirm against the
// repository before running.
;(async () => {
// Bare variant: load all users and serialise them.
const users = db.prepare("SELECT * FROM Users").all()
let data = Buffer.from(JSON.stringify(users), "utf8")
// Wrapped variant: same work, reported on stdout via progress().
let data = await progress("Preparing export data... ", () => {
const users = db.prepare("SELECT * FROM Users").all()
return Buffer.from(JSON.stringify(users), "utf8")
})
if (shouldGzip) {
// `p` is presumably a promisify helper defined outside this view — TODO confirm.
data = await p(gzip)(data)
data = await progress("Compressing... ", () => p(gzip)(data))
}
// Write the (possibly compressed) buffer to the target path.
await fs.writeFile(target, data)
await progress("Writing file... ", () => fs.writeFile(target, data))
console.log(`Users exported to ${target}`)
})()

View File

@ -4,6 +4,12 @@ const pj = require("path").join
const db = require("../src/lib/db")
const {request} = require("../src/lib/utils/request")
/**
 * Print a status message, run the given task, then print "done." after it.
 *
 * No newline follows the message, so the completion marker appears on the
 * same line. An error thrown or rejected by the task propagates to the caller
 * and the completion marker is not printed.
 *
 * @template T
 * @param {string} message text to print before the task starts
 * @param {() => (T | Promise<T>)} callback the task to run; may be sync or async
 * @returns {Promise<T>} the value the task returned or resolved to
 */
async function progress(message, callback) {
	const {stdout} = process
	stdout.write(message)
	const outcome = await callback()
	stdout.write("done.\n")
	return outcome
}
;(async () => {
const target = process.argv[2]
@ -13,6 +19,7 @@ const {request} = require("../src/lib/utils/request")
process.exit(1)
}
// Resolve input to stream
if (target.match(/^https?:\/\//)) {
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
const ref = await request(target)
@ -33,51 +40,75 @@ const {request} = require("../src/lib/utils/request")
}
// Read out the stream into a buffer
process.stdout.write("Reading data... ")
const buffers = []
usersStream.on("data", chunk => buffers.push(chunk))
await new Promise(resolve => usersStream.once("end", resolve))
const usersString = Buffer.concat(buffers).toString("utf8")
process.stdout.write("done.\n")
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
const incomingUsers = JSON.parse(usersString)
const incomingUsers = await progress("Reading data... ", async () => {
const buffers = []
usersStream.on("data", chunk => buffers.push(chunk))
await new Promise(resolve => usersStream.once("end", resolve))
const usersString = Buffer.concat(buffers).toString("utf8")
return JSON.parse(usersString)
})
process.stdout.write("Noting existing users... ")
const existing = new Map()
for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) {
existing.set(row.user_id, row)
}
process.stdout.write("done.\n")
// Note the existing users
const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
const existing = new Map()
const existingUsernames = new Map()
for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) {
existing.set(row.user_id, row)
existingUsernames.set(row.username, row.user_id)
}
return [existing, existingUsernames]
})
// Prepare queries
const base =
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
const preparedReplace = db.prepare("REPLACE "+base)
const preparedInsert = db.prepare("INSERT "+base)
const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?")
// Prepare counters
let newCount = 0
let overwrittenCount = 0
let skippedCount = 0
process.stdout.write("Importing into database... ")
db.transaction(() => {
for (const user of incomingUsers) {
if (existing.has(user.user_id)) {
const existingRow = existing.get(user.user_id)
if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) {
preparedReplace.run(user)
overwrittenCount++
// Import new data
await progress("Importing into database... ", () => {
db.transaction(() => {
for (const user of incomingUsers) {
if (existing.has(user.user_id)) {
const existingRow = existing.get(user.user_id)
if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) {
preparedReplace.run(user)
overwrittenCount++
} else {
skippedCount++
}
} else {
skippedCount++
if (existingUsernames.has(user.username)) {
/*
The new row's user ID has not been seen, but the new row's username is already used.
So somebody changed username at some point. Which person has the username now?
We'll look at timestamps and accept the later version.
*/
const existingRow = existing.get(existingUsernames.get(user.username))
if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently
preparedDeleteByUsername.run(user.username) // delete the existing copy
existing.delete(user.user_id)
existingUsernames.delete(user.username)
// proceed on to insert the new row
} else { // the existing copy has been updated more recently, so skip this import
skippedCount++
continue // ew
}
}
preparedInsert.run(user)
newCount++
}
} else {
preparedInsert.run(user)
newCount++
}
}
})()
process.stdout.write("done.\n")
})()
})
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
})()