Handle changed usernames in import script

This commit is contained in:
Cadence Ember 2020-07-13 02:08:26 +12:00
parent d76c4fae25
commit b4d21f5002
No known key found for this signature in database
GPG Key ID: 128B99B1B74A6412
2 changed files with 73 additions and 33 deletions

View File

@ -9,15 +9,24 @@ const shouldGzip = process.argv.slice(2).includes("--gzip")
const filename = "users_export.json" + (shouldGzip ? ".gz" : "") const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
const target = pj(__dirname, targetDir, filename) const target = pj(__dirname, targetDir, filename)
/**
 * Print a status message, run a task, then finish the status line.
 *
 * On success the line is completed with "done."; if the task throws or
 * rejects, the line is completed with "failed." and the error is rethrown.
 *
 * @template T
 * @param {string} message text written to stdout before the task starts (no newline is appended)
 * @param {() => T | Promise<T>} callback task to run; its return value is awaited
 * @returns {Promise<T>} the value the callback returned or resolved to
 * @throws whatever the callback throws or rejects with, unchanged
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Finish the pending status line so the error report starts on a fresh line.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
;(async () => { ;(async () => {
const users = db.prepare("SELECT * FROM Users").all() let data = await progress("Preparing export data... ", () => {
let data = Buffer.from(JSON.stringify(users), "utf8") const users = db.prepare("SELECT * FROM Users").all()
return Buffer.from(JSON.stringify(users), "utf8")
})
if (shouldGzip) { if (shouldGzip) {
data = await p(gzip)(data) data = await progress("Compressing... ", () => p(gzip)(data))
} }
await fs.writeFile(target, data) await progress("Writing file... ", () => fs.writeFile(target, data))
console.log(`Users exported to ${target}`) console.log(`Users exported to ${target}`)
})() })()

View File

@ -4,6 +4,12 @@ const pj = require("path").join
const db = require("../src/lib/db") const db = require("../src/lib/db")
const {request} = require("../src/lib/utils/request") const {request} = require("../src/lib/utils/request")
/**
 * Print a status message, run a task, then finish the status line.
 *
 * On success the line is completed with "done."; if the task throws or
 * rejects, the line is completed with "failed." and the error is rethrown.
 *
 * @template T
 * @param {string} message text written to stdout before the task starts (no newline is appended)
 * @param {() => T | Promise<T>} callback task to run; its return value is awaited
 * @returns {Promise<T>} the value the callback returned or resolved to
 * @throws whatever the callback throws or rejects with, unchanged
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Finish the pending status line so the error report starts on a fresh line.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
;(async () => { ;(async () => {
const target = process.argv[2] const target = process.argv[2]
@ -13,6 +19,7 @@ const {request} = require("../src/lib/utils/request")
process.exit(1) process.exit(1)
} }
// Resolve input to stream
if (target.match(/^https?:\/\//)) { if (target.match(/^https?:\/\//)) {
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.") console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
const ref = await request(target) const ref = await request(target)
@ -33,51 +40,75 @@ const {request} = require("../src/lib/utils/request")
} }
// Read out the stream into a buffer // Read out the stream into a buffer
process.stdout.write("Reading data... ")
const buffers = []
usersStream.on("data", chunk => buffers.push(chunk))
await new Promise(resolve => usersStream.once("end", resolve))
const usersString = Buffer.concat(buffers).toString("utf8")
process.stdout.write("done.\n")
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */ /** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
const incomingUsers = JSON.parse(usersString) const incomingUsers = await progress("Reading data... ", async () => {
const buffers = []
usersStream.on("data", chunk => buffers.push(chunk))
await new Promise(resolve => usersStream.once("end", resolve))
const usersString = Buffer.concat(buffers).toString("utf8")
return JSON.parse(usersString)
})
process.stdout.write("Noting existing users... ") // Note the existing users
const existing = new Map() const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) { const existing = new Map()
existing.set(row.user_id, row) const existingUsernames = new Map()
} for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) {
process.stdout.write("done.\n") existing.set(row.user_id, row)
existingUsernames.set(row.username, row.user_id)
}
return [existing, existingUsernames]
})
// Prepare queries
const base = const base =
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES " "INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)" +"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
const preparedReplace = db.prepare("REPLACE "+base) const preparedReplace = db.prepare("REPLACE "+base)
const preparedInsert = db.prepare("INSERT "+base) const preparedInsert = db.prepare("INSERT "+base)
const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?")
// Prepare counters
let newCount = 0 let newCount = 0
let overwrittenCount = 0 let overwrittenCount = 0
let skippedCount = 0 let skippedCount = 0
process.stdout.write("Importing into database... ") // Import new data
db.transaction(() => { await progress("Importing into database... ", () => {
for (const user of incomingUsers) { db.transaction(() => {
if (existing.has(user.user_id)) { for (const user of incomingUsers) {
const existingRow = existing.get(user.user_id) if (existing.has(user.user_id)) {
if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) { const existingRow = existing.get(user.user_id)
preparedReplace.run(user) if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) {
overwrittenCount++ preparedReplace.run(user)
overwrittenCount++
} else {
skippedCount++
}
} else { } else {
skippedCount++ if (existingUsernames.has(user.username)) {
/*
The new row's user ID has not been seen, but the new row's username is already used.
So somebody changed username at some point. Which person has the username now?
We'll look at timestamps and accept the later version.
*/
const existingRow = existing.get(existingUsernames.get(user.username))
if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently
preparedDeleteByUsername.run(user.username) // delete the existing copy
existing.delete(user.user_id)
existingUsernames.delete(user.username)
// proceed on to insert the new row
} else { // the existing copy has been updated more recently, so skip this import
skippedCount++
continue // ew
}
}
preparedInsert.run(user)
newCount++
} }
} else {
preparedInsert.run(user)
newCount++
} }
} })()
})() })
process.stdout.write("done.\n")
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`) console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
})() })()