diff --git a/scripts/export_users.js b/scripts/export_users.js
index ba22257..5d0efc2 100755
--- a/scripts/export_users.js
+++ b/scripts/export_users.js
@@ -9,15 +9,24 @@
 const shouldGzip = process.argv.slice(2).includes("--gzip")
 const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
 const target = pj(__dirname, targetDir, filename)
 
+async function progress(message, callback) {
+	process.stdout.write(message)
+	const result = await callback()
+	process.stdout.write("done.\n")
+	return result
+}
+
 ;(async () => {
-	const users = db.prepare("SELECT * FROM Users").all()
-	let data = Buffer.from(JSON.stringify(users), "utf8")
+	let data = await progress("Preparing export data... ", () => {
+		const users = db.prepare("SELECT * FROM Users").all()
+		return Buffer.from(JSON.stringify(users), "utf8")
+	})
 
 	if (shouldGzip) {
-		data = await p(gzip)(data)
+		data = await progress("Compressing... ", () => p(gzip)(data))
 	}
 
-	await fs.writeFile(target, data)
+	await progress("Writing file... ", () => fs.writeFile(target, data))
 	console.log(`Users exported to ${target}`)
 })()
diff --git a/scripts/import_users.js b/scripts/import_users.js
index 5b7a0a5..74ed92b 100644
--- a/scripts/import_users.js
+++ b/scripts/import_users.js
@@ -4,6 +4,12 @@
 const pj = require("path").join
 const db = require("../src/lib/db")
 const {request} = require("../src/lib/utils/request")
 
+async function progress(message, callback) {
+	process.stdout.write(message)
+	const result = await callback()
+	process.stdout.write("done.\n")
+	return result
+}
 ;(async () => {
 	const target = process.argv[2]
@@ -13,6 +19,7 @@
 		process.exit(1)
 	}
 
+	// Resolve input to stream
 	if (target.match(/^https?:\/\//)) {
 		console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
 		const ref = await request(target)
@@ -33,51 +40,75 @@
 	}
 
 	// Read out the stream into a buffer
-	process.stdout.write("Reading data... ")
-	const buffers = []
-	usersStream.on("data", chunk => buffers.push(chunk))
-	await new Promise(resolve => usersStream.once("end", resolve))
-	const usersString = Buffer.concat(buffers).toString("utf8")
-	process.stdout.write("done.\n")
-	/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
-	const incomingUsers = JSON.parse(usersString)
+	const incomingUsers = await progress("Reading data... ", async () => {
+		const buffers = []
+		usersStream.on("data", chunk => buffers.push(chunk))
+		await new Promise(resolve => usersStream.once("end", resolve))
+		const usersString = Buffer.concat(buffers).toString("utf8")
+		return JSON.parse(usersString)
+	})
 
-	process.stdout.write("Noting existing users... ")
-	const existing = new Map()
-	for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) {
-		existing.set(row.user_id, row)
-	}
-	process.stdout.write("done.\n")
+	// Note the existing users
+	const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
", () => { + const existing = new Map() + const existingUsernames = new Map() + for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) { + existing.set(row.user_id, row) + existingUsernames.set(row.username, row.user_id) + } + return [existing, existingUsernames] + }) + // Prepare queries const base = "INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES " +"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)" const preparedReplace = db.prepare("REPLACE "+base) const preparedInsert = db.prepare("INSERT "+base) + const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?") + // Prepare counters let newCount = 0 let overwrittenCount = 0 let skippedCount = 0 - process.stdout.write("Importing into database... ") - db.transaction(() => { - for (const user of incomingUsers) { - if (existing.has(user.user_id)) { - const existingRow = existing.get(user.user_id) - if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) { - preparedReplace.run(user) - overwrittenCount++ + // Import new data + await progress("Importing into database... ", () => { + db.transaction(() => { + for (const user of incomingUsers) { + if (existing.has(user.user_id)) { + const existingRow = existing.get(user.user_id) + if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) { + preparedReplace.run(user) + overwrittenCount++ + } else { + skippedCount++ + } } else { - skippedCount++ + if (existingUsernames.has(user.username)) { + /* + The new row's user ID has not been seen, but the new row's username is already used. + So somebody changed username at some point. Which person has the username now? + We'll look at timestamps and accept the later version. + */ + const existingRow = existing.get(existingUsernames.get(user.username)) + if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently + preparedDeleteByUsername.run(user.username) // delete the existing copy + existing.delete(user.user_id) + existingUsernames.delete(user.username) + // proceed on to insert the new row + } else { // the existing copy has been updated more recently, so skip this import + skippedCount++ + continue // ew + } + } + preparedInsert.run(user) + newCount++ } - } else { - preparedInsert.run(user) - newCount++ } - } - })() - process.stdout.write("done.\n") + })() + }) console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`) })()