1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2024-11-22 08:07:30 +00:00

Handle changed usernames in import script

This commit is contained in:
Cadence Ember 2020-07-13 02:08:26 +12:00
parent d76c4fae25
commit b4d21f5002
No known key found for this signature in database
GPG Key ID: 128B99B1B74A6412
2 changed files with 73 additions and 33 deletions

View File

@ -9,15 +9,24 @@ const shouldGzip = process.argv.slice(2).includes("--gzip")
const filename = "users_export.json" + (shouldGzip ? ".gz" : "") const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
const target = pj(__dirname, targetDir, filename) const target = pj(__dirname, targetDir, filename)
/**
 * Print a status message, run a task, then finish the status line.
 *
 * Writes `message` to stdout without a trailing newline, awaits `callback()`,
 * and writes "done.\n" once it completes. If the callback throws or rejects,
 * "failed.\n" is written instead so that the status line is terminated before
 * any error output, and the original error is rethrown unchanged.
 *
 * @template T
 * @param {string} message text written to stdout before the task starts
 * @param {() => T | Promise<T>} callback task to run; may be synchronous or return a promise
 * @returns {Promise<T>} the value the callback returned or resolved to
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Close the dangling "Message... " line before the error is reported.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
;(async () => { ;(async () => {
let data = await progress("Preparing export data... ", () => {
const users = db.prepare("SELECT * FROM Users").all() const users = db.prepare("SELECT * FROM Users").all()
let data = Buffer.from(JSON.stringify(users), "utf8") return Buffer.from(JSON.stringify(users), "utf8")
})
if (shouldGzip) { if (shouldGzip) {
data = await p(gzip)(data) data = await progress("Compressing... ", () => p(gzip)(data))
} }
await fs.writeFile(target, data) await progress("Writing file... ", () => fs.writeFile(target, data))
console.log(`Users exported to ${target}`) console.log(`Users exported to ${target}`)
})() })()

View File

@ -4,6 +4,12 @@ const pj = require("path").join
const db = require("../src/lib/db") const db = require("../src/lib/db")
const {request} = require("../src/lib/utils/request") const {request} = require("../src/lib/utils/request")
/**
 * Print a status message, run a task, then finish the status line.
 *
 * Writes `message` to stdout without a trailing newline, awaits `callback()`,
 * and writes "done.\n" once it completes. If the callback throws or rejects,
 * "failed.\n" is written instead so that the status line is terminated before
 * any error output, and the original error is rethrown unchanged.
 *
 * @template T
 * @param {string} message text written to stdout before the task starts
 * @param {() => T | Promise<T>} callback task to run; may be synchronous or return a promise
 * @returns {Promise<T>} the value the callback returned or resolved to
 */
async function progress(message, callback) {
	process.stdout.write(message)
	let result
	try {
		result = await callback()
	} catch (error) {
		// Close the dangling "Message... " line before the error is reported.
		process.stdout.write("failed.\n")
		throw error
	}
	process.stdout.write("done.\n")
	return result
}
;(async () => { ;(async () => {
const target = process.argv[2] const target = process.argv[2]
@ -13,6 +19,7 @@ const {request} = require("../src/lib/utils/request")
process.exit(1) process.exit(1)
} }
// Resolve input to stream
if (target.match(/^https?:\/\//)) { if (target.match(/^https?:\/\//)) {
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.") console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
const ref = await request(target) const ref = await request(target)
@ -33,34 +40,41 @@ const {request} = require("../src/lib/utils/request")
} }
// Read out the stream into a buffer // Read out the stream into a buffer
process.stdout.write("Reading data... ") /** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
const incomingUsers = await progress("Reading data... ", async () => {
const buffers = [] const buffers = []
usersStream.on("data", chunk => buffers.push(chunk)) usersStream.on("data", chunk => buffers.push(chunk))
await new Promise(resolve => usersStream.once("end", resolve)) await new Promise(resolve => usersStream.once("end", resolve))
const usersString = Buffer.concat(buffers).toString("utf8") const usersString = Buffer.concat(buffers).toString("utf8")
process.stdout.write("done.\n") return JSON.parse(usersString)
})
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */ // Note the existing users
const incomingUsers = JSON.parse(usersString) const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
process.stdout.write("Noting existing users... ")
const existing = new Map() const existing = new Map()
for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) { const existingUsernames = new Map()
for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) {
existing.set(row.user_id, row) existing.set(row.user_id, row)
existingUsernames.set(row.username, row.user_id)
} }
process.stdout.write("done.\n") return [existing, existingUsernames]
})
// Prepare queries
const base = const base =
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES " "INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)" +"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
const preparedReplace = db.prepare("REPLACE "+base) const preparedReplace = db.prepare("REPLACE "+base)
const preparedInsert = db.prepare("INSERT "+base) const preparedInsert = db.prepare("INSERT "+base)
const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?")
// Prepare counters
let newCount = 0 let newCount = 0
let overwrittenCount = 0 let overwrittenCount = 0
let skippedCount = 0 let skippedCount = 0
process.stdout.write("Importing into database... ") // Import new data
await progress("Importing into database... ", () => {
db.transaction(() => { db.transaction(() => {
for (const user of incomingUsers) { for (const user of incomingUsers) {
if (existing.has(user.user_id)) { if (existing.has(user.user_id)) {
@ -72,12 +86,29 @@ const {request} = require("../src/lib/utils/request")
skippedCount++ skippedCount++
} }
} else { } else {
if (existingUsernames.has(user.username)) {
/*
The new row's user ID has not been seen, but the new row's username is already used.
So somebody changed username at some point. Which person has the username now?
We'll look at timestamps and accept the later version.
*/
const existingRow = existing.get(existingUsernames.get(user.username))
if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently
preparedDeleteByUsername.run(user.username) // delete the existing copy
existing.delete(user.user_id)
existingUsernames.delete(user.username)
// proceed on to insert the new row
} else { // the existing copy has been updated more recently, so skip this import
skippedCount++
continue // ew
}
}
preparedInsert.run(user) preparedInsert.run(user)
newCount++ newCount++
} }
} }
})() })()
process.stdout.write("done.\n") })
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`) console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
})() })()