mirror of
https://git.sr.ht/~cadence/bibliogram
synced 2024-11-22 08:07:30 +00:00
Handle changed usernames in import script
This commit is contained in:
parent
d76c4fae25
commit
b4d21f5002
@ -9,15 +9,24 @@ const shouldGzip = process.argv.slice(2).includes("--gzip")
|
|||||||
const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
|
const filename = "users_export.json" + (shouldGzip ? ".gz" : "")
|
||||||
const target = pj(__dirname, targetDir, filename)
|
const target = pj(__dirname, targetDir, filename)
|
||||||
|
|
||||||
|
/**
 * Run one step of the script with a progress indicator on stdout.
 * Writes `message`, awaits `callback()`, then writes "done.\n".
 * @param {string} message - text written to stdout before the step runs
 * @param {Function} callback - called with no arguments; its return value is awaited, so it may be sync or async
 * @returns {Promise<*>} whatever `callback` returned / resolved to
 * If `callback` throws or rejects, the error propagates and "done." is not written.
 */
async function progress(message, callback) {
|
||||||
|
process.stdout.write(message) // write() appends no newline, so "done." lands on the same line
|
||||||
|
const result = await callback()
|
||||||
|
process.stdout.write("done.\n")
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
;(async () => {
|
;(async () => {
|
||||||
|
let data = await progress("Preparing export data... ", () => {
|
||||||
const users = db.prepare("SELECT * FROM Users").all()
|
const users = db.prepare("SELECT * FROM Users").all()
|
||||||
let data = Buffer.from(JSON.stringify(users), "utf8")
|
return Buffer.from(JSON.stringify(users), "utf8")
|
||||||
|
})
|
||||||
|
|
||||||
if (shouldGzip) {
|
if (shouldGzip) {
|
||||||
data = await p(gzip)(data)
|
data = await progress("Compressing... ", () => p(gzip)(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
await fs.writeFile(target, data)
|
await progress("Writing file... ", () => fs.writeFile(target, data))
|
||||||
|
|
||||||
console.log(`Users exported to ${target}`)
|
console.log(`Users exported to ${target}`)
|
||||||
})()
|
})()
|
||||||
|
@ -4,6 +4,12 @@ const pj = require("path").join
|
|||||||
const db = require("../src/lib/db")
|
const db = require("../src/lib/db")
|
||||||
const {request} = require("../src/lib/utils/request")
|
const {request} = require("../src/lib/utils/request")
|
||||||
|
|
||||||
|
/**
 * Run one step of the script with a progress indicator on stdout.
 * Writes `message`, awaits `callback()`, then writes "done.\n".
 * @param {string} message - text written to stdout before the step runs
 * @param {Function} callback - called with no arguments; its return value is awaited, so it may be sync or async
 * @returns {Promise<*>} whatever `callback` returned / resolved to
 * If `callback` throws or rejects, the error propagates and "done." is not written.
 */
async function progress(message, callback) {
|
||||||
|
process.stdout.write(message) // write() appends no newline, so "done." lands on the same line
|
||||||
|
const result = await callback()
|
||||||
|
process.stdout.write("done.\n")
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
;(async () => {
|
;(async () => {
|
||||||
const target = process.argv[2]
|
const target = process.argv[2]
|
||||||
@ -13,6 +19,7 @@ const {request} = require("../src/lib/utils/request")
|
|||||||
process.exit(1)
|
process.exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolve input to stream
|
||||||
if (target.match(/^https?:\/\//)) {
|
if (target.match(/^https?:\/\//)) {
|
||||||
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
|
console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
|
||||||
const ref = await request(target)
|
const ref = await request(target)
|
||||||
@ -33,34 +40,41 @@ const {request} = require("../src/lib/utils/request")
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read out the stream into a buffer
|
// Read out the stream into a buffer
|
||||||
process.stdout.write("Reading data... ")
|
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
|
||||||
|
const incomingUsers = await progress("Reading data... ", async () => {
|
||||||
const buffers = []
|
const buffers = []
|
||||||
usersStream.on("data", chunk => buffers.push(chunk))
|
usersStream.on("data", chunk => buffers.push(chunk))
|
||||||
await new Promise(resolve => usersStream.once("end", resolve))
|
await new Promise(resolve => usersStream.once("end", resolve))
|
||||||
const usersString = Buffer.concat(buffers).toString("utf8")
|
const usersString = Buffer.concat(buffers).toString("utf8")
|
||||||
process.stdout.write("done.\n")
|
return JSON.parse(usersString)
|
||||||
|
})
|
||||||
|
|
||||||
/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
|
// Note the existing users
|
||||||
const incomingUsers = JSON.parse(usersString)
|
const [existing, existingUsernames] = await progress("Noting existing users... ", () => {
|
||||||
|
|
||||||
process.stdout.write("Noting existing users... ")
|
|
||||||
const existing = new Map()
|
const existing = new Map()
|
||||||
for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) {
|
const existingUsernames = new Map()
|
||||||
|
for (const row of db.prepare("SELECT username, user_id, updated, updated_version FROM Users").iterate()) {
|
||||||
existing.set(row.user_id, row)
|
existing.set(row.user_id, row)
|
||||||
|
existingUsernames.set(row.username, row.user_id)
|
||||||
}
|
}
|
||||||
process.stdout.write("done.\n")
|
return [existing, existingUsernames]
|
||||||
|
})
|
||||||
|
|
||||||
|
// Prepare queries
|
||||||
const base =
|
const base =
|
||||||
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
|
||||||
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
|
||||||
const preparedReplace = db.prepare("REPLACE "+base)
|
const preparedReplace = db.prepare("REPLACE "+base)
|
||||||
const preparedInsert = db.prepare("INSERT "+base)
|
const preparedInsert = db.prepare("INSERT "+base)
|
||||||
|
const preparedDeleteByUsername = db.prepare("DELETE FROM Users WHERE username = ?")
|
||||||
|
|
||||||
|
// Prepare counters
|
||||||
let newCount = 0
|
let newCount = 0
|
||||||
let overwrittenCount = 0
|
let overwrittenCount = 0
|
||||||
let skippedCount = 0
|
let skippedCount = 0
|
||||||
|
|
||||||
process.stdout.write("Importing into database... ")
|
// Import new data
|
||||||
|
await progress("Importing into database... ", () => {
|
||||||
db.transaction(() => {
|
db.transaction(() => {
|
||||||
for (const user of incomingUsers) {
|
for (const user of incomingUsers) {
|
||||||
if (existing.has(user.user_id)) {
|
if (existing.has(user.user_id)) {
|
||||||
@ -72,12 +86,29 @@ const {request} = require("../src/lib/utils/request")
|
|||||||
skippedCount++
|
skippedCount++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (existingUsernames.has(user.username)) {
|
||||||
|
/*
|
||||||
|
The new row's user ID has not been seen, but the new row's username is already used.
|
||||||
|
So somebody changed username at some point. Which person has the username now?
|
||||||
|
We'll look at timestamps and accept the later version.
|
||||||
|
*/
|
||||||
|
const existingRow = existing.get(existingUsernames.get(user.username))
|
||||||
|
if (existingRow.updated < user.updated) { // if the incoming copy has been updated more recently
|
||||||
|
preparedDeleteByUsername.run(user.username) // delete the existing copy
|
||||||
|
existing.delete(existingRow.user_id) // forget the row just deleted from the DB; it is keyed by the previous holder's ID (user.user_id is never in `existing` on this branch)
|
||||||
|
existingUsernames.delete(user.username)
|
||||||
|
// proceed on to insert the new row
|
||||||
|
} else { // the existing copy has been updated more recently, so skip this import
|
||||||
|
skippedCount++
|
||||||
|
continue // ew
|
||||||
|
}
|
||||||
|
}
|
||||||
preparedInsert.run(user)
|
preparedInsert.run(user)
|
||||||
newCount++
|
newCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})()
|
})()
|
||||||
process.stdout.write("done.\n")
|
})
|
||||||
|
|
||||||
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
|
console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
|
||||||
})()
|
})()
|
||||||
|
Loading…
Reference in New Issue
Block a user