bibliogram/scripts/import_users.js

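// Imports users into the database from a JSON dump, given either a local file
// path or an http(s) URL on the command line. Dumps compressed with gzip
// (*.gz) are decompressed transparently. Example invocation (the dump
// filename here is illustrative):
//   node scripts/import_users.js users_dump.json.gz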

const fs = require("fs")
const {createGunzip} = require("zlib")
const pj = require("path").join
const db = require("../src/lib/db")
const {request} = require("../src/lib/utils/request")

;(async () => {
	const target = process.argv[2]

	if (!target) {
		console.log("Provide the file or URL to import from on the command line.")
		process.exit(1)
	}

	const isGzip = target.endsWith(".gz")
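
	// Obtain a readable stream of the dump: download it if the target looks
	// like a URL, otherwise open a local file.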
	/** @type {any} */
	let usersStream
	if (target.match(/^https?:\/\//)) {
		console.log("Seems to be a URL, requesting now. This could take a few minutes. Be patient.")
		const ref = await request(target)
		const res = await ref.response()
		// The header may come through as a single value or an array of values
		const lengthContainer = res.headers.get("content-length")
		if (lengthContainer) {
			const length = Number(Array.isArray(lengthContainer) ? lengthContainer[0] : lengthContainer)
			console.log(`${Math.floor(length/1000)} kB will be downloaded`)
		}
		usersStream = await ref.stream()
	} else {
		usersStream = fs.createReadStream(target)
	}

	// Decompress on the fly if the dump is gzipped
	if (isGzip) {
		usersStream = usersStream.pipe(createGunzip())
	}

	// Read out the stream into a buffer
	process.stdout.write("Reading data... ")
	const buffers = []
	usersStream.on("data", chunk => buffers.push(chunk))
	await new Promise(resolve => usersStream.once("end", resolve))
	const usersString = Buffer.concat(buffers).toString("utf8")
	process.stdout.write("done.\n")
	/** @type {{username: string, user_id: string, created: number, updated: number, updated_version: number, biography: string, post_count: number, following_count: number, followed_by_count: number, external_url: string, full_name: string, is_private: number, is_verified: number, profile_pic_url: string}[]} */
	const incomingUsers = JSON.parse(usersString)
process.stdout.write("Noting existing users... ")
2020-04-09 09:59:49 +00:00
	const existing = new Map()
	for (const row of db.prepare("SELECT user_id, updated, updated_version FROM Users").iterate()) {
		existing.set(row.user_id, row)
	}
	process.stdout.write("done.\n")
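
	// Prepared statements with named parameters: better-sqlite3 binds each
	// @name placeholder from the matching key of the object passed to .run(),
	// so the user objects from the dump can be passed in directly.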
	const base =
		"INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
		+ "(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
	const preparedReplace = db.prepare("REPLACE " + base)
	const preparedInsert = db.prepare("INSERT " + base)

	let newCount = 0
	let overwrittenCount = 0
	let skippedCount = 0
	process.stdout.write("Importing into database... ")
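
	// db.transaction(fn) returns a wrapper function; calling it (note the
	// trailing ()) runs everything inside a single atomic transaction, which
	// is far faster than committing each row individually.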
	db.transaction(() => {
		for (const user of incomingUsers) {
			if (existing.has(user.user_id)) {
				const existingRow = existing.get(user.user_id)
				// Overwrite only if the incoming row was saved by the same or a newer
				// code version AND is more recently updated than the row we have
				if (existingRow.updated_version <= user.updated_version && existingRow.updated < user.updated) {
					preparedReplace.run(user)
					overwrittenCount++
				} else {
					skippedCount++
				}
			} else {
				preparedInsert.run(user)
				newCount++
			}
		}
	})()
	process.stdout.write("done.\n")

	console.log(`Imported ${incomingUsers.length} entries (${newCount} new, ${overwrittenCount} overwritten, ${skippedCount} skipped)`)
})()