mirror of
https://git.sr.ht/~cadence/cloudtube
synced 2025-01-21 02:56:59 +00:00
Add script to remove old video descriptions
This commit is contained in:
parent
55696a1b54
commit
efcef9b540
@ -16,5 +16,8 @@
|
||||
"mixin-deep": "^2.0.1",
|
||||
"node-fetch": "^2.6.6",
|
||||
"pinski": "git+https://git.sr.ht/~cadence/nodejs-pinski#9653807f309aee34c8c63ce4e6ee760cccbfdf0d"
|
||||
},
|
||||
"devDependencies": {
|
||||
"cli-progress": "^3.12.0"
|
||||
}
|
||||
}
|
||||
|
67
scripts/min-video-data.js
Normal file
67
scripts/min-video-data.js
Normal file
@ -0,0 +1,67 @@
|
||||
const fs = require("fs")
|
||||
const zlib = require("zlib")
|
||||
const progress = require("cli-progress")
|
||||
const {promisify} = require("util")
|
||||
const {pipeline} = require("stream")
|
||||
const pipe = promisify(pipeline)
|
||||
|
||||
const db = require("../utils/db")
|
||||
|
||||
// Trim threshold as a Unix timestamp in seconds: 2023-01-01T00:00:00Z (a date-only ISO string is parsed as UTC, and getTime() returns milliseconds).
// Bound to the `published < ?` placeholders below - assumes Videos.published is stored in seconds; TODO confirm against the schema.
const cutoff = new Date("2023-01-01").getTime() / 1000
|
||||
|
||||
/**
 * Lazily yields the rows of a prepared statement, binding the module-level
 * `cutoff` as its single parameter.
 *
 * The statement is switched to raw mode first, so (presumably - `db` looks
 * like a better-sqlite3 handle; confirm in ../utils/db) each row arrives as
 * an array of column values rather than an object.
 *
 * @param {object} stmt prepared statement exposing `raw()` and `iterate()`
 * @yields one row at a time, exactly as produced by `stmt.raw().iterate(cutoff)`
 */
function* toRows(stmt) {
	yield* stmt.raw().iterate(cutoff)
}
|
||||
|
||||
/**
 * One-off maintenance script entry point.
 *
 * 1. Counts how many Videos rows are older than `cutoff` and aborts if that
 *    would cover (almost) the whole table.
 * 2. Exports videoId + descriptionHtml of those rows to a gzipped JSON Lines
 *    backup in the current working directory.
 * 3. Only after the backup has been fully written, nulls out the
 *    descriptions and vacuums the database.
 *
 * Any failure rejects the promise; the handler at the bottom prints the error
 * and sets a non-zero exit code. Nothing is removed from the database unless
 * the backup step completed successfully.
 */
(async () => {
	const countToMin = db.prepare("select count(*) from Videos where published < ?").pluck().get(cutoff)
	const countTotal = db.prepare("select count(*) from Videos").pluck().get()
	console.log("want to trim", countToMin, "out of", countTotal, "videos")

	// ensure that we're not trimming the entire content:
	// bail out when 10 or fewer videos would be left untouched
	if (Math.abs(countTotal - countToMin) <= 10) {
		throw new Error("failsafe: not trimming everything")
	}

	// export
	const backupName = "video-descriptions-backup.jsonl.gz"
	console.log(`exporting a backup to ${backupName}...`)
	const contents = db.prepare("select videoId, descriptionHtml from Videos where published < ? order by author asc, published asc")

	const rowsProgress = new progress.SingleBar({fps: 3}, progress.Presets.shades_classic)
	const gzipProgress = new progress.SingleBar({fps: 3}, progress.Presets.shades_classic)

	const gzip = zlib.createGzip()
	// "wx" fails with EEXIST instead of truncating an existing backup. Re-running
	// this script after a successful run would otherwise overwrite the only copy
	// of the descriptions with rows of nulls. Delete or move the old file to re-run.
	const dest = fs.createWriteStream(backupName, {flags: "wx"})
	// pipeline (unlike .pipe) forwards errors from either stream and settles
	// once dest has finished, so a failed write rejects instead of hanging
	const written = pipe(gzip, dest)

	// write rows into gzip
	// The return value of write() is deliberately ignored: every row is queued
	// in gzip's writable buffer up front so that the size of that buffer can
	// serve as the total for the compression progress bar below.
	rowsProgress.start(countToMin, 0)
	try {
		for (const row of toRows(contents)) {
			// one JSON document per line, as the .jsonl extension promises
			gzip.write(JSON.stringify(row) + "\n")
			rowsProgress.increment()
		}
		gzip.end()
	} catch (error) {
		// tear the pipeline down so the file handle is released, absorb the
		// secondary rejection that causes, and surface the original error
		gzip.destroy(error)
		await written.catch(() => {})
		throw error
	} finally {
		rowsProgress.stop()
	}

	// track gzip progress
	console.log(" compressing backup...")
	const max = gzip.writableLength
	gzipProgress.start(max, 0)
	const interval = setInterval(() => {
		gzipProgress.update(max - gzip.writableLength)
	}, 100)
	try {
		await written
	} finally {
		clearInterval(interval)
		gzipProgress.stop()
	}

	// do it!
	console.log("removing descriptions...")
	db.prepare("update videos set descriptionHtml = null where published < ?").run(cutoff)

	console.log("reclaiming disk space from database...")
	db.prepare("vacuum").run()
})().catch(error => {
	console.error(error)
	process.exitCode = 1
})
|
Loading…
Reference in New Issue
Block a user