mirror of
https://git.sr.ht/~cadence/cloudtube
synced 2026-06-01 06:56:48 +00:00
Add shorts identification by thumbnails
This commit is contained in:
parent
095dc3f918
commit
21c3f624b0
7 changed files with 235 additions and 5 deletions
201
background/thumbnail-scan.js
Normal file
201
background/thumbnail-scan.js
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
const crypto = require("crypto")
|
||||
const sharp = require("sharp")
|
||||
const Denque = require("denque")
|
||||
const constants = require("../utils/constants")
|
||||
const db = require("../utils/db")
|
||||
|
||||
// SQL text for the statements this module runs for every scanned video.
const SQL_SET_SHORT = "UPDATE Videos SET short = ? WHERE videoId = ?"
const SQL_DELETE_VIDEO = "DELETE FROM Videos WHERE videoId = ?"

// Statements are prepared once when the module is loaded and reused for every scan.
const prepared = {
	// Bound as (short, videoId): writes the scan verdict into Videos.short.
	set_short: db.prepare(SQL_SET_SHORT),
	// Bound as (videoId): removes that row from Videos.
	delete_video: db.prepare(SQL_DELETE_VIDEO),
}
|
||||
|
||||
/**
 * Queue of video IDs whose thumbnails still need to be scanned.
 * Items are taken from the front with `next()`.
 */
class ScannerQueue {
	constructor() {
		/** Pending video IDs; the front of the deque is returned first. */
		this.queue = new Denque()
		/** `Date.now()` value recorded by the most recent `load()`, or 0 if `load()` has not run. */
		this.lastLoadTime = 0
	}

	/**
	 * @returns {boolean} true when no video IDs are queued
	 */
	isEmpty() {
		return this.queue.isEmpty()
	}

	/**
	 * Append every video that is still unclassified (`short IS NULL`) and was
	 * published after the eligibility cutoff, newest first, then record the load time.
	 */
	load() {
		// The cutoff is derived from millisecond values and divided by 1000 before being compared with Videos.published.
		const afterTime = (Date.now() - constants.caching.video_thumbnail_scan_eligible) / 1000
		/** @type {string[]} video IDs to check */
		const videos = db.prepare("SELECT Videos.videoId FROM Videos WHERE Videos.published > ? AND Videos.short IS NULL ORDER BY Videos.published DESC").pluck().all(afterTime)
		this.addLast(videos)
		this.lastLoadTime = Date.now()
	}

	/**
	 * Put items at the front of the queue, ahead of everything already queued.
	 * The batch keeps its given order: after `addNext([a, b])`, `next()` returns `a` and then `b`.
	 * @param {Iterable<string>} items
	 */
	addNext(items) {
		// unshift() prepends a single item, so the batch has to be walked backwards;
		// prepending in forward order would leave the batch reversed at the front.
		const batch = [...items]
		for (let i = batch.length - 1; i >= 0; i--) {
			this.queue.unshift(batch[i])
		}
	}

	/**
	 * Put items at the back of the queue, in the order given.
	 * @param {Iterable<string>} items
	 */
	addLast(items) {
		for (const i of items) {
			this.queue.push(i)
		}
	}

	/**
	 * Remove and return the item at the front of the queue.
	 * @returns {string}
	 * @throws {Error} if the queue is empty
	 */
	next() {
		if (this.isEmpty()) {
			throw new Error("Cannot get next of empty scanner queue")
		}

		return this.queue.shift()
	}
}
|
||||
|
||||
// A pixel counts as light when any channel, after subtracting that channel's
// tolerance, is still greater than DARKNESS_MAX.
const DARKNESS_MAX = 0x4C
const DARKNESS_TOLERANCE_G = 0x05
const DARKNESS_TOLERANCE_RB = 0x0A
// Fraction of a column's rows that may be light before the whole column is reported as light.
const PEAKS_TOLERANCE = 0.01

/**
 * Decide whether one pixel column of a raw image contains more light pixels than the tolerance allows.
 *
 * Pixel ordering is left-to-right, top-to-bottom, without padding. Channel ordering is RGB.
 *
 * @param {Buffer<ArrayBufferLike>} data raw pixel bytes
 * @param {sharp.OutputInfo} info `width`, `height` and `channels` of `data`
 * @param {number} col zero-based column index
 * @returns {boolean} true if more than `ceil(height * PEAKS_TOLERANCE)` pixels in the column are light;
 * false otherwise, including when `col` is not a column of the image
 */
function columnIsLight(data, info, col) {
	// A column outside the image has no pixels. Without this guard a negative index
	// wraps around to the end of the buffer and an index >= width lands in the next row,
	// so the answer would describe unrelated pixels.
	if (!Number.isInteger(col) || col < 0 || col >= info.width) return false

	const peaksCountTolerance = Math.ceil(info.height * PEAKS_TOLERANCE)
	const rowStride = info.channels * info.width
	let peaksSeen = 0
	// Byte offset of this column's pixel in the current row.
	let offset = info.channels * col
	for (let row = 0; row < info.height; row++) {
		const lightness = Math.max(
			data[offset] - DARKNESS_TOLERANCE_RB,
			data[offset + 1] - DARKNESS_TOLERANCE_G,
			data[offset + 2] - DARKNESS_TOLERANCE_RB
		)
		if (lightness > DARKNESS_MAX) {
			peaksSeen++
			// Stop as soon as the tolerance is exceeded; the remaining rows cannot change the answer.
			if (peaksSeen > peaksCountTolerance) return true
		}
		offset += rowStride
	}
	return false
}
|
||||
|
||||
/**
 * Check whether downloaded thumbnail bytes are the placeholder served for deleted videos.
 * The cheap length comparison runs first so that most thumbnails are never hashed.
 * @param {Uint8Array} thumbnail
 * @returns {boolean}
 */
function isDeletedThumbnail(thumbnail) {
	if (thumbnail.byteLength !== 1097) return false
	const digest = crypto.createHash("sha256").update(thumbnail).digest("hex")
	return digest === "20e9aab22032d85684d7d916a1013f7c577a132a5b10ea3fd3578e8d0b28a711"
}

/**
 * Download a thumbnail for the video and report which probe columns around a
 * centred 9:16 region contain light pixels.
 *
 * @param {string} videoId
 * @returns {Promise<string>} `"deleted"` when every thumbnail size is the deleted placeholder.
 * Otherwise four characters, each `"1"` (light) or `"0"` (not light), for the columns
 * just outside the left edge, just inside the left edge, just inside the right edge
 * and just outside the right edge of the 9:16 region, in that order.
 */
async function scanThumbnail(videoId) {
	// Tried in order; the first size that is not the deleted placeholder is used.
	// This order yields the same thumbnail as the earlier implementation, which downloaded
	// sddefault, hqdefault and mqdefault every time and kept the last usable one,
	// but it stops as soon as the answer is known instead of always making three requests.
	// NOTE(review): if the largest size was meant to win, reverse this list — confirm intent.
	const files = ["mqdefault", "hqdefault", "sddefault"]

	let usableThumbnail
	for (const file of files) {
		const res = await fetch(`https://i.ytimg.com/vi/${videoId}/${file}.jpg`)
		const thumbnail = await res.bytes()
		if (isDeletedThumbnail(thumbnail)) continue

		usableThumbnail = thumbnail
		break
	}

	if (!usableThumbnail) return "deleted"

	// Check if it looks like a dimmed 9:16 shorts thumbnail
	const {data, info} = await sharp(usableThumbnail).removeAlpha().raw().toBuffer({resolveWithObject: true})

	// Horizontal bounds of a 9:16 region that spans the full height and is centred in the image.
	const SHORTS_ASPECT_RATIO = 9/16
	const innerWidth = info.height * SHORTS_ASPECT_RATIO
	const innerStartX = info.width / 2 - innerWidth / 2
	const innerEndX = info.width / 2 + innerWidth / 2

	// Probe 3 pixels either side of each boundary so the boundary column itself is not sampled.
	const pattern = [
		columnIsLight(data, info, Math.floor(innerStartX) - 3),
		columnIsLight(data, info, Math.ceil(innerStartX) + 3),
		columnIsLight(data, info, Math.floor(innerEndX) - 3),
		columnIsLight(data, info, Math.ceil(innerEndX) + 3),
	].map(Number).join("")

	return pattern
}
|
||||
|
||||
// Verdicts written to Videos.short.
const IS_NOT_SHORT = 0
const INCONCLUSIVE = 1
const IS_SHORT = 2

/**
 * Background loop that takes video IDs from a ScannerQueue one at a time,
 * scans each thumbnail and stores the verdict. The loop starts on construction.
 */
class Scanner {
	constructor() {
		// State symbols are shared with the refresher: ACTIVE, WAITING and EMPTY are used here.
		this.sym = constants.symbols.refresher
		this.scanQueue = new ScannerQueue()
		this.state = this.sym.ACTIVE
		// Timer handle for the pause between queue loads, so skipWaiting() can cancel it.
		this.waitingTimeout = null
		this.next()
	}

	/**
	 * Scan one video's thumbnail and persist the outcome: the video row is deleted
	 * when the thumbnail is the deleted placeholder, otherwise its `short` value is set.
	 * @param {string} videoId
	 */
	async scanThumbnail(videoId) {
		const scanResult = await scanThumbnail(videoId)

		if (scanResult === "deleted") {
			prepared.delete_video.run(videoId)
			return
		}

		// "0110": light inside the 9:16 region, not light outside it.
		// "1..1": light on both outer sides of the region.
		let verdict = INCONCLUSIVE
		if (scanResult === "0110") {
			verdict = IS_SHORT
		} else if (scanResult.match(/^1..1$/)) {
			verdict = IS_NOT_SHORT
		}
		prepared.set_short.run(verdict, videoId)
	}

	/**
	 * Advance the loop by one step: reload the queue if it has run dry (waiting first
	 * if the previous load was too recent), then start scanning the next video.
	 */
	next() {
		const queue = this.scanQueue

		if (queue.isEmpty()) {
			const sinceLastLoad = Date.now() - queue.lastLoadTime
			const remaining = constants.caching.video_thumbnail_scan_loop_min - sinceLastLoad
			if (remaining > 0) {
				// Loaded too recently; try again once the minimum interval has passed.
				this.state = this.sym.WAITING
				this.waitingTimeout = setTimeout(() => this.next(), remaining)
				return
			}
			queue.load()
		}

		if (queue.isEmpty()) {
			// Nothing to scan even after loading; the loop stops here until skipWaiting() is called.
			this.state = this.sym.EMPTY
			return
		}

		this.state = this.sym.ACTIVE
		const videoId = queue.next()
		const resumeAfterError = error => {
			console.error("Error in background thumbnail scan:\n", error)
			// Pause for 10 seconds before moving on to the next item.
			setTimeout(() => this.next(), 10e3)
		}
		this.scanThumbnail(videoId).then(() => this.next()).catch(resumeAfterError)
	}

	/**
	 * If the loop is not actively scanning, cancel any pending wait and run it immediately,
	 * forcing a fresh queue load.
	 */
	skipWaiting() {
		if (this.state === this.sym.ACTIVE) return

		clearTimeout(this.waitingTimeout)
		// Resetting the load time makes next() treat the minimum interval as already elapsed.
		this.scanQueue.lastLoadTime = 0
		this.next()
	}
}

// Creating the instance starts the scan loop as a side effect of loading this module.
const scanner = new Scanner()
module.exports.scanner = scanner
|
||||
Loading…
Add table
Add a link
Reference in a new issue