From 21c3f624b01b08fa1b140a3250ee0f3b52d30417 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 27 May 2026 22:52:13 +1200 Subject: [PATCH] Add shorts identification by thumbnails --- api/subscriptions.js | 4 +- background/feed-update.js | 8 +- background/thumbnail-scan.js | 201 +++++++++++++++++++++++++++++++++++ pug/settings.pug | 10 ++ utils/constants.js | 6 ++ utils/getuser.js | 4 +- utils/upgradedb.js | 7 ++ 7 files changed, 235 insertions(+), 5 deletions(-) create mode 100644 background/thumbnail-scan.js diff --git a/api/subscriptions.js b/api/subscriptions.js index 6c44aaf..cfd90b3 100644 --- a/api/subscriptions.js +++ b/api/subscriptions.js @@ -13,6 +13,7 @@ module.exports = [ let channels = [] let missingChannelCount = 0 let refreshed = null + const settings = user.getSettingsOrDefaults() if (user.token) { // trigger a background refresh, needed if they came back from being inactive refresher.skipWaiting() @@ -26,7 +27,7 @@ module.exports = [ // get videos if (channels.length) { hasSubscriptions = true - videos = db.prepare(`SELECT Videos.* FROM Videos INNER JOIN Subscriptions ON Videos.authorID = Subscriptions.ucid WHERE token = ? ORDER BY published DESC LIMIT 60`).all(user.token) + videos = db.prepare(`SELECT Videos.* FROM Videos INNER JOIN Subscriptions ON Videos.authorID = Subscriptions.ucid WHERE token = ? AND (short IS NULL OR short <= ?) 
ORDER BY published DESC LIMIT 60`).all(user.token, +settings.show_shorts + 1) .map(video => { video.publishedText = timeToPastText(video.published * 1000) video.watched = watchedVideos.includes(video.videoId) @@ -37,7 +38,6 @@ module.exports = [ const filters = user.getFilters() ;({videos} = applyVideoFilters(videos, filters)) } - const settings = user.getSettingsOrDefaults() const instanceOrigin = settings.instance return render(200, "pug/subscriptions.pug", {req, url, settings, hasSubscriptions, videos, channels, missingChannelCount, refreshed, timeToPastText, instanceOrigin}) } diff --git a/background/feed-update.js b/background/feed-update.js index 10f2bb1..7d258ec 100644 --- a/background/feed-update.js +++ b/background/feed-update.js @@ -1,5 +1,6 @@ const Denque = require("denque") const constants = require("../utils/constants") +const thumbnailScan = require("./thumbnail-scan") const db = require("../utils/db") const prepared = { @@ -81,7 +82,12 @@ class Refresher { video.descriptionHtml = video.descriptionHtml.replace(/ ? AND Videos.short IS NULL ORDER BY Videos.published DESC").pluck().all(afterTime) + // console.log(`loaded ${videos.length} videos for thumbnail scanning`) + this.addLast(videos) + this.lastLoadTime = Date.now() + } + + addNext(items) { + for (const i of items) { + this.queue.unshift(i) + } + } + + addLast(items) { + for (const i of items) { + this.queue.push(i) + } + } + + next() { + if (this.isEmpty()) { + throw new Error("Cannot get next of empty scanner queue") + } + + const item = this.queue.shift() + return item + } +} + +const DARKNESS_MAX = 0x4C +const DARKNESS_TOLERANCE_G = 0x05 +const DARKNESS_TOLERANCE_RB = 0x0A +const PEAKS_TOLERANCE = 0.01 + +/** + * @param {Buffer} data + * @param {sharp.OutputInfo} info + * @param {number} col + */ +function columnIsLight(data, info, col) { + // Pixel ordering is left-to-right, top-to-bottom, without padding. Channel ordering is RGB. 
+  // console.log(`Scanning column ${col}`)
+  // Number of light pixels tolerated before the whole column counts as light.
+  const peaksCountTolerance = Math.ceil(info.height * PEAKS_TOLERANCE)
+  let peaksSeen = 0
+  for (let row = 0; row < info.height; row++) {
+    const x = info.channels * col
+    const y = row * info.channels * info.width
+    const xy = x + y
+    // Lightness of a pixel is its brightest channel after subtracting that channel's tolerance.
+    const lightness = Math.max(data.at(xy + 0) - DARKNESS_TOLERANCE_RB, data.at(xy + 1) - DARKNESS_TOLERANCE_G, data.at(xy + 2) - DARKNESS_TOLERANCE_RB)
+    if (lightness > DARKNESS_MAX) {
+      peaksSeen++
+      // console.log(`col ${col} row ${row} lightness = 0x${lightness.toString(16)}`)
+      if (peaksSeen > peaksCountTolerance) return true
+    }
+  }
+  return false
+}
+
+/**
+ * Download a thumbnail of the video and sample four pixel columns around the
+ * edges of a horizontally centred 9:16 region of it.
+ *
+ * @param {string} videoId
+ * @returns {Promise<string>} "deleted" when every candidate file is the
+ * deleted-video placeholder image. Otherwise a string of four characters, each
+ * "1" (column is light) or "0" (column is dark), for the columns: just outside
+ * the left edge, just inside the left edge, just inside the right edge, just
+ * outside the right edge.
+ */
+async function scanThumbnail(videoId) {
+  // Candidates in order of preference; the first usable one is scanned.
+  // NOTE(review): this loop used to download every candidate and let the last
+  // usable one win, so mqdefault was effectively preferred. That priority is
+  // kept here, without the extra downloads - confirm it is the intended one.
+  const files = ["mqdefault", "hqdefault", "sddefault"]
+
+  let usableThumbnail
+  for (const file of files) {
+    const thumbnail = await fetch(`https://i.ytimg.com/vi/${videoId}/${file}.jpg`).then(res => res.bytes())
+    // console.log(`scanning ${videoId}`)
+
+    // Check if it's the deleted thumbnail
+    // First check length
+    if (thumbnail.byteLength === 1097) {
+      // Then check hash
+      const h = crypto.createHash("sha256")
+      h.update(thumbnail)
+      if (h.digest("hex") === "20e9aab22032d85684d7d916a1013f7c577a132a5b10ea3fd3578e8d0b28a711") {
+        // It is deleted
+        continue
+      }
+    }
+
+    usableThumbnail = thumbnail
+    // No need to download the remaining, less preferred candidates.
+    break
+  }
+
+  if (!usableThumbnail) return "deleted"
+
+  // Check if it looks like a dimmed 9:16 shorts thumbnail
+  const {data, info} = await sharp(usableThumbnail).removeAlpha().raw().toBuffer({resolveWithObject: true})
+  // console.log(data)
+  // console.log(info)
+
+  // NOTE(review): the inner width is derived from the full image height, which
+  // assumes the image has no letterbox bars - confirm for hqdefault/sddefault.
+  const SHORTS_ASPECT_RATIO = 9/16
+  const innerWidth = info.height * SHORTS_ASPECT_RATIO
+  const innerStartX = info.width / 2 - innerWidth / 2
+  const innerEndX = info.width / 2 + innerWidth / 2
+  // console.log(innerStartX, innerEndX)
+
+  // Sample 3 pixels either side of each edge of the inner region.
+  const pattern = [
+    columnIsLight(data, info, Math.floor(innerStartX) - 3),
+    columnIsLight(data, info, Math.ceil(innerStartX) + 3),
+    columnIsLight(data, info, Math.floor(innerEndX) - 3),
+    columnIsLight(data, info, Math.ceil(innerEndX) + 3),
+  ].map(Number).join("")
+
+  return pattern
+}
+
+// Classification values passed to prepared.set_short.
+// NOTE(review): presumably stored in the Videos.short column - confirm against the statement.
+const IS_NOT_SHORT = 0
+const INCONCLUSIVE = 1
+const IS_SHORT = 2
+
+/**
+ * Background worker that takes video IDs from a ScannerQueue one at a time,
+ * scans each thumbnail and records the result through the prepared statements.
+ * `state` is one of the ACTIVE / WAITING / EMPTY symbols from
+ * constants.symbols.refresher.
+ */
+class Scanner {
+  /** Sets up the queue and immediately starts the scan loop. */
+  constructor() {
+    this.sym = constants.symbols.refresher
+    this.scanQueue = new ScannerQueue()
+    this.state = this.sym.ACTIVE
+    this.waitingTimeout = null
+    this.next()
+  }
+
+  /**
+   * Scan one video's thumbnail and record the outcome:
+   * "deleted" runs delete_video, "0110" (dark outside, light inside) records
+   * IS_SHORT, a pattern that is light on both outer columns records
+   * IS_NOT_SHORT, and anything else records INCONCLUSIVE.
+   * @param {string} videoId
+   */
+  async scanThumbnail(videoId) {
+    const scanResult = await scanThumbnail(videoId)
+    if (scanResult === "deleted") {
+      prepared.delete_video.run(videoId)
+    } else if (scanResult === "0110") {
+      prepared.set_short.run(IS_SHORT, videoId)
+      // console.log("is short")
+    } else if (scanResult.match(/^1..1$/)) {
+      prepared.set_short.run(IS_NOT_SHORT, videoId)
+      // console.log("is not short")
+    } else {
+      prepared.set_short.run(INCONCLUSIVE, videoId)
+      // console.log(`inconclusive: ${scanResult}`)
+    }
+  }
+
+
+  /**
+   * Advance the loop by one step. When the queue is empty it is reloaded,
+   * unless the last load was less than video_thumbnail_scan_loop_min ago, in
+   * which case a timer is set to try again once that interval has passed.
+   * When a video is available it is scanned and the loop continues afterwards;
+   * after an error the loop resumes 10 seconds later with the following video.
+   */
+  next() {
+    if (this.scanQueue.isEmpty()) {
+      const timeSinceLastLoop = Date.now() - this.scanQueue.lastLoadTime
+      if (timeSinceLastLoop < constants.caching.video_thumbnail_scan_loop_min) {
+        const timeToWait = constants.caching.video_thumbnail_scan_loop_min - timeSinceLastLoop
+        // console.log(`waiting ${timeToWait} before next loop`)
+        this.state = this.sym.WAITING
+        this.waitingTimeout = setTimeout(() => this.next(), timeToWait)
+        return
+      } else {
+        this.scanQueue.load()
+      }
+    }
+
+    if (!this.scanQueue.isEmpty()) {
+      this.state = this.sym.ACTIVE
+      const videoId = this.scanQueue.next()
+      this.scanThumbnail(videoId).then(() => this.next()).catch(error => {
+        console.error("Error in background thumbnail scan:\n", error)
+        setTimeout(() => {
+          this.next()
+        }, 10e3)
+      })
+    } else {
+      // Nothing to scan after a reload; the loop stays idle until skipWaiting() is called.
+      this.state = this.sym.EMPTY
+    }
+  }
+
+  /**
+   * Wake the loop up right away if it is not currently scanning: cancels the
+   * pending wait timer and resets lastLoadTime so next() reloads the queue.
+   */
+  skipWaiting() {
+    if (this.state !== this.sym.ACTIVE) {
+      clearTimeout(this.waitingTimeout)
+      this.scanQueue.lastLoadTime = 0
+      this.next()
+    }
+  }
+}
+
+const scanner = new Scanner()
+module.exports.scanner = scanner
diff --git a/pug/settings.pug b/pug/settings.pug
index f504dba..dfe1637
100644 --- a/pug/settings.pug +++ b/pug/settings.pug @@ -66,6 +66,16 @@ block content ] }) + +select({ + id: "show_shorts", + label: "Display YouTube Shorts", + description: "When off, shorts will be hidden from the subscriptions page.\nShorts are detected by thumbnail patterns with a 10% false negative rate and a 0% false positive rate.", + options: [ + {value: "0", text: "Hide shorts"}, + {value: "1", text: "Show shorts"} + ] + }) + +select({ id: "quality", label: "Preferred qualities", diff --git a/utils/constants.js b/utils/constants.js index fe06882..f1e1a0f 100644 --- a/utils/constants.js +++ b/utils/constants.js @@ -28,6 +28,10 @@ let constants = { recommended_mode: { type: "integer", default: 0 + }, + show_shorts: { + type: "boolean", + default: false } }, @@ -49,7 +53,9 @@ let constants = { caching: { csrf_time: 4*60*60*1000, seen_token_subscriptions_eligible: 40*60*60*1000, + video_thumbnail_scan_eligible: 90*24*60*60*1000, subscriptions_refresh_loop_min: 5*60*1000, + video_thumbnail_scan_loop_min: 2*60*60*1000, subscriptions_refesh_fake_not_found_cooldown: 10*60*1000, }, diff --git a/utils/getuser.js b/utils/getuser.js index 4fc1d24..58ad47c 100644 --- a/utils/getuser.js +++ b/utils/getuser.js @@ -34,7 +34,7 @@ class User { this.token = token } - /** @return {{instance?: string, save_history?: boolean, local?: boolean, quality?: number}} */ + /** @return {{instance?: string, save_history?: boolean, show_shorts?: boolean, local?: number, quality?: number}} */ getSettings() { if (this.token) { return db.prepare("SELECT * FROM Settings WHERE token = ?").get(this.token) || {} @@ -43,7 +43,7 @@ class User { } } - /** @return {{instance?: string, save_history?: boolean, local?: boolean, quality?: number}} */ + /** @return {{instance?: string, save_history?: boolean, show_shorts?: boolean, local?: number, quality?: number}} */ getSettingsOrDefaults() { const settings = this.getSettings() for (const key of Object.keys(constants.user_settings)) { diff --git 
a/utils/upgradedb.js b/utils/upgradedb.js index 545ed61..4fdcd75 100644 --- a/utils/upgradedb.js +++ b/utils/upgradedb.js @@ -95,6 +95,13 @@ const deltas = [ db.prepare("ALTER TABLE NEW_Subscriptions RENAME TO Subscriptions") .run() })() + }, + // 13: Videos +short + function() { + db.prepare("ALTER TABLE Videos ADD COLUMN short INTEGER") + .run() + db.prepare("ALTER TABLE Settings ADD COLUMN show_shorts INTEGER NOT NULL DEFAULT 0") + .run() } ]