mirror of
https://git.sr.ht/~cadence/cloudtube
synced 2026-05-31 22:46:46 +00:00
Add shorts identification by thumbnails
This commit is contained in:
parent
095dc3f918
commit
21c3f624b0
7 changed files with 235 additions and 5 deletions
|
|
@ -13,6 +13,7 @@ module.exports = [
|
|||
let channels = []
|
||||
let missingChannelCount = 0
|
||||
let refreshed = null
|
||||
const settings = user.getSettingsOrDefaults()
|
||||
if (user.token) {
|
||||
// trigger a background refresh, needed if they came back from being inactive
|
||||
refresher.skipWaiting()
|
||||
|
|
@ -26,7 +27,7 @@ module.exports = [
|
|||
// get videos
|
||||
if (channels.length) {
|
||||
hasSubscriptions = true
|
||||
videos = db.prepare(`SELECT Videos.* FROM Videos INNER JOIN Subscriptions ON Videos.authorID = Subscriptions.ucid WHERE token = ? ORDER BY published DESC LIMIT 60`).all(user.token)
|
||||
videos = db.prepare(`SELECT Videos.* FROM Videos INNER JOIN Subscriptions ON Videos.authorID = Subscriptions.ucid WHERE token = ? AND (short IS NULL OR short <= ?) ORDER BY published DESC LIMIT 60`).all(user.token, +settings.show_shorts + 1)
|
||||
.map(video => {
|
||||
video.publishedText = timeToPastText(video.published * 1000)
|
||||
video.watched = watchedVideos.includes(video.videoId)
|
||||
|
|
@ -37,7 +38,6 @@ module.exports = [
|
|||
const filters = user.getFilters()
|
||||
;({videos} = applyVideoFilters(videos, filters))
|
||||
}
|
||||
const settings = user.getSettingsOrDefaults()
|
||||
const instanceOrigin = settings.instance
|
||||
return render(200, "pug/subscriptions.pug", {req, url, settings, hasSubscriptions, videos, channels, missingChannelCount, refreshed, timeToPastText, instanceOrigin})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
const Denque = require("denque")
|
||||
const constants = require("../utils/constants")
|
||||
const thumbnailScan = require("./thumbnail-scan")
|
||||
const db = require("../utils/db")
|
||||
|
||||
const prepared = {
|
||||
|
|
@ -81,7 +82,12 @@ class Refresher {
|
|||
video.descriptionHtml = video.descriptionHtml.replace(/<a /g, '<a tabindex="-1" ') // should be safe
|
||||
video.viewCountText = null //TODO?
|
||||
// store
|
||||
prepared.video_insert.run(video)
|
||||
const {changes} = prepared.video_insert.run(video)
|
||||
if (changes === 1) {
|
||||
// video was newly found, run a thumbnail scan on it
|
||||
thumbnailScan.scanner.scanQueue.addNext([video.videoId])
|
||||
thumbnailScan.scanner.skipWaiting()
|
||||
}
|
||||
})
|
||||
// update channel refreshed
|
||||
prepared.channel_refreshed_update.run(Date.now(), ucid)
|
||||
|
|
|
|||
201
background/thumbnail-scan.js
Normal file
201
background/thumbnail-scan.js
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
const crypto = require("crypto")
|
||||
const sharp = require("sharp")
|
||||
const Denque = require("denque")
|
||||
const constants = require("../utils/constants")
|
||||
const db = require("../utils/db")
|
||||
|
||||
// Statements used to record the outcome of a thumbnail scan.
// They are prepared once at module load and reused for every scanned video.
const prepared = {
	// Stores the scan verdict in Videos.short for one video.
	set_short: db.prepare("UPDATE Videos SET short = ? WHERE videoId = ?"),
	// Removes a video whose thumbnails were all reported as deleted.
	delete_video: db.prepare("DELETE FROM Videos WHERE videoId = ?"),
}
|
||||
|
||||
/**
 * Queue of video IDs waiting to have their thumbnail scanned.
 *
 * `lastLoadTime` is the `Date.now()` timestamp of the most recent `load()`,
 * or 0 if the queue has never been loaded (or a reload was forced).
 */
class ScannerQueue {
	constructor() {
		this.queue = new Denque()
		this.lastLoadTime = 0
	}

	/** @returns {boolean} whether there is nothing left to scan */
	isEmpty() {
		return this.queue.isEmpty()
	}

	/**
	 * Append every recently published video that has no scan verdict yet
	 * (`short IS NULL`), newest first, and remember when this load happened.
	 */
	load() {
		// Videos.published is compared in seconds, the eligibility window is in milliseconds.
		const cutoffSeconds = (Date.now() - constants.caching.video_thumbnail_scan_eligible) / 1000
		/** @type {string[]} video IDs to check */
		const unscannedIds = db
			.prepare("SELECT Videos.videoId FROM Videos WHERE Videos.published > ? AND Videos.short IS NULL ORDER BY Videos.published DESC")
			.pluck()
			.all(cutoffSeconds)
		this.addLast(unscannedIds)
		this.lastLoadTime = Date.now()
	}

	/**
	 * Put items at the front of the queue.
	 * Each item is unshifted in turn, so when several items are given the
	 * last one ends up first in line.
	 * @param {Iterable<string>} items
	 */
	addNext(items) {
		for (const videoId of items) this.queue.unshift(videoId)
	}

	/**
	 * Put items at the back of the queue, keeping their order.
	 * @param {Iterable<string>} items
	 */
	addLast(items) {
		for (const videoId of items) this.queue.push(videoId)
	}

	/**
	 * Remove and return the item at the front of the queue.
	 * @throws {Error} if the queue is empty
	 */
	next() {
		if (!this.isEmpty()) return this.queue.shift()
		throw new Error("Cannot get next of empty scanner queue")
	}
}
|
||||
|
||||
// A pixel counts as "light" when any channel, after subtracting that channel's
// tolerance, is still above DARKNESS_MAX.
const DARKNESS_MAX = 0x4C
const DARKNESS_TOLERANCE_G = 0x05
const DARKNESS_TOLERANCE_RB = 0x0A
// Fraction of the rows in a column that may be light before the whole column is considered light.
const PEAKS_TOLERANCE = 0.01

/**
 * Decide whether one pixel column of a raw image is light.
 *
 * The column is light as soon as the number of light pixels in it exceeds
 * `ceil(info.height * PEAKS_TOLERANCE)`.
 *
 * A column outside `0 <= col < info.width` has no pixels and is reported as
 * not light. Without this check a negative column would index from the end of
 * the buffer and a too-large column would read pixels belonging to the
 * following row.
 *
 * @param {Buffer<ArrayBufferLike>} data raw pixel data
 * @param {sharp.OutputInfo} info uses `width`, `height` and `channels`
 * @param {number} col zero-based column index
 * @returns {boolean} whether the column is light
 */
function columnIsLight(data, info, col) {
	if (col < 0 || col >= info.width) return false

	// Pixel ordering is left-to-right, top-to-bottom, without padding. Channel ordering is RGB.
	const peaksCountTolerance = Math.ceil(info.height * PEAKS_TOLERANCE)
	const rowStride = info.channels * info.width
	const columnOffset = info.channels * col

	let peaksSeen = 0
	for (let row = 0; row < info.height; row++) {
		const xy = columnOffset + row * rowStride
		// Indexing past the end of a short buffer yields undefined -> NaN, which never counts as light.
		const lightness = Math.max(
			data[xy] - DARKNESS_TOLERANCE_RB,
			data[xy + 1] - DARKNESS_TOLERANCE_G,
			data[xy + 2] - DARKNESS_TOLERANCE_RB
		)
		if (lightness > DARKNESS_MAX) {
			peaksSeen++
			if (peaksSeen > peaksCountTolerance) return true
		}
	}
	return false
}
|
||||
|
||||
/**
 * Download a video's thumbnail and probe four pixel columns around the edges
 * of a centred 9:16 region.
 *
 * @param {string} videoId
 * @returns {Promise<string>} `"deleted"` when every thumbnail variant is the
 * deleted-video placeholder, otherwise a 4-character string of `0`/`1` telling
 * whether the columns just outside-left, just inside-left, just inside-right
 * and just outside-right of the 9:16 region are light.
 */
async function scanThumbnail(videoId) {
	let usableThumbnail
	// NOTE(review): every variant is fetched and the last non-deleted one wins,
	// so mqdefault is used whenever it is available - confirm this is intended.
	for (const file of ["sddefault", "hqdefault", "mqdefault"]) {
		const response = await fetch(`https://i.ytimg.com/vi/${videoId}/${file}.jpg`)
		const thumbnail = await response.bytes()

		// The deleted-video placeholder is recognised by its length first
		// (cheap) and only then by its hash.
		const isDeletedPlaceholder =
			thumbnail.byteLength === 1097 &&
			crypto.createHash("sha256").update(thumbnail).digest("hex") === "20e9aab22032d85684d7d916a1013f7c577a132a5b10ea3fd3578e8d0b28a711"
		if (!isDeletedPlaceholder) {
			usableThumbnail = thumbnail
		}
	}

	if (!usableThumbnail) return "deleted"

	// Check if it looks like a dimmed 9:16 shorts thumbnail
	const {data, info} = await sharp(usableThumbnail).removeAlpha().raw().toBuffer({resolveWithObject: true})

	const SHORTS_ASPECT_RATIO = 9/16
	const innerWidth = info.height * SHORTS_ASPECT_RATIO
	const centreX = info.width / 2
	const halfInnerWidth = innerWidth / 2
	const innerStartX = centreX - halfInnerWidth
	const innerEndX = centreX + halfInnerWidth

	// Probe 3 pixels either side of each edge of the inner region.
	const probedColumns = [
		Math.floor(innerStartX) - 3,
		Math.ceil(innerStartX) + 3,
		Math.floor(innerEndX) - 3,
		Math.ceil(innerEndX) + 3,
	]
	return probedColumns
		.map(col => (columnIsLight(data, info, col) ? "1" : "0"))
		.join("")
}
|
||||
|
||||
// Verdicts stored in Videos.short. NULL in that column means "not scanned yet".
const IS_NOT_SHORT = 0
const INCONCLUSIVE = 1
const IS_SHORT = 2

/**
 * Background loop that takes video IDs off a ScannerQueue one at a time,
 * scans their thumbnail, and stores the verdict.
 *
 * `state` is one of the `constants.symbols.refresher` symbols:
 * ACTIVE while a scan is in flight (or a retry is pending), WAITING while a
 * timer is pending for the next queue load, EMPTY when a load found nothing.
 */
class Scanner {
	constructor() {
		this.sym = constants.symbols.refresher
		this.scanQueue = new ScannerQueue()
		this.state = this.sym.ACTIVE
		this.waitingTimeout = null
		// Start the loop immediately.
		this.next()
	}

	/**
	 * Scan one video's thumbnail and persist the result: the video is deleted
	 * when its thumbnails are gone, otherwise its `short` verdict is stored.
	 * @param {string} videoId
	 */
	async scanThumbnail(videoId) {
		const scanResult = await scanThumbnail(videoId)
		if (scanResult === "deleted") {
			prepared.delete_video.run(videoId)
			return
		}

		let verdict = INCONCLUSIVE
		if (scanResult === "0110") {
			// dark outside, light inside on both edges
			verdict = IS_SHORT
		} else if (/^1..1$/.test(scanResult)) {
			// light outside the 9:16 region on both sides
			verdict = IS_NOT_SHORT
		}
		prepared.set_short.run(verdict, videoId)
	}

	/**
	 * Advance the loop by one step: reload the queue if it has run dry and the
	 * minimum interval between loads has passed, then scan the next video.
	 */
	next() {
		const queue = this.scanQueue

		if (queue.isEmpty()) {
			const elapsed = Date.now() - queue.lastLoadTime
			const remaining = constants.caching.video_thumbnail_scan_loop_min - elapsed
			if (remaining > 0) {
				// Too soon to load again; come back when the interval is over.
				this.state = this.sym.WAITING
				this.waitingTimeout = setTimeout(() => this.next(), remaining)
				return
			}
			queue.load()
		}

		if (queue.isEmpty()) {
			this.state = this.sym.EMPTY
			return
		}

		this.state = this.sym.ACTIVE
		const videoId = queue.next()
		const retryLater = error => {
			console.error("Error in background thumbnail scan:\n", error)
			setTimeout(() => {
				this.next()
			}, 10e3)
		}
		this.scanThumbnail(videoId).then(() => this.next()).catch(retryLater)
	}

	/**
	 * Wake the loop up right away unless it is already active. Cancels any
	 * pending wait and resets the load time so the next empty queue reloads.
	 */
	skipWaiting() {
		if (this.state === this.sym.ACTIVE) return
		clearTimeout(this.waitingTimeout)
		this.scanQueue.lastLoadTime = 0
		this.next()
	}
}

// Single shared scanner; constructing it starts the background loop.
const scanner = new Scanner()
module.exports.scanner = scanner
|
||||
|
|
@ -66,6 +66,16 @@ block content
|
|||
]
|
||||
})
|
||||
|
||||
+select({
|
||||
id: "show_shorts",
|
||||
label: "Display YouTube Shorts",
|
||||
description: "When off, shorts will be hidden from the subscriptions page.\nShorts are detected by thumbnail patterns with a 10% false negative rate and a 0% false positive rate.",
|
||||
options: [
|
||||
{value: "0", text: "Hide shorts"},
|
||||
{value: "1", text: "Show shorts"}
|
||||
]
|
||||
})
|
||||
|
||||
+select({
|
||||
id: "quality",
|
||||
label: "Preferred qualities",
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ let constants = {
|
|||
recommended_mode: {
|
||||
type: "integer",
|
||||
default: 0
|
||||
},
|
||||
show_shorts: {
|
||||
type: "boolean",
|
||||
default: false
|
||||
}
|
||||
},
|
||||
|
||||
|
|
@ -49,7 +53,9 @@ let constants = {
|
|||
caching: {
|
||||
csrf_time: 4*60*60*1000,
|
||||
seen_token_subscriptions_eligible: 40*60*60*1000,
|
||||
video_thumbnail_scan_eligible: 90*24*60*60*1000,
|
||||
subscriptions_refresh_loop_min: 5*60*1000,
|
||||
video_thumbnail_scan_loop_min: 2*60*60*1000,
|
||||
subscriptions_refesh_fake_not_found_cooldown: 10*60*1000,
|
||||
},
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class User {
|
|||
this.token = token
|
||||
}
|
||||
|
||||
/** @return {{instance?: string, save_history?: boolean, local?: boolean, quality?: number}} */
|
||||
/** @return {{instance?: string, save_history?: boolean, show_shorts?: boolean, local?: number, quality?: number}} */
|
||||
getSettings() {
|
||||
if (this.token) {
|
||||
return db.prepare("SELECT * FROM Settings WHERE token = ?").get(this.token) || {}
|
||||
|
|
@ -43,7 +43,7 @@ class User {
|
|||
}
|
||||
}
|
||||
|
||||
/** @return {{instance?: string, save_history?: boolean, local?: boolean, quality?: number}} */
|
||||
/** @return {{instance?: string, save_history?: boolean, show_shorts?: boolean, local?: number, quality?: number}} */
|
||||
getSettingsOrDefaults() {
|
||||
const settings = this.getSettings()
|
||||
for (const key of Object.keys(constants.user_settings)) {
|
||||
|
|
|
|||
|
|
@ -95,6 +95,13 @@ const deltas = [
|
|||
db.prepare("ALTER TABLE NEW_Subscriptions RENAME TO Subscriptions")
|
||||
.run()
|
||||
})()
|
||||
},
|
||||
// 13: Videos +short
|
||||
function() {
|
||||
db.prepare("ALTER TABLE Videos ADD COLUMN short INTEGER")
|
||||
.run()
|
||||
db.prepare("ALTER TABLE Settings ADD COLUMN show_shorts INTEGER NOT NULL DEFAULT 0")
|
||||
.run()
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue