1
0
mirror of https://git.sr.ht/~cadence/NewLeaf synced 2024-11-25 00:47:29 +00:00

Fix recommended videos extraction for video IDs starting with - or _

Let's just leverage yt_dlp's own filename sanitization instead of rolling
our own algorithm. This fixes this kind of issue (not finding the yt_dlp
dump file for a given video) once and for all.

Example videos:
* https://www.youtube.com/watch?v=-q78QXpSL2M
* https://www.youtube.com/watch?v=_4SKG5uUEqs
This commit is contained in:
Lomanic 2021-11-01 02:21:30 +01:00 committed by Cadence Ember
parent 2a0291cd5b
commit f22decbb74
No known key found for this signature in database
GPG Key ID: BC1C2C61CF521B17

View File

@ -1,22 +1,14 @@
import os import os
import re import yt_dlp.utils
def get_created_files(id): def get_created_files(id):
# youtube-dl transforms filenames when saving, for example changing - to _ at the start, presumably to avoid triggering switches in the shell, but also in other strange ways # youtube-dl transforms filenames when saving, for example changing - to _ at the start, presumably to avoid triggering switches in the shell, but also in other strange ways
patterns = [ sanitized_id = yt_dlp.utils.sanitize_filename(id)
"__+", "_",
"^_*(-_)?", "",
"^-", "_"
]
trim_id = id
for find, replace in zip(patterns[::-2], patterns[1::-2]): # for each 2 items in the list
trim_id = re.sub(find, replace, trim_id)
# all file names then have an underscore before the converted URL # all file names then have an underscore before the converted URL
id += "_" id += "_"
trim_id += "_" sanitized_id += "_"
return (f for f in os.listdir() if f.startswith(id) or f.startswith(trim_id)) return (f for f in os.listdir() if f.startswith(id) or f.startswith(sanitized_id))
def clean_up_temp_files(id): def clean_up_temp_files(id):
created_files = get_created_files(id) created_files = get_created_files(id)