feat: cover song identification via stream
I found a tutorial showing the streaming way to perform cover song identification. I also added a way to download audio with youtube-dl. To download the music, just run: youtube-dl --config-location config_youtube-dl
This commit is contained in:
parent
76a85a75c8
commit
3c4d7b7299
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
*.sw*
|
*.sw*
|
||||||
|
*.mp3
|
||||||
|
6
config_youtube-dl
Normal file
6
config_youtube-dl
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
--batch-file song_to_test
|
||||||
|
--no-overwrites
|
||||||
|
--continue
|
||||||
|
--extract-audio
|
||||||
|
--audio-format mp3
|
||||||
|
-o ./audio/%(title)s.%(ext)s
|
@ -1,81 +0,0 @@
|
|||||||
## tutorial from: https://essentia.upf.edu/essentia_python_examples.html
|
|
||||||
##In this tutorial, we use HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
|
|
||||||
|
|
||||||
import essentia.standard as estd
|
|
||||||
from essentia.pytools.spectral import hpcpgram
|
|
||||||
|
|
||||||
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
|
||||||
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
|
||||||
yesterday_cover_02 = 'audio/The Beatles - Yesterday Saxophone Cover Alexandra Ilieva Thomann.mp3'
|
|
||||||
different_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
|
|
||||||
|
|
||||||
# query cover song
|
|
||||||
original_song = estd.MonoLoader(filename=yesterday_original, sampleRate=32000)()
|
|
||||||
true_cover_01 = estd.MonoLoader(filename=yesterday_cover_01, sampleRate=32000)()
|
|
||||||
true_cover_02 = estd.MonoLoader(filename=yesterday_cover_02, sampleRate=32000)()
|
|
||||||
|
|
||||||
# wrong match
|
|
||||||
false_cover_1 = estd.MonoLoader(filename=different_song, sampleRate=32000)()
|
|
||||||
|
|
||||||
## Now let’s compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
|
|
||||||
query_hpcp = hpcpgram(original_song, sampleRate=32000)
|
|
||||||
true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=32000)
|
|
||||||
true_cover_hpcp_2 = hpcpgram(true_cover_02, sampleRate=32000)
|
|
||||||
false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=32000)
|
|
||||||
|
|
||||||
## Next steps are done using the essentia ChromaCrossSimilarity function,
|
|
||||||
##
|
|
||||||
## Stacking input features
|
|
||||||
##
|
|
||||||
## Key invariance using Optimal Transposition Index (OTI) [3].
|
|
||||||
##
|
|
||||||
## Compute binary chroma cross similarity using cross recurrent plot as described in [1] or using OTI-based chroma binary method as detailed in [3]
|
|
||||||
|
|
||||||
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
|
|
||||||
frameStackStride=1,
|
|
||||||
binarizePercentile=0.095,
|
|
||||||
oti=True)
|
|
||||||
|
|
||||||
true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
|
|
||||||
true_pair_crp_2 = crp(query_hpcp, true_cover_hpcp_2)
|
|
||||||
|
|
||||||
## Compute binary chroma cross similarity using cross recurrent plot of the non-cover pairs
|
|
||||||
|
|
||||||
false_pair_crp = crp(query_hpcp, false_cover_hpcp)
|
|
||||||
|
|
||||||
## Alternatively, you can also use the OTI-based binary similarity method as explained in [2] to compute the cross similarity of two given chroma features.
|
|
||||||
|
|
||||||
csm = estd.ChromaCrossSimilarity(frameStackSize=9,
|
|
||||||
frameStackStride=1,
|
|
||||||
binarizePercentile=0.095,
|
|
||||||
oti=True,
|
|
||||||
otiBinary=True)
|
|
||||||
|
|
||||||
oti_csm = csm(query_hpcp, false_cover_hpcp)
|
|
||||||
|
|
||||||
|
|
||||||
## Finally, we compute an asymmetric cover song similarity measure from the pre-computed binary cross similarity matrix of cover/non-cover pairs using various constraints of the Smith-Waterman sequence alignment algorithm (e.g. serra09 or chen17).
|
|
||||||
##
|
|
||||||
## Computing cover song similarity distance between ‘Yesterday - accapella cover’ and ‘Yesterday - The Beatles’
|
|
||||||
|
|
||||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
|
||||||
disExtension=0.5,
|
|
||||||
alignmentType='serra09',
|
|
||||||
distanceType='asymmetric')(true_pair_crp_1)
|
|
||||||
print('Cover song similarity distance: %s' % distance)
|
|
||||||
|
|
||||||
## other similar
|
|
||||||
|
|
||||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
|
||||||
disExtension=0.5,
|
|
||||||
alignmentType='serra09',
|
|
||||||
distanceType='asymmetric')(true_pair_crp_2)
|
|
||||||
print('Cover song similarity distance: %s' % distance)
|
|
||||||
|
|
||||||
## Computing cover song similarity distance between Yesterday - accapella cover and Come Together cover - The Aerosmith.
|
|
||||||
|
|
||||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
|
||||||
disExtension=0.5,
|
|
||||||
alignmentType='serra09',
|
|
||||||
distanceType='asymmetric')(false_pair_crp)
|
|
||||||
print('Cover song similarity distance: %s' % distance)
|
|
101
cover_song_stream.py
Normal file
101
cover_song_stream.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
## tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
|
||||||
|
|
||||||
|
#################
|
||||||
|
# standard part #
|
||||||
|
#################
|
||||||
|
|
||||||
|
|
||||||
|
import essentia.standard as estd
|
||||||
|
from essentia.pytools.spectral import hpcpgram
|
||||||
|
|
||||||
|
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
||||||
|
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
||||||
|
wrong_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
|
||||||
|
|
||||||
|
song_reference = yesterday_original
|
||||||
|
|
||||||
|
# reference song (the streamed query below is compared against it)
|
||||||
|
original_song = estd.MonoLoader(filename=song_reference, sampleRate=32000)()
|
||||||
|
## Now let’s compute the Harmonic Pitch Class Profile (HPCP) chroma features of the reference song.
|
||||||
|
true_cover_hpcp = hpcpgram(original_song, sampleRate=32000)
|
||||||
|
|
||||||
|
|
||||||
|
##################
|
||||||
|
# Streaming part #
|
||||||
|
##################
|
||||||
|
|
||||||
|
import essentia.streaming as estr
|
||||||
|
from essentia import array, run, Pool
|
||||||
|
|
||||||
|
query_filename = wrong_song
|
||||||
|
|
||||||
|
# Let's instantiate all the required essentia streaming algorithms
|
||||||
|
|
||||||
|
audio = estr.MonoLoader(filename=query_filename, sampleRate=32000)
|
||||||
|
|
||||||
|
frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
|
||||||
|
|
||||||
|
windowing = estr.Windowing(type="blackmanharris62")
|
||||||
|
|
||||||
|
spectrum = estr.Spectrum();
|
||||||
|
|
||||||
|
peak = estr.SpectralPeaks(sampleRate=32000)
|
||||||
|
|
||||||
|
whitening = estr.SpectralWhitening(maxFrequency=3500,
|
||||||
|
sampleRate=32000);
|
||||||
|
|
||||||
|
hpcp = estr.HPCP(sampleRate=32000,
|
||||||
|
minFrequency=100,
|
||||||
|
maxFrequency=3500,
|
||||||
|
size=12);
|
||||||
|
|
||||||
|
# Create an instance of streaming ChromaCrossSimilarity algorithm
|
||||||
|
# With parameter `referenceFeature`,
|
||||||
|
# we can pass the pre-computed reference song chroma features.
|
||||||
|
# In this case, we use the pre-computed HPCP feature
|
||||||
|
# of the 'true_cover_song'.
|
||||||
|
# With parameter `oti`, we can transpose the pitch
|
||||||
|
# of the reference song HPCP feature
|
||||||
|
# to a given OTI [5] (if it's known beforehand).
|
||||||
|
# By default we set `oti=0`
|
||||||
|
sim_matrix = estr.ChromaCrossSimilarity(
|
||||||
|
referenceFeature=true_cover_hpcp,
|
||||||
|
oti=0)
|
||||||
|
|
||||||
|
# Create an instance of the cover song similarity alignment algorithm
|
||||||
|
# 'pipeDistance=True' prints the distance value to stdout for each input stream
|
||||||
|
alignment = estr.CoverSongSimilarity(pipeDistance=True)
|
||||||
|
|
||||||
|
# essentia Pool instance (python dict like object) to aggregate the outputs
|
||||||
|
pool = Pool()
|
||||||
|
|
||||||
|
# Connect all the required algorithms in an essentia streaming network
|
||||||
|
# i.e., connecting inputs and outputs of the algorithms
|
||||||
|
# in the required workflow and order
|
||||||
|
audio.audio >> frame_cutter.signal
|
||||||
|
frame_cutter.frame >> windowing.frame
|
||||||
|
windowing.frame >> spectrum.frame
|
||||||
|
spectrum.spectrum >> peak.spectrum
|
||||||
|
spectrum.spectrum >> whitening.spectrum
|
||||||
|
peak.magnitudes >> whitening.magnitudes
|
||||||
|
peak.frequencies >> whitening.frequencies
|
||||||
|
peak.frequencies >> hpcp.frequencies
|
||||||
|
whitening.magnitudes >> hpcp.magnitudes
|
||||||
|
hpcp.hpcp >> sim_matrix.queryFeature
|
||||||
|
sim_matrix.csm >> alignment.inputArray
|
||||||
|
alignment.scoreMatrix >> (pool, 'scoreMatrix')
|
||||||
|
alignment.distance >> (pool, 'distance')
|
||||||
|
|
||||||
|
# Run the algorithm network
|
||||||
|
run(audio)
|
||||||
|
# This process will print the cover song similarity distance to stdout
|
||||||
|
# for every input stream in realtime.
|
||||||
|
# It also aggregates the Smith-Waterman alignment score matrix
|
||||||
|
# and cover song similarity distance for every accumulating
|
||||||
|
# input audio stream in an essentia pool instance (similar to a python dict)
|
||||||
|
# which can be accessed after the end of the stream.
|
||||||
|
|
||||||
|
# Now, let's check the final cover song similarity distance value
|
||||||
|
# computed at the last input stream.
|
||||||
|
print(pool['distance'][-1])
|
||||||
|
|
14
song_to_test
Normal file
14
song_to_test
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
https://www.youtube.com/watch?v=TQemQRL_YVQ # yesterday original
|
||||||
|
https://www.youtube.com/watch?v=sGSZA6mYo4c # yesterday cover 1
|
||||||
|
https://www.youtube.com/watch?v=Dyjrnxj70dU # yesterday cover 2
|
||||||
|
|
||||||
|
https://youtu.be/EzRtlhjyNZM # gangsta rap
|
||||||
|
https://youtu.be/mm_PH5BadTk # gangsta rap
|
||||||
|
|
||||||
|
https://youtu.be/26Nuj6dhte8 # Georges Brassens - La Mauvaise Réputation
|
||||||
|
|
||||||
|
https://youtu.be/i2wmKcBm4Ik # Jacques Brel - Ne Me Quitte Pas
|
||||||
|
|
||||||
|
https://youtu.be/nUE80DTNxK4 # Barbara - Dis, quand reviendras-tu
|
||||||
|
|
||||||
|
https://youtu.be/UGtKGX8B9hU # le cafe - oldelaf _ future shorts
|
78
todo
78
todo
@ -35,4 +35,80 @@ Pour ca il faudrait:
|
|||||||
* comprendre un peu la logique du streaming avec essentia
|
* comprendre un peu la logique du streaming avec essentia
|
||||||
* refaire l'exemple en mode streaming
|
* refaire l'exemple en mode streaming
|
||||||
|
|
||||||
?? Est-ce que ca va etre rapide a s'executer ??
|
?? Est-ce que ca va etre rapide a s'executer ??
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
Bon, en fait j'ai trouve le code d'exemple dont j'ai besoin.
|
||||||
|
Ca commence par recuperer en mode standard la description d'un fichier sonore.
|
||||||
|
Puis, avec un input en mode stream, ca compare la distance avec la chanson.
|
||||||
|
|
||||||
|
Ce qu'il reste a faire:
|
||||||
|
* avoir un script pour telecharger les musiques a tester.
|
||||||
|
* avoir un input type micro
|
||||||
|
* avoir une entree avec jack (jackd)
|
||||||
|
* Faire tourner plusieurs processus pour pouvoir analyser plusieurs tracks en meme temps.
|
||||||
|
*
|
||||||
|
|
||||||
|
1) un script qui telecharge les sons:
|
||||||
|
|
||||||
|
J'ai besoin d'un fichier de config qui telecharge les musiques en extrayant le son
|
||||||
|
et les place dans le bon dossier. Et dans un format que je peux lire avec essentia.
|
||||||
|
|
||||||
|
* avoir un
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
|
||||||
|
Les elements pour la config:
|
||||||
|
|
||||||
|
--config-location PATH Location of the configuration file;
|
||||||
|
either the path to the config or its
|
||||||
|
containing directory.
|
||||||
|
|
||||||
|
-a, --batch-file FILE File containing URLs to download ('-'
|
||||||
|
for stdin), one URL per line. Lines
|
||||||
|
starting with '#', ';' or ']' are
|
||||||
|
considered as comments and ignored.
|
||||||
|
|
||||||
|
-c, --continue Force resume of partially downloaded
|
||||||
|
files. By default, youtube-dl will resume downloads if possible.
|
||||||
|
|
||||||
|
|
||||||
|
--encoding ENCODING Force the specified encoding
|
||||||
|
(experimental)
|
||||||
|
|
||||||
|
-x, --extract-audio Convert video files to audio-only files
|
||||||
|
(requires ffmpeg/avconv and
|
||||||
|
ffprobe/avprobe)
|
||||||
|
|
||||||
|
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||||
|
"flac", "mp3", "m4a", "opus", "vorbis",
|
||||||
|
or "wav"; "best" by default; No effect
|
||||||
|
without -x
|
||||||
|
|
||||||
|
-w, --no-overwrites Do not overwrite files
|
||||||
|
|
||||||
|
|
||||||
|
vv -E-X-A-M-P-L-E- -C-O-N-F-I-G- -F-I-L-E- vv
|
||||||
|
|
||||||
|
# Lines starting with # are comments
|
||||||
|
|
||||||
|
# Always extract audio
|
||||||
|
-x
|
||||||
|
|
||||||
|
# Do not copy the mtime
|
||||||
|
--no-mtime
|
||||||
|
|
||||||
|
# Use this proxy
|
||||||
|
--proxy 127.0.0.1:3128
|
||||||
|
|
||||||
|
# Save all videos under Movies directory in your home directory
|
||||||
|
-o ~/Movies/%(title)s.%(ext)s
|
||||||
|
^^ - - - - - - - - - - - - - - - - - - - ^^
|
||||||
|
|
||||||
|
to run download:
|
||||||
|
$> youtube-dl --config-location config_youtube-dl
|
||||||
|
|
||||||
|
Dependance:
|
||||||
|
youtube-dl: sudo apt-get install -y ffmpeg
|
||||||
|
Loading…
Reference in New Issue
Block a user