feat: cover song identification via stream
I found a tutorial showing the streaming way to perform cover song identification. I also added a way to download audio with youtube-dl. To download the music, just run: youtube-dl --config-location config_youtube-dl
This commit is contained in:
parent
76a85a75c8
commit
3c4d7b7299
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
*.sw*
|
||||
*.mp3
|
||||
|
6
config_youtube-dl
Normal file
6
config_youtube-dl
Normal file
@ -0,0 +1,6 @@
|
||||
--batch-file song_to_test
|
||||
--no-overwrites
|
||||
--continue
|
||||
--extract-audio
|
||||
--audio-format mp3
|
||||
-o ./audio/%(title)s.%(ext)s
|
@ -1,81 +0,0 @@
|
||||
## tutorial from: https://essentia.upf.edu/essentia_python_examples.html
|
||||
##In this tutorial, we use HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
|
||||
|
||||
import essentia.standard as estd
|
||||
from essentia.pytools.spectral import hpcpgram
|
||||
|
||||
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
||||
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
||||
yesterday_cover_02 = 'audio/The Beatles - Yesterday Saxophone Cover Alexandra Ilieva Thomann.mp3'
|
||||
different_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
|
||||
|
||||
# query cover song
|
||||
original_song = estd.MonoLoader(filename=yesterday_original, sampleRate=32000)()
|
||||
true_cover_01 = estd.MonoLoader(filename=yesterday_cover_01, sampleRate=32000)()
|
||||
true_cover_02 = estd.MonoLoader(filename=yesterday_cover_02, sampleRate=32000)()
|
||||
|
||||
# wrong match
|
||||
false_cover_1 = estd.MonoLoader(filename=different_song, sampleRate=32000)()
|
||||
|
||||
## Now let’s compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
|
||||
query_hpcp = hpcpgram(original_song, sampleRate=32000)
|
||||
true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=32000)
|
||||
true_cover_hpcp_2 = hpcpgram(true_cover_02, sampleRate=32000)
|
||||
false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=32000)
|
||||
|
||||
## Next steps are done using the essentia ChromaCrossSimilarity function,
|
||||
##
|
||||
## Stacking input features
|
||||
##
|
||||
## Key invariance using Optimal Transposition Index (OTI) [3].
|
||||
##
|
||||
## Compute binary chroma cross similarity using cross recurrent plot as described in [1] or using OTI-based chroma binary method as detailed in [3]
|
||||
|
||||
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
|
||||
frameStackStride=1,
|
||||
binarizePercentile=0.095,
|
||||
oti=True)
|
||||
|
||||
true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
|
||||
true_pair_crp_2 = crp(query_hpcp, true_cover_hpcp_2)
|
||||
|
||||
## Compute binary chroma cross similarity using cross recurrent plot of the non-cover pairs
|
||||
|
||||
false_pair_crp = crp(query_hpcp, false_cover_hpcp)
|
||||
|
||||
## Alternatively, you can also use the OTI-based binary similarity method as explained in [2] to compute the cross similarity of two given chroma features.
|
||||
|
||||
csm = estd.ChromaCrossSimilarity(frameStackSize=9,
|
||||
frameStackStride=1,
|
||||
binarizePercentile=0.095,
|
||||
oti=True,
|
||||
otiBinary=True)
|
||||
|
||||
oti_csm = csm(query_hpcp, false_cover_hpcp)
|
||||
|
||||
|
||||
## Finally, we compute an asymmetric cover song similarity measure from the pre-computed binary cross similarity matrix of cover/non-cover pairs using various constraints of the Smith-Waterman sequence alignment algorithm (e.g. serra09 or chen17).
|
||||
##
|
||||
## Computing cover song similarity distance between ‘Yesterday’ (original, the query) and the Connie Talbot cover of ‘Yesterday’
|
||||
|
||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
||||
disExtension=0.5,
|
||||
alignmentType='serra09',
|
||||
distanceType='asymmetric')(true_pair_crp_1)
|
||||
print('Cover song similarity distance: %s' % distance)
|
||||
|
||||
## Same computation for the second true cover (the saxophone cover of ‘Yesterday’)
|
||||
|
||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
||||
disExtension=0.5,
|
||||
alignmentType='serra09',
|
||||
distanceType='asymmetric')(true_pair_crp_2)
|
||||
print('Cover song similarity distance: %s' % distance)
|
||||
|
||||
## Computing cover song similarity distance between ‘Yesterday’ (original, the query) and an unrelated song (‘Build a Btch’ by Bella Poarch).
|
||||
|
||||
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
||||
disExtension=0.5,
|
||||
alignmentType='serra09',
|
||||
distanceType='asymmetric')(false_pair_crp)
|
||||
print('Cover song similarity distance: %s' % distance)
|
101
cover_song_stream.py
Normal file
101
cover_song_stream.py
Normal file
@ -0,0 +1,101 @@
|
||||
## tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
|
||||
|
||||
#################
|
||||
# standard part #
|
||||
#################
|
||||
|
||||
|
||||
import essentia.standard as estd
|
||||
from essentia.pytools.spectral import hpcpgram
|
||||
|
||||
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
||||
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
||||
wrong_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
|
||||
|
||||
song_reference = yesterday_original
|
||||
|
||||
# reference song, loaded in standard (non-streaming) mode
|
||||
original_song = estd.MonoLoader(filename=song_reference, sampleRate=32000)()
|
||||
## Now let’s compute the Harmonic Pitch Class Profile (HPCP) chroma features of this audio signal.
|
||||
true_cover_hpcp = hpcpgram(original_song, sampleRate=32000)
|
||||
|
||||
|
||||
#################
|
||||
# Streaming part #
|
||||
#################
|
||||
|
||||
import essentia.streaming as estr
|
||||
from essentia import array, run, Pool
|
||||
|
||||
query_filename = wrong_song
|
||||
|
||||
# Let's instantiate all the required essentia streaming algorithms
|
||||
|
||||
audio = estr.MonoLoader(filename=query_filename, sampleRate=32000)
|
||||
|
||||
frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
|
||||
|
||||
windowing = estr.Windowing(type="blackmanharris62")
|
||||
|
||||
spectrum = estr.Spectrum();
|
||||
|
||||
peak = estr.SpectralPeaks(sampleRate=32000)
|
||||
|
||||
whitening = estr.SpectralWhitening(maxFrequency=3500,
|
||||
sampleRate=32000);
|
||||
|
||||
hpcp = estr.HPCP(sampleRate=32000,
|
||||
minFrequency=100,
|
||||
maxFrequency=3500,
|
||||
size=12);
|
||||
|
||||
# Create an instance of streaming ChromaCrossSimilarity algorithm
|
||||
# With parameter `referenceFeature`,
|
||||
# we can pass the pre-computed reference song chroma features.
|
||||
# In this case, we use the pre-computed HPCP feature
|
||||
# of the reference song (held in `true_cover_hpcp`).
|
||||
# With parameter `oti`, we can transpose the pitch
|
||||
# of the reference song HPCP feature
|
||||
# to a given OTI [5] (if it is known beforehand).
|
||||
# By default we set `oti=0`
|
||||
sim_matrix = estr.ChromaCrossSimilarity(
|
||||
referenceFeature=true_cover_hpcp,
|
||||
oti=0)
|
||||
|
||||
# Create an instance of the cover song similarity alignment algorithm
|
||||
# 'pipeDistance=True' prints the distance value to stdout for each input stream
|
||||
alignment = estr.CoverSongSimilarity(pipeDistance=True)
|
||||
|
||||
# essentia Pool instance (python dict like object) to aggregate the outputs
|
||||
pool = Pool()
|
||||
|
||||
# Connect all the required algorithms in an essentia streaming network
|
||||
# ie., connecting inputs and outputs of the algorithms
|
||||
# in the required workflow and order
|
||||
audio.audio >> frame_cutter.signal
|
||||
frame_cutter.frame >> windowing.frame
|
||||
windowing.frame >> spectrum.frame
|
||||
spectrum.spectrum >> peak.spectrum
|
||||
spectrum.spectrum >> whitening.spectrum
|
||||
peak.magnitudes >> whitening.magnitudes
|
||||
peak.frequencies >> whitening.frequencies
|
||||
peak.frequencies >> hpcp.frequencies
|
||||
whitening.magnitudes >> hpcp.magnitudes
|
||||
hpcp.hpcp >> sim_matrix.queryFeature
|
||||
sim_matrix.csm >> alignment.inputArray
|
||||
alignment.scoreMatrix >> (pool, 'scoreMatrix')
|
||||
alignment.distance >> (pool, 'distance')
|
||||
|
||||
# Run the algorithm network
|
||||
run(audio)
|
||||
# This process prints the cover song similarity distance to stdout
|
||||
# for every input stream in realtime.
|
||||
# It also aggregates the Smith-Waterman alignment score matrix
|
||||
# and cover song similarity distance for every accumulating
|
||||
# input audio stream in an essentia pool instance (similar to a python dict)
|
||||
# which can be accessed after the end of the stream.
|
||||
|
||||
# Now, let's check the final cover song similarity distance value
|
||||
# computed at the last input stream.
|
||||
print(pool['distance'][-1])
|
||||
|
14
song_to_test
Normal file
14
song_to_test
Normal file
@ -0,0 +1,14 @@
|
||||
https://www.youtube.com/watch?v=TQemQRL_YVQ # yesterday original
|
||||
https://www.youtube.com/watch?v=sGSZA6mYo4c # yesterday cover 1
|
||||
https://www.youtube.com/watch?v=Dyjrnxj70dU # yesterday cover 2
|
||||
|
||||
https://youtu.be/EzRtlhjyNZM # gangsta rap
|
||||
https://youtu.be/mm_PH5BadTk # gangsta rap
|
||||
|
||||
https://youtu.be/26Nuj6dhte8 # Georges Brassens - La Mauvaise Réputation
|
||||
|
||||
https://youtu.be/i2wmKcBm4Ik # Jacques Brel - Ne Me Quitte Pas
|
||||
|
||||
https://youtu.be/nUE80DTNxK4 # Barbara - Dis, quand reviendras-tu
|
||||
|
||||
https://youtu.be/UGtKGX8B9hU # le cafe - oldelaf _ future shorts
|
78
todo
78
todo
@ -35,4 +35,80 @@ Pour ca il faudrait:
|
||||
* comprendre un peu la logique du streaming avec essentia
|
||||
* refaire l'exemple en mode streaming
|
||||
|
||||
?? Est-ce que ca va etre rapide a s'executer ??
|
||||
?? Est-ce que ca va etre rapide a s'execute ??
|
||||
|
||||
#################################################
|
||||
|
||||
Bon, en fait j'ai trouve le code d'exemple dont j'ai besoin.
|
||||
Ca commence par recuperer en mode standard la description d'un fichier sonore.
|
||||
Puis, avec un input en mode stream, ca compare la distance avec la chanson.
|
||||
|
||||
Ce qu'il reste a faire:
|
||||
* avoir un script pour telecharger les musiques a tester.
|
||||
* avoir un input type micro
|
||||
* avoir une entree avec jack (jackd)
|
||||
* Faire tourner plusieurs processus pour pouvoir analyser plusieurs tracks en meme temps.
|
||||
*
|
||||
|
||||
1) un script qui telecharge les sons:
|
||||
|
||||
J'ai besoin d'un fichier de config qui telecharge les musiques en extrayant le son
|
||||
et les place dans le bon dossier. Et dans un format que je peux lire avec essentia.
|
||||
|
||||
* avoir un
|
||||
*
|
||||
*
|
||||
*
|
||||
|
||||
Les elements pour la config:
|
||||
|
||||
--config-location PATH Location of the configuration file;
|
||||
either the path to the config or its
|
||||
containing directory.
|
||||
|
||||
-a, --batch-file FILE File containing URLs to download ('-'
|
||||
for stdin), one URL per line. Lines
|
||||
starting with '#', ';' or ']' are
|
||||
considered as comments and ignored.
|
||||
|
||||
-c, --continue Force resume of partially downloaded
|
||||
files. By default, youtube-dl will
|
||||
resume downloads if possible.
|
||||
|
||||
--encoding ENCODING Force the specified encoding
|
||||
(experimental)
|
||||
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg/avconv and
|
||||
ffprobe/avprobe)
|
||||
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"flac", "mp3", "m4a", "opus", "vorbis",
|
||||
or "wav"; "best" by default; No effect
|
||||
without -x
|
||||
|
||||
-w, --no-overwrites Do not overwrite files
|
||||
|
||||
|
||||
vv -E-X-A-M-P-L-E- -C-O-N-F-I-G- -F-I-L-E- vv
|
||||
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
|
||||
# Save all videos under Movies directory in your home directory
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
^^ - - - - - - - - - - - - - - - - - - - ^^
|
||||
|
||||
to run download:
|
||||
$> youtube-dl --config-location config_youtube-dl
|
||||
|
||||
Dependance:
|
||||
ffmpeg (requis par youtube-dl pour --extract-audio): sudo apt-get install -y ffmpeg
|
||||
|
Loading…
Reference in New Issue
Block a user