feat: cover song identification via stream

I found a tutorial showing the streaming way to perform cover song identification. I also added a way to download audio with youtube-dl. To download music, just type: youtube-dl --config-location config_youtube-dl
2021-06-05 21:51:04 +02:00 · 2021-06-05 21:51:04 +02:00 · 3c4d7b7299
commit 3c4d7b7299
parent 76a85a75c8
6 changed files with 199 additions and 82 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 *.sw*
+*.mp3
--- a/config_youtube-dl
+++ b/config_youtube-dl
@ -0,0 +1,6 @@
+--batch-file song_to_test
+--no-overwrites
+--continue
+--extract-audio
+--audio-format mp3
+-o ./audio/%(title)s.%(ext)s
--- a/cover_song_simplified.py
+++ b/cover_song_simplified.py
@ -1,81 +0,0 @@
-## tutorial from: https://essentia.upf.edu/essentia_python_examples.html
-##In this tutorial, we use HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
-
-import essentia.standard as estd
-from essentia.pytools.spectral import hpcpgram
-
-yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
-yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
-yesterday_cover_02 = 'audio/The Beatles - Yesterday Saxophone Cover Alexandra Ilieva Thomann.mp3'
-different_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
-
-# query cover song
-original_song = estd.MonoLoader(filename=yesterday_original, sampleRate=32000)()
-true_cover_01 = estd.MonoLoader(filename=yesterday_cover_01, sampleRate=32000)()
-true_cover_02 = estd.MonoLoader(filename=yesterday_cover_02, sampleRate=32000)()
-
-# wrong match
-false_cover_1 = estd.MonoLoader(filename=different_song, sampleRate=32000)()
-
-## Now let’s compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
-query_hpcp = hpcpgram(original_song, sampleRate=32000)
-true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=32000)
-true_cover_hpcp_2 = hpcpgram(true_cover_02, sampleRate=32000)
-false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=32000)
-
-## Next steps are done using the essentia ChromaCrossSimilarity function,
-## 
-##     Stacking input features
-## 
-##     Key invariance using Optimal Transposition Index (OTI) [3].
-## 
-##     Compute binary chroma cross similarity using cross recurrent plot as described in [1] or using OTI-based chroma binary method as detailed in [3]
-
-crp = estd.ChromaCrossSimilarity(frameStackSize=9,
-                                 frameStackStride=1,
-                                 binarizePercentile=0.095,
-                                 oti=True)
-
-true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
-true_pair_crp_2 = crp(query_hpcp, true_cover_hpcp_2)
-
-## Compute binary chroma cross similarity using cross recurrent plot of the non-cover pairs
-
-false_pair_crp = crp(query_hpcp, false_cover_hpcp)
-
-## Alternatively, you can also use the OTI-based binary similarity method as explained in [2] to compute the cross similarity of two given chroma features.
-
-csm = estd.ChromaCrossSimilarity(frameStackSize=9,
-                                 frameStackStride=1,
-                                 binarizePercentile=0.095,
-                                 oti=True,
-                                 otiBinary=True)
-
-oti_csm = csm(query_hpcp, false_cover_hpcp)
-
-
-##     Finally, we compute an asymmetric cover song similarity measure from the pre-computed binary cross simialrity matrix of cover/non-cover pairs using various contraints of smith-waterman sequence alignment algorithm (eg. serra09 or chen17).
-## 
-## Computing cover song similarity distance between ‘Yesterday - accapella cover’ and ‘Yesterday - The Beatles’
-
-score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
-                                                  disExtension=0.5,
-                                                  alignmentType='serra09',
-                                                  distanceType='asymmetric')(true_pair_crp_1)
-print('Cover song similarity distance: %s' % distance)
-
-## other similar
-
-score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
-                                                  disExtension=0.5,
-                                                  alignmentType='serra09',
-                                                  distanceType='asymmetric')(true_pair_crp_2)
-print('Cover song similarity distance: %s' % distance)
-
-## Computing cover song similarity distance between Yesterday - accapella cover and Come Together cover - The Aerosmith.
-
-score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
-                                                  disExtension=0.5,
-                                                  alignmentType='serra09',
-                                                  distanceType='asymmetric')(false_pair_crp)
-print('Cover song similarity distance: %s' % distance)
--- a/cover_song_stream.py
+++ b/cover_song_stream.py
@ -0,0 +1,101 @@
+## tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
+
+#################
+# standard part #
+#################
+
+
+import essentia.standard as estd
+from essentia.pytools.spectral import hpcpgram
+
+yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
+yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
+wrong_song         = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
+
+song_reference = yesterday_original
+
+# reference song (the query song is streamed further below)
+original_song = estd.MonoLoader(filename=song_reference, sampleRate=32000)()
+## Now let's compute the Harmonic Pitch Class Profile (HPCP) chroma features of the reference audio signal.
+true_cover_hpcp = hpcpgram(original_song, sampleRate=32000)
+
+
+##################
+# Streaming part #
+##################
+
+import essentia.streaming as estr
+from essentia import array, run, Pool
+
+query_filename = wrong_song
+
+# Let's instantiate all the required essentia streaming algorithms
+
+audio = estr.MonoLoader(filename=query_filename, sampleRate=32000)
+  
+frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
+
+windowing = estr.Windowing(type="blackmanharris62")
+
+spectrum  = estr.Spectrum();
+
+peak = estr.SpectralPeaks(sampleRate=32000)
+
+whitening = estr.SpectralWhitening(maxFrequency=3500,
+                                sampleRate=32000);
+
+hpcp = estr.HPCP(sampleRate=32000,
+                 minFrequency=100,
+                 maxFrequency=3500,
+                 size=12);
+
+# Create an instance of the streaming ChromaCrossSimilarity algorithm.
+# With the parameter `referenceFeature`,
+# we can pass the pre-computed reference song chroma features.
+# In this case, we use the pre-computed HPCP feature
+# of the reference song (`true_cover_hpcp`).
+# With the parameter `oti`, we can transpose the pitch
+# of the reference song HPCP feature
+# to a given OTI [5] (if it is known beforehand).
+# By default we set `oti=0`.
+sim_matrix = estr.ChromaCrossSimilarity(
+                referenceFeature=true_cover_hpcp,
+                oti=0)
+
+# Create an instance of the cover song similarity alignment algorithm.
+# `pipeDistance=True` prints the distance value to stdout for each input stream.
+alignment = estr.CoverSongSimilarity(pipeDistance=True)
+
+# essentia Pool instance (Python dict-like object) to aggregate the outputs
+pool = Pool()
+
+# Connect all the required algorithms in an essentia streaming network,
+# i.e., connect the inputs and outputs of the algorithms
+# in the required workflow and order.
+audio.audio >> frame_cutter.signal
+frame_cutter.frame >> windowing.frame
+windowing.frame >> spectrum.frame
+spectrum.spectrum >> peak.spectrum
+spectrum.spectrum >> whitening.spectrum
+peak.magnitudes >> whitening.magnitudes
+peak.frequencies >> whitening.frequencies
+peak.frequencies >> hpcp.frequencies 
+whitening.magnitudes >> hpcp.magnitudes
+hpcp.hpcp >> sim_matrix.queryFeature
+sim_matrix.csm >> alignment.inputArray
+alignment.scoreMatrix >> (pool, 'scoreMatrix')
+alignment.distance >> (pool, 'distance')
+
+# Run the algorithm network
+run(audio)
+# This process will print the cover song similarity distance to stdout
+# for every input stream in real time.
+# It also aggregates the Smith-Waterman alignment score matrix
+# and cover song similarity distance for every accumulating
+# input audio stream in an essentia pool instance (similar to a Python dict),
+# which can be accessed after the end of the stream.
+
+# Now, let's check the final cover song similarity distance value 
+# computed at the last input stream.
+print(pool['distance'][-1])
+
--- a/song_to_test
+++ b/song_to_test
@ -0,0 +1,14 @@
+https://www.youtube.com/watch?v=TQemQRL_YVQ # yesterday original
+https://www.youtube.com/watch?v=sGSZA6mYo4c # yesterday cover 1
+https://www.youtube.com/watch?v=Dyjrnxj70dU # yesterday cover 2
+
+https://youtu.be/EzRtlhjyNZM  # gangsta rap
+https://youtu.be/mm_PH5BadTk  # gangsta rap
+
+https://youtu.be/26Nuj6dhte8 # Georges Brassens - La Mauvaise Réputation
+
+https://youtu.be/i2wmKcBm4Ik # Jacques Brel - Ne Me Quitte Pas
+
+https://youtu.be/nUE80DTNxK4 # Barbara - Dis, quand reviendras-tu
+
+https://youtu.be/UGtKGX8B9hU # le cafe - oldelaf _ future shorts
--- a/78
+++ b/78
@ -35,4 +35,80 @@ Pour ca il faudrait:
    * comprendre un peu la logique du streaming avec essentia
    * refaire l'exemple em mode streaming

-?? Est-ce que ca va etre rapide a s'executer ??
+?? Est-ce que ca va etre rapide a s'executer ??
+
+#################################################
+
+Bon, en fait j'ai trouvé le code d'exemple dont j'ai besoin.
+Ça commence par récupérer en mode standard la description d'un fichier sonore.
+Puis, avec un input en mode stream, ça compare la distance avec la chanson.
+
+Ce qu'il reste à faire :
+    * avoir un script pour télécharger les musiques à tester.
+    * avoir un input de type micro
+    * avoir une entrée avec jack (jackd)
+    * faire tourner plusieurs processus pour pouvoir analyser plusieurs tracks en même temps.
+    * 
+
+1) un script qui télécharge les sons :
+
+J'ai besoin d'un fichier de config qui télécharge les musiques en extrayant le son
+et les place dans le bon dossier, dans un format que je peux lire avec essentia.
+
+    * avoir un 
+    * 
+    * 
+    * 
+
+Les éléments pour la config :
+
+--config-location PATH               Location of the configuration file;
+                                     either the path to the config or its
+                                     containing directory.
+
+-a, --batch-file FILE                File containing URLs to download ('-'
+                                     for stdin), one URL per line. Lines
+                                     starting with '#', ';' or ']' are
+                                     considered as comments and ignored.
+
+-c, --continue                       Force resume of partially downloaded
+                                     files. By default, youtube-dl will
+                                     resume downloads if possible.
+
+--encoding ENCODING                  Force the specified encoding
+                                     (experimental)
+
+-x, --extract-audio                  Convert video files to audio-only files
+                                     (requires ffmpeg/avconv and
+                                     ffprobe/avprobe)
+
+--audio-format FORMAT                Specify audio format: "best", "aac",
+                                     "flac", "mp3", "m4a", "opus", "vorbis",
+                                     or "wav"; "best" by default; No effect
+                                     without -x
+
+-w, --no-overwrites                  Do not overwrite files
+
+
+vv -E-X-M-P-L-E- -C-O-N-F-I-G- -F-I-L-E- vv
+
+# Lines starting with # are comments
+
+# Always extract audio
+-x
+
+# Do not copy the mtime
+--no-mtime
+
+# Use this proxy
+--proxy 127.0.0.1:3128
+
+# Save all videos under Movies directory in your home directory
+-o ~/Movies/%(title)s.%(ext)s
+^^ - - - - - - - - - - - - - - - - - - - ^^
+
+To run the download:
+$> youtube-dl --config-location config_youtube-dl
+
+Dépendances :
+youtube-dl requiert ffmpeg : sudo apt-get install -y ffmpeg