feat: cover song identification via stream

I found a tutorial showing the streaming way to perform cover song identification. I also added a way to download some audio with youtube-dl. To download music, just run: youtube-dl --config-location config_youtube-dl
2021-06-05 21:51:04 +02:00 · 2021-06-05 21:51:04 +02:00 · 3c4d7b7299
commit 3c4d7b7299
parent 76a85a75c8
6 changed files with 199 additions and 82 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 *.sw*
 *.mp3
--- a/6
+++ b/6
@ -0,0 +1,6 @@
 --batch-file song_to_test
 --no-overwrites
 --continue
 --extract-audio
 --audio-format mp3
 -o ./audio/%(title)s.%(ext)s
--- a/cover_song_simplified.py
+++ b/cover_song_simplified.py
@ -1,81 +0,0 @@
 ## Cover song identification with essentia in standard (batch) mode.
 ## Tutorial from: https://essentia.upf.edu/essentia_python_examples.html
 ## In this tutorial, we use the HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
 import essentia.standard as estd
 from essentia.pytools.spectral import hpcpgram
 ## Sample rate shared by every loader and every HPCP computation below.
 SAMPLE_RATE = 32000
 yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
 yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
 yesterday_cover_02 = 'audio/The Beatles - Yesterday Saxophone Cover Alexandra Ilieva Thomann.mp3'
 different_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
 def load_mono(filename):
     """Load `filename` as a mono signal at SAMPLE_RATE."""
     return estd.MonoLoader(filename=filename, sampleRate=SAMPLE_RATE)()
 def cover_song_distance(cross_similarity):
     """Return the `(score_matrix, distance)` pair computed by CoverSongSimilarity."""
     # A fresh algorithm instance is created per call, exactly as the script
     # did when the configuration was written out once per pair.
     similarity = estd.CoverSongSimilarity(disOnset=0.5,
                                           disExtension=0.5,
                                           alignmentType='serra09',
                                           distanceType='asymmetric')
     return similarity(cross_similarity)
 ## Query song (the original recording) and its two covers.
 original_song = load_mono(yesterday_original)
 true_cover_01 = load_mono(yesterday_cover_01)
 true_cover_02 = load_mono(yesterday_cover_02)
 ## Unrelated song, used as the expected non-match.
 false_cover_1 = load_mono(different_song)
 ## Now let's compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
 query_hpcp = hpcpgram(original_song, sampleRate=SAMPLE_RATE)
 true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=SAMPLE_RATE)
 true_cover_hpcp_2 = hpcpgram(true_cover_02, sampleRate=SAMPLE_RATE)
 false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=SAMPLE_RATE)
 ## Next steps are done using the essentia ChromaCrossSimilarity function:
 ##
 ##     Stacking input features
 ##
 ##     Key invariance using Optimal Transposition Index (OTI) [3].
 ##
 ##     Compute binary chroma cross similarity using cross recurrent plot as described in [1]
 ##     or using OTI-based chroma binary method as detailed in [3]
 ## (the bracketed numbers refer to the bibliography of the tutorial linked above)
 crp = estd.ChromaCrossSimilarity(frameStackSize=9,
                                  frameStackStride=1,
                                  binarizePercentile=0.095,
                                  oti=True)
 ## Cross recurrent plots of the two (original, cover) pairs.
 true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
 true_pair_crp_2 = crp(query_hpcp, true_cover_hpcp_2)
 ## Cross recurrent plot of the non-cover pair.
 false_pair_crp = crp(query_hpcp, false_cover_hpcp)
 ## Alternatively, you can also use the OTI-based binary similarity method as explained in [2]
 ## to compute the cross similarity of two given chroma features.
 ## NOTE: `oti_csm` is computed for demonstration only; nothing below reads it.
 csm = estd.ChromaCrossSimilarity(frameStackSize=9,
                                  frameStackStride=1,
                                  binarizePercentile=0.095,
                                  oti=True,
                                  otiBinary=True)
 oti_csm = csm(query_hpcp, false_cover_hpcp)
 ## Finally, we compute an asymmetric cover song similarity measure from the pre-computed binary
 ## cross similarity matrix of cover/non-cover pairs using various constraints of the
 ## Smith-Waterman sequence alignment algorithm (eg. serra09 or chen17).
 ##
 ## Distance between the original 'Yesterday' and its first cover (Connie Talbot).
 score_matrix, distance = cover_song_distance(true_pair_crp_1)
 print('Cover song similarity distance: %s' % distance)
 ## Distance between the original 'Yesterday' and its second cover (saxophone).
 score_matrix, distance = cover_song_distance(true_pair_crp_2)
 print('Cover song similarity distance: %s' % distance)
 ## Distance between the original 'Yesterday' and the unrelated song (expected non-cover).
 score_matrix, distance = cover_song_distance(false_pair_crp)
 print('Cover song similarity distance: %s' % distance)
--- a/cover_song_stream.py
+++ b/cover_song_stream.py
@ -0,0 +1,101 @@
 ## Cover song identification with an essentia streaming network.
 ## Tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
 ##
 ## The reference song is analysed once in standard (batch) mode; the query song
 ## is then pushed through a streaming network that compares it to the reference.
 import essentia.standard as estd
 import essentia.streaming as estr
 from essentia import array, run, Pool
 from essentia.pytools.spectral import hpcpgram
 # Sample rate shared by the loaders and by every sample-rate-aware algorithm below.
 SAMPLE_RATE = 32000
 #################
 # standard part #
 #################
 yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
 yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
 wrong_song         = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'
 song_reference = yesterday_original
 # Load the reference song.
 original_song = estd.MonoLoader(filename=song_reference, sampleRate=SAMPLE_RATE)()
 # Compute the Harmonic Pitch Class Profile (HPCP) chroma features of the reference.
 # NOTE: despite its name, `true_cover_hpcp` holds the HPCP of `song_reference`.
 true_cover_hpcp = hpcpgram(original_song, sampleRate=SAMPLE_RATE)
 ##################
 # Streaming part #
 ##################
 # Song streamed against the reference; assign `yesterday_cover_01` instead
 # to run the same network on the cover.
 query_filename = wrong_song
 # Let's instantiate all the required essentia streaming algorithms
 audio = estr.MonoLoader(filename=query_filename, sampleRate=SAMPLE_RATE)
 frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
 windowing = estr.Windowing(type="blackmanharris62")
 spectrum = estr.Spectrum()
 peak = estr.SpectralPeaks(sampleRate=SAMPLE_RATE)
 whitening = estr.SpectralWhitening(maxFrequency=3500,
                                    sampleRate=SAMPLE_RATE)
 hpcp = estr.HPCP(sampleRate=SAMPLE_RATE,
                  minFrequency=100,
                  maxFrequency=3500,
                  size=12)
 # Create an instance of the streaming ChromaCrossSimilarity algorithm.
 # With parameter `referenceFeature`, we pass the pre-computed chroma
 # features of the reference song (the HPCP computed in the standard part).
 # With parameter `oti`, we can transpose the pitch of the reference song
 # HPCP feature to a given OTI [5] (if it's known beforehand).
 # By default we set `oti=0`.
 sim_matrix = estr.ChromaCrossSimilarity(
                 referenceFeature=true_cover_hpcp,
                 oti=0)
 # Create an instance of the cover song similarity alignment algorithm.
 # Per the tutorial, `pipeDistance=True` writes the distance value to stdout
 # for each input stream.
 alignment = estr.CoverSongSimilarity(pipeDistance=True)
 # essentia Pool instance (python dict like object) to aggregate the outputs
 pool = Pool()
 # Connect all the required algorithms in an essentia streaming network,
 # ie. connect the inputs and outputs of the algorithms
 # in the required workflow and order.
 # audio -> frames -> window -> spectrum
 audio.audio >> frame_cutter.signal
 frame_cutter.frame >> windowing.frame
 windowing.frame >> spectrum.frame
 # The spectrum feeds both the peak detector and the whitening stage.
 spectrum.spectrum >> peak.spectrum
 spectrum.spectrum >> whitening.spectrum
 peak.magnitudes >> whitening.magnitudes
 peak.frequencies >> whitening.frequencies
 # HPCP takes the peak frequencies with the whitened magnitudes.
 peak.frequencies >> hpcp.frequencies
 whitening.magnitudes >> hpcp.magnitudes
 # chroma -> cross similarity with the reference -> alignment -> pool
 hpcp.hpcp >> sim_matrix.queryFeature
 sim_matrix.csm >> alignment.inputArray
 alignment.scoreMatrix >> (pool, 'scoreMatrix')
 alignment.distance >> (pool, 'distance')
 # Run the algorithm network
 run(audio)
 # This process writes the cover song similarity distance to stdout
 # for every input stream in realtime.
 # It also aggregates the Smith-Waterman alignment score matrix
 # and the cover song similarity distance for every accumulating
 # input audio stream in the essentia pool instance (similar to a python dict),
 # which can be accessed after the end of the stream.
 # Now, let's check the final cover song similarity distance value
 # computed at the last input stream.
 print(pool['distance'][-1])
--- a/14
+++ b/14
@ -0,0 +1,14 @@
 https://www.youtube.com/watch?v=TQemQRL_YVQ # yesterday original
 https://www.youtube.com/watch?v=sGSZA6mYo4c # yesterday cover 1
 https://www.youtube.com/watch?v=Dyjrnxj70dU # yesterday cover 2
 https://youtu.be/EzRtlhjyNZM  # gangsta rap
 https://youtu.be/mm_PH5BadTk  # gangsta rap
 https://youtu.be/26Nuj6dhte8 # Georges Brassens - La Mauvaise Réputation
 https://youtu.be/i2wmKcBm4Ik # Jacques Brel - Ne Me Quitte Pas
 https://youtu.be/nUE80DTNxK4 # Barbara - Dis, quand reviendras-tu
 https://youtu.be/UGtKGX8B9hU # le cafe - oldelaf _ future shorts
--- a/78
+++ b/78
@ -35,4 +35,80 @@ Pour ca il faudrait:
    * comprendre un peu la logique du streaming avec essentia
    * refaire l'exemple em mode streaming
-?? Est-ce que ca va etre rapide a s'executer ??
+?? Est-ce que ca va etre rapide a s'execute ??
 #################################################
 Bon, en fait j'ai trouvé le code d'exemple dont j'ai besoin.
 Ça commence par récupérer en mode standard la description d'un fichier sonore.
 Puis, avec un input en mode stream, ça compare la distance avec la chanson.
 Ce qu'il reste à faire:
    * avoir un script pour télécharger les musiques à tester.
    * avoir un input type micro
    * avoir une entrée avec jack (jackd)
    * Faire tourner plusieurs processus pour pouvoir analyser plusieurs tracks en même temps.
    * 
 1) un script qui télécharge les sons:
 J'ai besoin d'un fichier de config qui télécharge les musiques en extrayant le son
 et les place dans le bon dossier. Et dans un format que je peux lire avec essentia.
    * avoir un 
    * 
    * 
    * 
 Les éléments pour la config:
 --config-location PATH               Location of the configuration file;
                                     either the path to the config or its
                                     containing directory.
 -a, --batch-file FILE                File containing URLs to download ('-'
                                     for stdin), one URL per line. Lines
                                     starting with '#', ';' or ']' are
                                     considered as comments and ignored.
 -c, --continue                       Force resume of partially downloaded
                                      files. By default, youtube-dl will
                                      resume downloads if possible.
 --encoding ENCODING                  Force the specified encoding
                                      (experimental)
 -x, --extract-audio                  Convert video files to audio-only files
                                     (requires ffmpeg/avconv and
                                     ffprobe/avprobe)
 --audio-format FORMAT                Specify audio format: "best", "aac",
                                     "flac", "mp3", "m4a", "opus", "vorbis",
                                     or "wav"; "best" by default; No effect
                                     without -x
 -w, --no-overwrites                  Do not overwrite files
 vv -E-X-M-P-L-E- -C-O-N-F-I-G- -F-I-L-E- vv
 # Lines starting with # are comments
 # Always extract audio
 -x
 # Do not copy the mtime
 --no-mtime
 # Use this proxy
 --proxy 127.0.0.1:3128
 # Save all videos under Movies directory in your home directory
 -o ~/Movies/%(title)s.%(ext)s
 ^^ - - - - - - - - - - - - - - - - - - - ^^
 to run download:
 $> youtube-dl --config-location config_youtube-dl
 Dépendances:
 youtube-dl a besoin de ffmpeg pour extraire l'audio: sudo apt-get install -y ffmpeg