## tutorial from: https://essentia.upf.edu/essentia_python_examples.html
#
## Cover song identification (CSI) in MIR is the task of identifying when two
## musical recordings are derived from the same music composition. The cover
## of a song can be drastically different from the original recording. It can
## change key, tempo, instrumentation, musical structure or order, etc.
##
## Essentia provides open-source implementations of some state-of-the-art
## cover song identification algorithms. The following process-chain is
## required to use these CSI algorithms:
##
##   * Tonal feature extraction. Mostly done with chroma features. Here we
##     use HPCP.
##   * Post-processing of the features to achieve invariance (eg. key) [3].
##   * Cross similarity matrix computation ([1] or [2]).
##   * Local sub-sequence alignment to compute the pairwise cover song
##     similarity distance [1].
##
## In this tutorial, we use HPCP, ChromaCrossSimilarity and
## CoverSongSimilarity algorithms from essentia.

import essentia.standard as estd
from essentia.pytools.spectral import hpcpgram
import IPython

## Paths of the three recordings used throughout the tutorial: the original
## song, a cover of it, and an unrelated song used as the negative example.
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
different_song = 'audio/Jacques Brel - Ne Me Quitte Pas.mp3'

## Audio previews. A bare `Audio(...)` expression is only rendered when it is
## the last expression of a notebook cell, so each player is passed to
## display() explicitly to make all three show up.
IPython.display.display(IPython.display.Audio(yesterday_original))
IPython.display.display(IPython.display.Audio(yesterday_cover_01))
IPython.display.display(IPython.display.Audio(different_song))

## Load every recording as a mono signal at the same sample rate (32000).
# query song and its true cover
original_song = estd.MonoLoader(filename=yesterday_original, sampleRate=32000)()
true_cover_01 = estd.MonoLoader(filename=yesterday_cover_01, sampleRate=32000)()
# wrong match
false_cover_1 = estd.MonoLoader(filename=different_song, sampleRate=32000)()

## Now let's compute Harmonic Pitch Class Profile (HPCP) chroma features of
## these audio signals.
## HPCP chromagram of each recording. The sampleRate passed here (32000) is
## the same value the signals were loaded with.
query_hpcp = hpcpgram(original_song, sampleRate=32000)
true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=32000)
false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=32000)

## plotting the hpcp features
#%matplotlib inline
import matplotlib.pyplot as plt

## Every plot below gets its own figure. Re-using plt.gcf() would draw all
## images onto the same axes when this file runs as one script/cell.
fig = plt.figure(figsize=(14.5, 4.5))
plt.title("Query song HPCP")
# Only the first 500 frames are shown; transposed before plotting.
plt.imshow(query_hpcp[:500].T, aspect='auto', origin='lower', interpolation='none')

## Next steps are done using the essentia ChromaCrossSimilarity function:
##
##   * Stacking input features
##   * Key invariance using Optimal Transposition Index (OTI) [3].
##   * Compute binary chroma cross similarity using cross recurrent plot as
##     described in [1] or using OTI-based chroma binary method as detailed
##     in [3]
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
                                 frameStackStride=1,
                                 binarizePercentile=0.095,
                                 oti=True)

## Cross recurrent plot of the true cover pair.
true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)

fig = plt.figure(figsize=(15.5, 5.5))
plt.title('Cross recurrent plot [1]')
plt.xlabel('Yesterday accapella cover')
plt.ylabel('Yesterday - The Beatles')
plt.imshow(true_pair_crp_1, origin='lower')

## Compute binary chroma cross similarity using cross recurrent plot of the
## non-cover pair.
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
                                 frameStackStride=1,
                                 binarizePercentile=0.095,
                                 oti=True)

false_pair_crp = crp(query_hpcp, false_cover_hpcp)

fig = plt.figure(figsize=(15.5, 5.5))
plt.title('Cross recurrent plot [1]')
# The non-cover recording used in this script is
# 'Jacques Brel - Ne Me Quitte Pas', so the axis is labelled accordingly.
plt.xlabel('Ne Me Quitte Pas - Jacques Brel')
plt.ylabel('Yesterday - The Beatles')
plt.imshow(false_pair_crp, origin='lower')

## Alternatively, you can also use the OTI-based binary similarity method as
## explained in [2] to compute the cross similarity of two given chroma
## features.
## OTI-based binary cross similarity (otiBinary=True) of the non-cover pair.
csm = estd.ChromaCrossSimilarity(frameStackSize=9,
                                 frameStackStride=1,
                                 binarizePercentile=0.095,
                                 oti=True,
                                 otiBinary=True)

oti_csm = csm(query_hpcp, false_cover_hpcp)

## Each plot gets a fresh figure; plt.gcf() would keep drawing onto whatever
## figure is already current when the file runs as one script/cell.
fig = plt.figure(figsize=(15.5, 5.5))
plt.title('Cross similarity matrix using OTI binary method [2]')
# The non-cover recording used in this script is
# 'Jacques Brel - Ne Me Quitte Pas', so the axis is labelled accordingly.
plt.xlabel('Ne Me Quitte Pas - Jacques Brel')
plt.ylabel('Yesterday - The Beatles')
plt.imshow(oti_csm, origin='lower')

## Finally, we compute an asymmetric cover song similarity measure from the
## pre-computed binary cross similarity matrix of cover/non-cover pairs using
## various constraints of the smith-waterman sequence alignment algorithm
## (eg. serra09 or chen17).
##
## Computing cover song similarity distance between 'Yesterday - accapella
## cover' and 'Yesterday - The Beatles'
score_matrix, distance = estd.CoverSongSimilarity(
    disOnset=0.5,
    disExtension=0.5,
    alignmentType='serra09',
    distanceType='asymmetric',
)(true_pair_crp_1)

fig = plt.figure(figsize=(15.5, 5.5))
plt.title('Cover song similarity distance: %s' % distance)
plt.xlabel('Yesterday accapella cover')
plt.ylabel('Yesterday - The Beatles')
plt.imshow(score_matrix, origin='lower')

print('Cover song similarity distance: %s' % distance)

## Computing cover song similarity distance between 'Yesterday - The Beatles'
## and the non-cover song (input: false_pair_crp).
score_matrix, distance = estd.CoverSongSimilarity(
    disOnset=0.5,
    disExtension=0.5,
    alignmentType='serra09',
    distanceType='asymmetric',
)(false_pair_crp)

fig = plt.figure(figsize=(15.5, 5.5))
plt.title('Cover song similarity distance: %s' % distance)
# Axis labels follow the plot of false_pair_crp itself, the matrix this
# score matrix was computed from; the accapella cover is not part of it.
plt.xlabel('Ne Me Quitte Pas - Jacques Brel')
plt.ylabel('Yesterday - The Beatles')
plt.imshow(score_matrix, origin='lower')

print('Cover song similarity distance: %s' % distance)

## Without this call the figures are never displayed when the file is run as
## a plain script.
plt.show()