sound_annalysis/cover_song_simplified.py
2021-06-04 23:02:04 +02:00

82 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

## Tutorial from: https://essentia.upf.edu/essentia_python_examples.html
## In this tutorial, we use the HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
import essentia.standard as estd
from essentia.pytools.spectral import hpcpgram
## Input recordings: the reference track, two covers of it, and one unrelated
## song that serves as the negative example.
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
yesterday_cover_02 = 'audio/The Beatles - Yesterday Saxophone Cover Alexandra Ilieva Thomann.mp3'
different_song = 'audio/Bella Poarch - Build a Btch (Official Music Video).mp3'

## Load every recording through essentia's MonoLoader with sampleRate=32000.
## The files are read in the order listed, and each result is bound to the
## name in the matching position on the left-hand side.
original_song, true_cover_01, true_cover_02, false_cover_1 = (
    estd.MonoLoader(filename=audio_path, sampleRate=32000)()
    for audio_path in (
        yesterday_original,  # query song
        yesterday_cover_01,  # true cover
        yesterday_cover_02,  # true cover
        different_song,      # wrong match
    )
)
## Now let's compute the Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
def _hpcp_features(signal):
    """Return the HPCP chromagram of *signal*, computed with sampleRate=32000."""
    return hpcpgram(signal, sampleRate=32000)


query_hpcp = _hpcp_features(original_song)
true_cover_hpcp_1 = _hpcp_features(true_cover_01)
true_cover_hpcp_2 = _hpcp_features(true_cover_02)
false_cover_hpcp = _hpcp_features(false_cover_1)
## The next steps are done with the essentia ChromaCrossSimilarity function:
##
##   - stacking input features
##   - key invariance using the Optimal Transposition Index (OTI) [3]
##   - computing the binary chroma cross similarity, either with a cross
##     recurrent plot as described in [1] or with the OTI-based chroma binary
##     method as detailed in [3]
##
## NOTE: the bracketed numbers are citations whose reference list is not part
## of this file (presumably it lives in the tutorial linked at the top).

## Settings shared by both ChromaCrossSimilarity configurations below.
_cross_similarity_options = {
    'frameStackSize': 9,
    'frameStackStride': 1,
    'binarizePercentile': 0.095,
}

crp = estd.ChromaCrossSimilarity(oti=True, **_cross_similarity_options)

## Binary chroma cross similarity (cross recurrent plot) of the cover pairs
true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
true_pair_crp_2 = crp(query_hpcp, true_cover_hpcp_2)

## Binary chroma cross similarity (cross recurrent plot) of the non-cover pair
false_pair_crp = crp(query_hpcp, false_cover_hpcp)

## Alternatively, the OTI-based binary similarity method explained in [2] can be
## used to compute the cross similarity of two given chroma features.
## oti_csm is not read again in the visible part of this script.
csm = estd.ChromaCrossSimilarity(oti=True,
                                 otiBinary=True,
                                 **_cross_similarity_options)
oti_csm = csm(query_hpcp, false_cover_hpcp)
## Finally, we compute an asymmetric cover song similarity measure from the
## pre-computed binary cross similarity matrix of each cover/non-cover pair,
## using the 'serra09' constraints of the Smith-Waterman sequence alignment
## algorithm (the tutorial names serra09 and chen17 as examples).
##
## The pairs are scored in this order, one printed distance per pair:
##   1. original "Yesterday" vs. the Connie Talbot cover   (true cover)
##   2. original "Yesterday" vs. the saxophone cover       (true cover)
##   3. original "Yesterday" vs. the unrelated song        (non-cover)
for pair_crp in (true_pair_crp_1, true_pair_crp_2, false_pair_crp):
    # A fresh, identically configured algorithm instance is used for every pair.
    score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
                                                      disExtension=0.5,
                                                      alignmentType='serra09',
                                                      distanceType='asymmetric')(pair_crp)
    print('Cover song similarity distance: %s' % distance)
## After the loop, score_matrix and distance hold the results of the last
## (non-cover) pair.