# Tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/

#################
# Standard part #
#################

import essentia.standard as estd
from essentia.pytools.spectral import hpcpgram

yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
wrong_song = 'audio/Jacques Brel - Ne Me Quitte Pas.mp3'

song_reference = yesterday_original  # the original song, analysed in standard (non-streaming) mode
song_streaming = wrong_song          # the song loaded in streaming mode and compared to the reference

# Load the reference song
original_song = estd.MonoLoader(filename=song_reference, sampleRate=32000)()

# Compute the Harmonic Pitch Class Profile (HPCP) chroma features of the reference song
true_cover_hpcp = hpcpgram(original_song, sampleRate=32000)
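
# Sanity check on the reference features, assuming hpcpgram returns a
# frame-wise chroma matrix (a numpy-style array of shape (n_frames, n_bins),
# 12 bins by default):
print('Reference HPCP shape:', true_cover_hpcp.shape)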

##################
# Streaming part #
##################

import essentia.streaming as estr
from essentia import array, run, Pool

# Instantiate all the required Essentia streaming algorithms
audio = estr.MonoLoader(filename=song_streaming, sampleRate=32000)

frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)

windowing = estr.Windowing(type="blackmanharris62")

spectrum = estr.Spectrum()

peak = estr.SpectralPeaks(sampleRate=32000)

whitening = estr.SpectralWhitening(maxFrequency=3500,
                                   sampleRate=32000)

hpcp = estr.HPCP(sampleRate=32000,
                 minFrequency=100,
                 maxFrequency=3500,
                 size=12)

# Create an instance of the streaming ChromaCrossSimilarity algorithm.
# With the parameter `referenceFeature` we can pass the pre-computed chroma
# features of the reference song; here we use the HPCP features computed
# above ('true_cover_hpcp').
# With the parameter `oti` we can transpose the reference song HPCP features
# to a given OTI [5] (if it's known beforehand). By default we set `oti=0`.
sim_matrix = estr.ChromaCrossSimilarity(
    referenceFeature=true_cover_hpcp,
    oti=0)

# Create an instance of the cover song similarity alignment algorithm.
# With `pipeDistance=True`, the distance value for each input stream is
# printed to stdout.
alignment = estr.CoverSongSimilarity(pipeDistance=True)

# Essentia Pool instance (a python-dict-like object) to aggregate the outputs
pool = Pool()

# Connect all the required algorithms in an Essentia streaming network,
# i.e., connect the inputs and outputs of the algorithms
# in the required workflow and order
audio.audio >> frame_cutter.signal
frame_cutter.frame >> windowing.frame
windowing.frame >> spectrum.frame
spectrum.spectrum >> peak.spectrum
spectrum.spectrum >> whitening.spectrum
peak.magnitudes >> whitening.magnitudes
peak.frequencies >> whitening.frequencies
peak.frequencies >> hpcp.frequencies
whitening.magnitudes >> hpcp.magnitudes
hpcp.hpcp >> sim_matrix.queryFeature
sim_matrix.csm >> alignment.inputArray
alignment.scoreMatrix >> (pool, 'scoreMatrix')
alignment.distance >> (pool, 'distance')

# Run the algorithm network
run(audio)
# This process prints the cover song similarity distance to stdout
# for every input stream in real time.
# It also aggregates the Smith-Waterman alignment score matrix
# and the cover song similarity distance for every accumulating
# input audio stream in an Essentia Pool instance (similar to a python dict),
# which can be accessed after the end of the stream.

# Now, let's check the final cover song similarity distance value
# computed at the last input stream.
print(pool['distance'][-1])
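
# The pool also accumulates the Smith-Waterman alignment score matrices under
# the 'scoreMatrix' key (see the network connections above). A minimal sketch
# for inspecting the last accumulated matrix, assuming the pool entries behave
# like numpy-style arrays:
last_score_matrix = array(pool['scoreMatrix'][-1])
print('Final score matrix shape:', last_score_matrix.shape)
print('Distances over the input stream:', pool['distance'])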