# sound_annalysis/cover_song_stream.py

## tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
#################
# standard part #
#################
import essentia.standard as estd
from essentia.pytools.spectral import hpcpgram

# Candidate audio files: the original recording, a cover, and an unrelated song.
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
wrong_song = 'audio/Jacques Brel - Ne Me Quitte Pas.mp3'

# Which file plays which role in the comparison below.
song_reference = yesterday_original  # analysed offline (standard mode) as the reference
song_streaming = wrong_song  # fed through the streaming network and compared to it

# Decode the reference song to a mono signal at 32 kHz, then compute its
# Harmonic Pitch Class Profile (HPCP) chromagram — the feature the streaming
# part will match incoming frames against.
reference_audio = estd.MonoLoader(filename=song_reference, sampleRate=32000)()
true_cover_hpcp = hpcpgram(reference_audio, sampleRate=32000)
##################
# Streaming part #
##################
import essentia.streaming as estr
from essentia import array, run, Pool

# Instantiate all the required essentia streaming algorithms.
# The whole chain runs at a 32 kHz sample rate to match the reference HPCP.
audio = estr.MonoLoader(filename=song_streaming, sampleRate=32000)
frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
windowing = estr.Windowing(type="blackmanharris62")
spectrum = estr.Spectrum()
peak = estr.SpectralPeaks(sampleRate=32000)
whitening = estr.SpectralWhitening(maxFrequency=3500,
                                   sampleRate=32000)
hpcp = estr.HPCP(sampleRate=32000,
                 minFrequency=100,
                 maxFrequency=3500,
                 size=12)

# Create an instance of streaming ChromaCrossSimilarity algorithm.
# With parameter `referenceFeature`,
# we can pass the pre-computed reference song chroma features.
# In this case, we use the pre-computed HPCP feature
# of the 'true_cover_song'.
# With parameter `oti`, we can transpose the pitch
# of the reference song HPCP feature
# to a given OTI [5] (if it's known beforehand).
# By default we set `oti=0`.
sim_matrix = estr.ChromaCrossSimilarity(
    referenceFeature=true_cover_hpcp,
    oti=0)

# Create an instance of the cover song similarity alignment algorithm.
# 'pipeDistance=True' stdouts distance values for each input stream.
alignment = estr.CoverSongSimilarity(pipeDistance=True)

# essentia Pool instance (python dict like object) to aggregate the outputs.
pool = Pool()

# Connect all the required algorithms in an essentia streaming network,
# i.e., connecting inputs and outputs of the algorithms
# in the required workflow and order.
audio.audio >> frame_cutter.signal
frame_cutter.frame >> windowing.frame
windowing.frame >> spectrum.frame
spectrum.spectrum >> peak.spectrum
spectrum.spectrum >> whitening.spectrum
peak.magnitudes >> whitening.magnitudes
peak.frequencies >> whitening.frequencies
peak.frequencies >> hpcp.frequencies
whitening.magnitudes >> hpcp.magnitudes
hpcp.hpcp >> sim_matrix.queryFeature
sim_matrix.csm >> alignment.inputArray
alignment.scoreMatrix >> (pool, 'scoreMatrix')
alignment.distance >> (pool, 'distance')

# Run the algorithm network.
run(audio)

# This process will stdout the cover song similarity distance
# for every input stream in realtime.
# It also aggregates the Smith-Waterman alignment score matrix
# and cover song similarity distance for every accumulating
# input audio stream in an essentia pool instance (similar to a python dict)
# which can be accessed after the end of the stream.
# Now, let's check the final cover song similarity distance value
# computed at the last input stream.
print(pool['distance'][-1])