Compare commits
8 Commits
6048fef636
...
0b78c404d3
Author | SHA1 | Date | |
---|---|---|---|
|
0b78c404d3 | ||
|
23502290e4 | ||
|
33925bbe3c | ||
|
3c4d7b7299 | ||
|
76a85a75c8 | ||
|
4671056cdc | ||
|
dfbc7e9600 | ||
|
ea925614e4 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
*.sw*
|
*.sw*
|
||||||
|
*.mp3
|
||||||
|
6
config_youtube-dl
Normal file
6
config_youtube-dl
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
--batch-file song_to_test
|
||||||
|
--no-overwrites
|
||||||
|
--continue
|
||||||
|
--extract-audio
|
||||||
|
--audio-format mp3
|
||||||
|
-o ./audio/%(title)s.%(ext)s
|
156
cover_song_identification.py
Normal file
156
cover_song_identification.py
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
## tutorial from: https://essentia.upf.edu/essentia_python_examples.html
|
||||||
|
#
|
||||||
|
##Cover song identification (CSI) in MIR is a task of identifying when two musical recordings are derived from the same music composition. The cover of a song can be drastically different from the original recording. It can change key, tempo, instrumentation, musical structure or order, etc.
|
||||||
|
##
|
||||||
|
##Essentia provides open-source implementations of some state-of-the-art cover song identification algorithms. The following processing chain is required to use these CSI algorithms.
|
||||||
|
##
|
||||||
|
## Tonal feature extraction. Mostly used by chroma features. Here we use HPCP.
|
||||||
|
##
|
||||||
|
## Post-processing of the features to achieve invariance (eg. key) [3].
|
||||||
|
##
|
||||||
|
## Cross similarity matrix computation ([1] or [2]).
|
||||||
|
##
|
||||||
|
## Local sub-sequence alignment to compute the pairwise cover song similarity distance [1].
|
||||||
|
##
|
||||||
|
##In this tutorial, we use HPCP, ChromaCrossSimilarity and CoverSongSimilarity algorithms from essentia.
|
||||||
|
|
||||||
|
import essentia.standard as estd
|
||||||
|
from essentia.pytools.spectral import hpcpgram
|
||||||
|
|
||||||
|
import IPython
|
||||||
|
|
||||||
|
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
||||||
|
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
||||||
|
different_song = 'audio/Jacques Brel - Ne Me Quitte Pas.mp3'
|
||||||
|
|
||||||
|
IPython.display.Audio(yesterday_original)
|
||||||
|
IPython.display.Audio(yesterday_cover_01)
|
||||||
|
IPython.display.Audio(different_song)
|
||||||
|
|
||||||
|
# query cover song
|
||||||
|
original_song = estd.MonoLoader(filename=yesterday_original, sampleRate=32000)()
|
||||||
|
|
||||||
|
true_cover_01 = estd.MonoLoader(filename=yesterday_cover_01, sampleRate=32000)()
|
||||||
|
|
||||||
|
# wrong match
|
||||||
|
false_cover_1 = estd.MonoLoader(filename=different_song, sampleRate=32000)()
|
||||||
|
|
||||||
|
## Now let’s compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
|
||||||
|
|
||||||
|
query_hpcp = hpcpgram(original_song, sampleRate=32000)
|
||||||
|
|
||||||
|
true_cover_hpcp_1 = hpcpgram(true_cover_01, sampleRate=32000)
|
||||||
|
|
||||||
|
false_cover_hpcp = hpcpgram(false_cover_1, sampleRate=32000)
|
||||||
|
|
||||||
|
## plotting the hpcp features
|
||||||
|
#%matplotlib inline
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(14.5, 4.5)
|
||||||
|
|
||||||
|
plt.title("Query song HPCP")
|
||||||
|
plt.imshow(query_hpcp[:500].T, aspect='auto', origin='lower', interpolation='none')
|
||||||
|
|
||||||
|
## Next steps are done using the essentia ChromaCrossSimilarity function,
|
||||||
|
##
|
||||||
|
## Stacking input features
|
||||||
|
##
|
||||||
|
## Key invariance using Optimal Transposition Index (OTI) [3].
|
||||||
|
##
|
||||||
|
## Compute binary chroma cross similarity using cross recurrent plot as described in [1] or using OTI-based chroma binary method as detailed in [3]
|
||||||
|
|
||||||
|
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
|
||||||
|
frameStackStride=1,
|
||||||
|
binarizePercentile=0.095,
|
||||||
|
oti=True)
|
||||||
|
|
||||||
|
true_pair_crp_1 = crp(query_hpcp, true_cover_hpcp_1)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(15.5, 5.5)
|
||||||
|
|
||||||
|
plt.title('Cross recurrent plot [1]')
|
||||||
|
plt.xlabel('Yesterday accapella cover')
|
||||||
|
plt.ylabel('Yesterday - The Beatles')
|
||||||
|
plt.imshow(true_pair_crp_1, origin='lower')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Compute binary chroma cross similarity using cross recurrent plot of the non-cover pairs
|
||||||
|
|
||||||
|
crp = estd.ChromaCrossSimilarity(frameStackSize=9,
|
||||||
|
frameStackStride=1,
|
||||||
|
binarizePercentile=0.095,
|
||||||
|
oti=True)
|
||||||
|
|
||||||
|
false_pair_crp = crp(query_hpcp, false_cover_hpcp)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(15.5, 5.5)
|
||||||
|
|
||||||
|
plt.title('Cross recurrent plot [1]')
|
||||||
|
plt.xlabel('Come together cover - Aerosmith')
|
||||||
|
plt.ylabel('Yesterday - The Beatles')
|
||||||
|
plt.imshow(false_pair_crp, origin='lower')
|
||||||
|
|
||||||
|
|
||||||
|
## Alternatively, you can also use the OTI-based binary similarity method as explained in [2] to compute the cross similarity of two given chroma features.
|
||||||
|
|
||||||
|
csm = estd.ChromaCrossSimilarity(frameStackSize=9,
|
||||||
|
frameStackStride=1,
|
||||||
|
binarizePercentile=0.095,
|
||||||
|
oti=True,
|
||||||
|
otiBinary=True)
|
||||||
|
|
||||||
|
oti_csm = csm(query_hpcp, false_cover_hpcp)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(15.5, 5.5)
|
||||||
|
|
||||||
|
plt.title('Cross similarity matrix using OTI binary method [2]')
|
||||||
|
plt.xlabel('Come together cover - Aerosmith')
|
||||||
|
plt.ylabel('Yesterday - The Beatles')
|
||||||
|
plt.imshow(oti_csm, origin='lower')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Finally, we compute an asymmetric cover song similarity measure from the pre-computed binary cross similarity matrix of cover/non-cover pairs using various constraints of the Smith-Waterman sequence alignment algorithm (e.g. serra09 or chen17).
|
||||||
|
##
|
||||||
|
## Computing cover song similarity distance between ‘Yesterday - a cappella cover’ and ‘Yesterday - The Beatles’
|
||||||
|
|
||||||
|
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
||||||
|
disExtension=0.5,
|
||||||
|
alignmentType='serra09',
|
||||||
|
distanceType='asymmetric')(true_pair_crp_1)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(15.5, 5.5)
|
||||||
|
|
||||||
|
plt.title('Cover song similarity distance: %s' % distance)
|
||||||
|
plt.xlabel('Yesterday accapella cover')
|
||||||
|
plt.ylabel('Yesterday - The Beatles')
|
||||||
|
plt.imshow(score_matrix, origin='lower')
|
||||||
|
|
||||||
|
print('Cover song similarity distance: %s' % distance)
|
||||||
|
|
||||||
|
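## Computing cover song similarity distance between the query song ('Yesterday - The Beatles') and the non-cover song (in this script 'Jacques Brel - Ne Me Quitte Pas'; the upstream tutorial used a 'Come Together' cover by Aerosmith).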
|
||||||
|
|
||||||
|
score_matrix, distance = estd.CoverSongSimilarity(disOnset=0.5,
|
||||||
|
disExtension=0.5,
|
||||||
|
alignmentType='serra09',
|
||||||
|
distanceType='asymmetric')(false_pair_crp)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(15.5, 5.5)
|
||||||
|
|
||||||
|
plt.title('Cover song similarity distance: %s' % distance)
|
||||||
|
plt.xlabel('Yesterday accapella cover')
|
||||||
|
plt.ylabel('Come together cover - Aerosmith')
|
||||||
|
plt.imshow(score_matrix, origin='lower')
|
||||||
|
|
||||||
|
print('Cover song similarity distance: %s' % distance)
|
101
cover_song_stream.py
Normal file
101
cover_song_stream.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
## tutorial from: https://mtg.github.io/essentia-labs/news/2019/09/05/cover-song-similarity/
|
||||||
|
|
||||||
|
#################
|
||||||
|
# standard part #
|
||||||
|
#################
|
||||||
|
|
||||||
|
|
||||||
|
import essentia.standard as estd
|
||||||
|
from essentia.pytools.spectral import hpcpgram
|
||||||
|
|
||||||
|
yesterday_original = 'audio/Yesterday (Remastered 2009).mp3'
|
||||||
|
yesterday_cover_01 = 'audio/Yesterday - The Beatles - Connie Talbot (Cover).mp3'
|
||||||
|
wrong_song = 'audio/Jacques Brel - Ne Me Quitte Pas.mp3'
|
||||||
|
|
||||||
|
song_reference = yesterday_original # the original song analysed in normal mode
|
||||||
|
song_streaming = wrong_song # the song get in stream mode to compare to reference
|
||||||
|
|
||||||
|
# query cover song
|
||||||
|
original_song = estd.MonoLoader(filename=song_reference, sampleRate=32000)()
|
||||||
|
## Now let’s compute Harmonic Pitch Class Profile (HPCP) chroma features of these audio signals.
|
||||||
|
true_cover_hpcp = hpcpgram(original_song, sampleRate=32000)
|
||||||
|
|
||||||
|
|
||||||
|
#################
|
||||||
|
# Streaming part #
|
||||||
|
#################
|
||||||
|
|
||||||
|
import essentia.streaming as estr
|
||||||
|
from essentia import array, run, Pool
|
||||||
|
|
||||||
|
|
||||||
|
# Let's instantiate all the required essentia streaming algorithms
|
||||||
|
|
||||||
|
audio = estr.MonoLoader(filename=song_streaming, sampleRate=32000)
|
||||||
|
|
||||||
|
frame_cutter = estr.FrameCutter(frameSize=4096, hopSize=2048)
|
||||||
|
|
||||||
|
windowing = estr.Windowing(type="blackmanharris62")
|
||||||
|
|
||||||
|
spectrum = estr.Spectrum();
|
||||||
|
|
||||||
|
peak = estr.SpectralPeaks(sampleRate=32000)
|
||||||
|
|
||||||
|
whitening = estr.SpectralWhitening(maxFrequency=3500,
|
||||||
|
sampleRate=32000);
|
||||||
|
|
||||||
|
hpcp = estr.HPCP(sampleRate=32000,
|
||||||
|
minFrequency=100,
|
||||||
|
maxFrequency=3500,
|
||||||
|
size=12);
|
||||||
|
|
||||||
|
# Create an instance of streaming ChromaCrossSimilarity algorithm
|
||||||
|
# With parameter `referenceFeature`,
|
||||||
|
# we can pass the pre-computed reference song chroma features.
|
||||||
|
# In this case, we use the pre-computed HPCP feature
|
||||||
|
# of the 'true_cover_song'.
|
||||||
|
# With parameter `oti`, we can transpose the pitch
|
||||||
|
# of the reference song HPCP feature
|
||||||
|
# to a given OTI [5] (if it's known beforehand).
|
||||||
|
# By default we set `oti=0`
|
||||||
|
sim_matrix = estr.ChromaCrossSimilarity(
|
||||||
|
referenceFeature=true_cover_hpcp,
|
||||||
|
oti=0)
|
||||||
|
|
||||||
|
# Create an instance of the cover song similarity alignment algorithm
|
||||||
|
# 'pipeDistance=True' prints the distance value to stdout for each input stream
|
||||||
|
alignment = estr.CoverSongSimilarity(pipeDistance=True)
|
||||||
|
|
||||||
|
# essentia Pool instance (python dict like object) to aggregate the outputs
|
||||||
|
pool = Pool()
|
||||||
|
|
||||||
|
# Connect all the required algorithms in an essentia streaming network
|
||||||
|
# ie., connecting inputs and outputs of the algorithms
|
||||||
|
# in the required workflow and order
|
||||||
|
audio.audio >> frame_cutter.signal
|
||||||
|
frame_cutter.frame >> windowing.frame
|
||||||
|
windowing.frame >> spectrum.frame
|
||||||
|
spectrum.spectrum >> peak.spectrum
|
||||||
|
spectrum.spectrum >> whitening.spectrum
|
||||||
|
peak.magnitudes >> whitening.magnitudes
|
||||||
|
peak.frequencies >> whitening.frequencies
|
||||||
|
peak.frequencies >> hpcp.frequencies
|
||||||
|
whitening.magnitudes >> hpcp.magnitudes
|
||||||
|
hpcp.hpcp >> sim_matrix.queryFeature
|
||||||
|
sim_matrix.csm >> alignment.inputArray
|
||||||
|
alignment.scoreMatrix >> (pool, 'scoreMatrix')
|
||||||
|
alignment.distance >> (pool, 'distance')
|
||||||
|
|
||||||
|
# Run the algorithm network
|
||||||
|
run(audio)
|
||||||
|
# This process will print to stdout the cover song similarity distance
|
||||||
|
# for every input stream in realtime.
|
||||||
|
# It also aggregates the Smith-Waterman alignment score matrix
|
||||||
|
# and cover song similarity distance for every accumulating
|
||||||
|
# input audio stream in an essentia pool instance (similar to a python dict)
|
||||||
|
# which can be accessed after the end of the stream.
|
||||||
|
|
||||||
|
# Now, let's check the final cover song similarity distance value
|
||||||
|
# computed at the last input stream.
|
||||||
|
print(pool['distance'][-1])
|
||||||
|
|
2
rm_mp3.sh
Normal file
2
rm_mp3.sh
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
git filter-branch --force --index-filter 'git rm --cached --ignore-unmatch "*.mp3"' --prune-empty --tag-name-filter cat -- --all
|
||||||
|
|
14
song_to_test
Normal file
14
song_to_test
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
https://www.youtube.com/watch?v=TQemQRL_YVQ # yesterday original
|
||||||
|
https://www.youtube.com/watch?v=sGSZA6mYo4c # yesterday cover 1
|
||||||
|
https://www.youtube.com/watch?v=Dyjrnxj70dU # yesterday cover 2
|
||||||
|
|
||||||
|
https://youtu.be/EzRtlhjyNZM # gangsta rap
|
||||||
|
https://youtu.be/mm_PH5BadTk # gangsta rap
|
||||||
|
|
||||||
|
https://youtu.be/26Nuj6dhte8 # Georges Brassens - La Mauvaise Réputation
|
||||||
|
|
||||||
|
https://youtu.be/i2wmKcBm4Ik # Jacques Brel - Ne Me Quitte Pas
|
||||||
|
|
||||||
|
https://youtu.be/nUE80DTNxK4 # Barbara - Dis, quand reviendras-tu
|
||||||
|
|
||||||
|
https://youtu.be/UGtKGX8B9hU # le cafe - oldelaf _ future shorts
|
114
todo
Normal file
114
todo
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
Le but c'est d'analyser un flux sonore en temps reel
|
||||||
|
afin de determiner le son le plus proche.
|
||||||
|
|
||||||
|
* installation
|
||||||
|
* trouver les exemples utiles
|
||||||
|
* analyser en temps reel sur un flux (micro)
|
||||||
|
* distance / similarite d'un son
|
||||||
|
* meme operation sur des fichiers fixes
|
||||||
|
* communication avec osc
|
||||||
|
|
||||||
|
###################################################
|
||||||
|
Python exemple:
|
||||||
|
* Computing features with MusicExtractor
|
||||||
|
* Beat detection and BPM histogram
|
||||||
|
* Onset detection
|
||||||
|
* Melody detection
|
||||||
|
* Tonality analysis (HPCP, key and scale)
|
||||||
|
* Fingerprinting
|
||||||
|
* Using chromaprints to identify segments in an audio track
|
||||||
|
* Cover Song Identification
|
||||||
|
* Inference with TensorFlow models
|
||||||
|
* Auto-tagging
|
||||||
|
* Transfer learning classifiers
|
||||||
|
* Tempo estimation
|
||||||
|
* Embedding extraction
|
||||||
|
* Extracting embeddings from other models
|
||||||
|
|
||||||
|
##################################################
|
||||||
|
|
||||||
|
Bon la j'ai chope un exemple qui marche en mode standard.
|
||||||
|
Il faudrait que je refasse le meme truc en mode streaming.
|
||||||
|
|
||||||
|
Pour ca il faudrait:
|
||||||
|
* une version simplifiee du code en question (sans les plt et autre affichage)
|
||||||
|
* comprendre un peu la logique du streaming avec essentia
|
||||||
|
* refaire l'exemple en mode streaming
|
||||||
|
|
||||||
|
?? Est-ce que ca va etre rapide a s'executer ??
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
Bon en fait j'ai trouve le code d'exemple dont j'ai besoin.
|
||||||
|
Ca commence par recuperer en mode standard la description d'un fichier sonore.
|
||||||
|
Puis avec un input en mode stream ca compare la distance avec la chanson.
|
||||||
|
|
||||||
|
Ce qu'il reste a faire:
|
||||||
|
* avoir un script pour telecharger les musiques a tester.
|
||||||
|
* avoir un input type micro
|
||||||
|
* avoir une entree avec jack (jackd)
|
||||||
|
* Faire tourner plusieurs processus pour pouvoir analyser plusieurs tracks en meme temps.
|
||||||
|
* ET VOILA CA DEVRAIT ETRE BON =)
|
||||||
|
|
||||||
|
1) un script qui telecharge les sons:
|
||||||
|
|
||||||
|
J'ai besoin d'un fichier de config qui telecharge les musiques en extrayant le son
|
||||||
|
et les place dans le bon dossier. Et dans un format que je peux lire avec essentia.
|
||||||
|
|
||||||
|
* avoir un
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
|
||||||
|
Les elements pour la config:
|
||||||
|
|
||||||
|
--config-location PATH Location of the configuration file;
|
||||||
|
either the path to the config or its
|
||||||
|
containing directory.
|
||||||
|
|
||||||
|
-a, --batch-file FILE File containing URLs to download ('-'
|
||||||
|
for stdin), one URL per line. Lines
|
||||||
|
starting with '#', ';' or ']' are
|
||||||
|
considered as comments and ignored.
|
||||||
|
|
||||||
|
-c, --continue Force resume of partially downloaded
|
||||||
|
files. By default, youtube-dl will resume downloads if possible.
|
||||||
|
|
||||||
|
|
||||||
|
--encoding ENCODING Force the specified encoding
|
||||||
|
(experimental)
|
||||||
|
|
||||||
|
-x, --extract-audio Convert video files to audio-only files
|
||||||
|
(requires ffmpeg/avconv and
|
||||||
|
ffprobe/avprobe)
|
||||||
|
|
||||||
|
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||||
|
"flac", "mp3", "m4a", "opus", "vorbis",
|
||||||
|
or "wav"; "best" by default; No effect
|
||||||
|
without -x
|
||||||
|
|
||||||
|
-w, --no-overwrites Do not overwrite files
|
||||||
|
|
||||||
|
|
||||||
|
vv -E-X-A-M-P-L-E- -C-O-N-F-I-G- -F-I-L-E- vv
|
||||||
|
|
||||||
|
# Lines starting with # are comments
|
||||||
|
|
||||||
|
# Always extract audio
|
||||||
|
-x
|
||||||
|
|
||||||
|
# Do not copy the mtime
|
||||||
|
--no-mtime
|
||||||
|
|
||||||
|
# Use this proxy
|
||||||
|
--proxy 127.0.0.1:3128
|
||||||
|
|
||||||
|
# Save all videos under Movies directory in your home directory
|
||||||
|
-o ~/Movies/%(title)s.%(ext)s
|
||||||
|
^^ - - - - - - - - - - - - - - - - - - - ^^
|
||||||
|
|
||||||
|
to run download:
|
||||||
|
$> youtube-dl --config-location config_youtube-dl
|
||||||
|
|
||||||
|
Dependances:
|
||||||
|
youtube-dl: sudo apt-get install -y ffmpeg
|
Loading…
Reference in New Issue
Block a user