From 9887f622024ab566d50ac1e8a247b0cd3cab473a Mon Sep 17 00:00:00 2001 From: alban Date: Tue, 29 Sep 2020 00:56:54 +0200 Subject: [PATCH] [fix] doc and bpm work --- README.md | 152 +++++++++++++++++++++++++++++++++++---------------- redilysis.py | 106 ++++++++++++++++++++++++++--------- 2 files changed, 183 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 55b6cea..8927662 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,108 @@ -# Redilysis = Redis + Audio Analysis +# (Audio Analysis | redis ) == <3 -Redilysis sends audio analysis to a redis server. + +Redilysis sends audio analysis to a redis server. The idea is to share a single audio analysis to many Visual Jockey filters, in our case for lasers. -Two modes exist for now, you need to run two processes to get the complete experience! +**Two modes are available, so you might need to run two processes for full analysis.** -### Spectrum Mode -This is the default mode. +### Redis Keys and Contents + +Each **word in bold** is a key which you can query the redis server for. Example: +``` +$ redis-cli get spectrum_10 +"[2.21, 0.56, 0.51, 0.32, 0.27, 0.21, 0.18, 0.17, 0.18, 0.23]" +``` + +**rms** +* **Mode** spectrum +* **Type** float number +* **Length** scalar +* **Meaning** Represents the root-mean-square (a mean value) for all frequencies between ```C0``` and ```C9```, i.e. between 16Hz and 8,372Hz. +* **Use** A fairly basic piece of information about the audio volume of the scene. +* **Example** + * ```"0.12"``` + * The audio volume for the scene is pretty low. + * It is obtained by averaging the RMS of every audio frame during the capture. + +**spectrum_10** +* **Mode** spectrum +* **Type** array of float numbers (0.0-10.0) +* **Length** 10 +* **Meaning** Represents the audio volume for the 10 **octaves** between ```C0``` and ```C9```, i.e. between 16Hz and 8,372Hz. +* **Use** A simple and useful way to get a global idea of the sound landscape. 
+* **Example** + * ```"[2.21, 0.56, 0.51, 0.32, 0.27, 0.21, 0.18, 0.17, 0.18, 0.23]"``` + * The audio volume for the `C4` octave is `spectrum_10[4]`. + * That value being `0.27` is pretty low, meaning there is almost no audio volume for that octave. + * It is calculated by averaging the volume of the octave's notes, i.e. `C4, C#4, D4, D#4, E4, F4, F#4, G4, G#4, A4, A#4, B4`. + +**spectrum_120** +* **Mode** spectrum +* **Type** array of float numbers (0.0-10.0) +* **Length** 120 +* **Meaning** Represents the audio volume for the 120 **notes** between ```C0``` and ```C9```, i.e. between 16Hz and 8,372Hz. +* **Use** More detailed than spectrum_10, it lets you find the notes that stand out in the audio landscape. +* **Example** + * ```"[5.55, 2.61, 2.49, 1.79, 2.09, 4.35, 1.99, 1.57, 1.47, 0.77, 0.91, 0.89, 0.85, 0.56, 0.53, 0.73, 0.53, 0.46, 0.43, 0.44, 0.27, 0.45, 0.7, 0.81, 0.98, 0.7, 0.71, 0.6, 0.83, 0.51, 0.32, 0.31, 0.33, 0.24, 0.25, 0.33, 0.39, 0.43, 0.51, 0.28, 0.27, 0.25, 0.38, 0.25, 0.27, 0.3, 0.2, 0.27, 0.35, 0.29, 0.34, 0.3, 0.27, 0.27, 0.22, 0.21, 0.21, 0.29, 0.22, 0.28, 0.18, 0.19, 0.25, 0.26, 0.25, 0.24, 0.2, 0.21, 0.19, 0.18, 0.19, 0.17, 0.2, 0.17, 0.18, 0.17, 0.15, 0.17, 0.19, 0.18, 0.21, 0.16, 0.16, 0.18, 0.15, 0.13, 0.14, 0.16, 0.2, 0.17, 0.17, 0.2, 0.18, 0.16, 0.18, 0.15, 0.15, 0.16, 0.16, 0.19, 0.19, 0.19, 0.17, 0.18, 0.17, 0.19, 0.23, 0.23, 0.2, 0.23, 0.24, 0.36, 0.34, 0.23, 0.22, 0.2, 0.19, 0.18, 0.21, 0.21]"``` + * The audio volume for the `C2` note is `spectrum_120[23]` (12x2 - 1). + * That value being `0.81` is average, meaning there is some audio volume for that note. 
+ + + +**bpm** +* **Mode** bpm +* **Type** float number +* **Length** scalar +* **Meaning** Represents the detected tempo in beats per minute, kept between the `--bpm-min` and `--bpm-max` thresholds. +* **Use** Synchronize effects with the tempo of the music. The key expires after twice the sampling interval, so a missing key means there is no recent analysis. +* **Example** ```"120.0"``` + +**bpm_sample_interval** +* **Mode** bpm +* **Type** float number (seconds) +* **Length** scalar +* **Meaning** Represents the duration of the audio sample used for each BPM detection, i.e. the value of the `-s` / `--sampling-frequency` option. +* **Example** ```"0.5"``` + +**bpm_delay** +* **Mode** bpm +* **Type** float number (seconds) +* **Length** scalar +* **Meaning** Represents the sampling interval minus the time elapsed between the start of the audio callback and the start of the BPM detection. +* **Example** ```"0.4998"``` + +**beats** +* **Mode** bpm +* **Type** array of float numbers (seconds) +* **Length** variable, one entry per detected beat +* **Meaning** Represents the positions of the beats detected in the last audio sample, expressed in seconds. +* **Example** ```"[0.07, 0.56]"``` + +### Requirements and installation + +* python 2.7 +* audio card +* redis server + +#### Installation + +```bash +sudo apt install python-pyaudio python +git clone https://git.interhacker.space/tmplab/redilysis.git +cd redilysis +pip install -r requirements.txt +python redilysis.py --help +``` + +### Running in Spectrum Mode + +``` +python redilysis.py -m spectrum +``` +This is the default mode. It performs some frequency analysis (Fast Fourier Transform) to detect "energy" in the human audition bandwidths. @@ -18,53 +112,15 @@ It can run at sub-second frequency (100ms) with no problem. It reports realistic data: spectrum analysis is the easy part. -### BPM Mode +### Running in BPM Mode -This mode is more experimental. - -It attempts to detect beats based on the - - -## Keys and contents in Redis - -bpm_time : (milliseconds integer timestamp) last update time -onset -bpm -beats -spectrum_time - -## Installation - -```python -sudo apt install python-pyaudio python3 -git clone https://git.interhacker.space/tmplab/redilysis.git -cd redilysis -pip install -r requirements.txt -python3 redilysis.py --help +``` +python redilysis.py -m bpm -s 0.5 ``` -## Guide +This mode is experimental. -There are two available modes. - -**One is the slow mode with BPM recognition:** - -python3 redilysis.py -m bpm -s 1 -f 44100 - -Pushes following keys in redis: - - * onset - * bpm - * beats +It attempts to detect the tempo and the beats of the captured audio; use a sampling interval (`-s`) of at least 0.5 seconds. 
-**The other is a fast mode with spectrogram analysis** -python3 redilysis.py -m spectrum -s 0.1 -f 4410 - - -Pushes following keys in redis: - - * rms - * spectrum - * tuning diff --git a/redilysis.py b/redilysis.py index d0d893d..9e791bb 100755 --- a/redilysis.py +++ b/redilysis.py @@ -34,42 +34,52 @@ _FRAMES_PER_BUFFER = 4410 _N_FFT = 4096 _RATE = 44100 _SAMPLING_FREQUENCY = 0.1 +_BPM_MIN=10 +_BPM_MAX=400 # Argument parsing # Audio Args parser = argparse.ArgumentParser(prog='realtime_redis') +# Audio Capture Args parser.add_argument('--list-devices','-L', action='store_true', help='Which devices are detected by pyaudio') parser.add_argument('--mode','-m', required=False, default='spectrum', choices=['spectrum', 'bpm'], type=str, help='Which mode to use. Default=spectrum') parser.add_argument('--device','-d', required=False, type=int, help='Which pyaudio device to use') -#parser.add_argument('--frames','-f', required=False, default=4410, type=int, help='How many frames per buffer. Default={}'.format(_FRAMES_PER_BUFFER)) parser.add_argument('--sampling-frequency','-s', required=False, default=0.1, type=float, help='Which frequency, in seconds. Default={}f '.format(_SAMPLING_FREQUENCY)) parser.add_argument('--channels','-c', required=False, default=_CHANNELS, type=int, help='How many channels. Default={} '.format(_CHANNELS)) -parser.add_argument('--rate','-r', required=False, default=44100, type=int, help='Which rate. Default={} '.format(_RATE)) +parser.add_argument('--rate','-r', required=False, default=44100, type=int, help='The audio capture rate in Hz. Default={} '.format(_RATE)) +#parser.add_argument('--frames','-f', required=False, default=4410, type=int, help='How many frames per buffer. Default={}'.format(_FRAMES_PER_BUFFER)) +# BPM Mode Args +parser.add_argument('--bpm-min', required=False, default=_BPM_MIN, type=int, help='BPM mode only. The low BPM threshold. 
Default={} '.format(_BPM_MIN)) +parser.add_argument('--bpm-max', required=False, default=_BPM_MAX, type=int, help='BPM mode only. The high BPM threshold. Default={} '.format(_BPM_MAX)) + # Redis Args parser.add_argument("-i","--ip",help="IP address of the Redis server ",default="127.0.0.1",type=str) parser.add_argument("-p","--port",help="Port of the Redis server ",default="6379",type=str) -# Stardard Args +# Standard Args parser.add_argument("-v","--verbose",action="store_true",help="Verbose") args = parser.parse_args() # global bpm = 120.0 +start = 0 # Set real variables F_LO = librosa.note_to_hz('C0') F_HI = librosa.note_to_hz('C10') -BAND_TONES = _BAND_TONES -CHANNELS = args.channels -DEVICE = args.device -FRAMES_PER_BUFFER = int(args.rate * args.sampling_frequency ) -LIST_DEVICES = args.list_devices -MODE = args.mode -N_FFT = _N_FFT -RATE = args.rate -SAMPLING_FREQUENCY = args.sampling_frequency -ip = args.ip -port = args.port -verbose = args.verbose +BAND_TONES = _BAND_TONES +N_FFT = _N_FFT +CHANNELS = args.channels +DEVICE = args.device +FRAMES_PER_BUFFER = int(args.rate * args.sampling_frequency ) +LIST_DEVICES = args.list_devices +MODE = args.mode +RATE = args.rate +SAMPLING_FREQUENCY = args.sampling_frequency +bpm_min = args.bpm_min +bpm_max = args.bpm_max +ip = args.ip +port = args.port +verbose = args.verbose if( MODE == "bpm" and SAMPLING_FREQUENCY < 0.5 ): debug( "You should use a --sampling_frequency superior to 0.5 in BPM mode...") @@ -106,32 +116,73 @@ p = pyaudio.PyAudio() def m_bpm(audio_data): """ This function saves slow analysis to redis - * onset * bpm * beat """ global bpm + global start - if( bpm <= 10): - bpm = 10 - onset = librosa.onset.onset_detect( - y = audio_data, - sr = RATE - ) + bpm_delay = SAMPLING_FREQUENCY + start - time.time() + + # Detect tempo / bpm new_bpm, beats = librosa.beat.beat_track( y = audio_data, sr = RATE, trim = False, - start_bpm = bpm, + #start_bpm = bpm, units = "time" ) + ''' + new_bpm = librosa.beat.tempo(y = 
audio_data, sr=RATE)[0] + ''' + # Correct the eventual octave error + if new_bpm < bpm_min or new_bpm > bpm_max: + octaveErrorList = [ 0.5, 2, 0.3333, 3 ] + for key,factor in enumerate(octaveErrorList): + correction = new_bpm * factor + if correction > bpm_min and correction < bpm_max: + debug( "Corrected bpm to:{}".format(correction)) + new_bpm = correction + break + if new_bpm < bpm_min : + new_bpm = bpm_min + else : + new_bpm = bpm_max + + ''' + + |start end| + Capture |........................| + BPM detect+Redis set || + Client Redis get | + + Time |........................||.............| + ---SAMPLING_FREQUENCY---- + - < TIME-START + Read Delay --------------- < 2*SAMPLING_FREQUENCY - PTTL + Delay ----------------------------------------- + Beats |last beat + . known ...b....b....b....b....b. + . passed (...b....b....b.) + . guessed (..b....b....b....b... + Next Beat Calculation b....b....b....b.|..b + + => (Delay - last beat) + x*BPM/60 (with x >= read_delay/BPM/60) + Redis: + bpm_sample_interval + |........................| + bpm_delay + |.........................| + + ''' + bpm = new_bpm # Save to Redis - r.set( 'onset', json.dumps( onset.tolist() ) ) - r.set( 'bpm', json.dumps( new_bpm ) ) + r.set( 'bpm', new_bpm, px=( 2* int(SAMPLING_FREQUENCY * 1000))) + r.set( 'bpm_sample_interval', SAMPLING_FREQUENCY ) + r.set( 'bpm_delay', bpm_delay ) r.set( 'beats', json.dumps( beats.tolist() ) ) - bpm = new_bpm - debug( "bpm:{} onset:{} beats:{}".format(bpm,onset,beats) ) + debug( "bpm:{} bpm_delay:{} beats:{}".format(bpm,bpm_delay,beats) ) return True def m_spectrum(audio_data): @@ -179,6 +230,7 @@ def m_spectrum(audio_data): def callback(in_data, frame_count, time_info, status): audio_data = numpy.fromstring(in_data, dtype=numpy.float32) + global start start = time.time() if MODE == 'spectrum': m_spectrum(audio_data)