"""Publish live audio analysis to Redis.

Captures audio from a PyAudio input device and, for every buffer delivered
by the stream callback, stores an analysis in the Redis server at
localhost:6379:

* ``spectrum`` mode: keys ``rms``, ``spectrum`` and ``tuning``
* ``bpm`` mode: keys ``onset``, ``bpm`` and ``beats``

The time spent on each buffer is printed to the terminal.

Based on musicinformationretrieval.com/realtime_spectrogram.py

For more examples using PyAudio:
https://github.com/mwickert/scikit-dsp-comm/blob/master/sk_dsp_comm/pyaudio_helper.py
"""

import argparse
import json
import os
import sys
import time

import librosa
import numpy
import pyaudio
import redis


def list_devices():
    """Print the index and name of every device that has input channels."""
    audio = pyaudio.PyAudio()
    try:
        n = audio.get_device_count()
        print("\nFound {} devices\n".format(n))
        print(" {} {}".format('ID', 'Device name'))
        for i in range(n):
            dev = audio.get_device_info_by_index(i)
            if dev['maxInputChannels'] > 0:
                print(" {} {}".format(i, dev['name']))
    finally:
        # Release the PortAudio resources acquired by this temporary instance.
        audio.terminate()


# Define default variables.
_BAND_RANGE = 96
_CHANNELS = 1
_ENERGY_THRESHOLD = 0.4
_FRAMES_PER_BUFFER = 4410
_N_FFT = 4096
_RATE = 44100
_SAMPLING_FREQUENCY = 0.1

# Argument parsing. Every default comes from the constants above so that the
# advertised default in the help text and the effective default cannot drift.
parser = argparse.ArgumentParser(prog='realtime_redis')
parser.add_argument('--list-devices', '-L', action='store_true',
                    help='Which devices are detected by pyaudio')
parser.add_argument('--mode', '-m', default='spectrum',
                    choices=['spectrum', 'bpm'], type=str,
                    help='Which mode to use. Default=spectrum')
parser.add_argument('--device', '-d', type=int,
                    help='Which pyaudio device to use')
parser.add_argument('--frames', '-f', default=_FRAMES_PER_BUFFER, type=int,
                    help='How many frames per buffer. Default={}'.format(_FRAMES_PER_BUFFER))
parser.add_argument('--sampling-frequency', '-s', default=_SAMPLING_FREQUENCY, type=float,
                    help='Which frequency, in seconds. Default={} '.format(_SAMPLING_FREQUENCY))
parser.add_argument('--channels', '-c', default=_CHANNELS, type=int,
                    help='How many channels. Default={} '.format(_CHANNELS))
parser.add_argument('--rate', '-r', default=_RATE, type=int,
                    help='Which rate. Default={} '.format(_RATE))
parser.add_argument('--energy-threshold', '-e', default=_ENERGY_THRESHOLD, type=float,
                    help='Which energy triggers spectrum detection flag. Default={} '.format(_ENERGY_THRESHOLD))
args = parser.parse_args()

# Set real variables
BAND_RANGE = _BAND_RANGE
CHANNELS = args.channels
DEVICE = args.device
ENERGY_THRESHOLD = args.energy_threshold
FRAMES_PER_BUFFER = args.frames
LIST_DEVICES = args.list_devices
MODE = args.mode
N_FFT = _N_FFT
RATE = args.rate
SAMPLING_FREQUENCY = args.sampling_frequency

# Early exit to list devices. Done before any analysis setup, and through
# sys.exit so that the printed listing is flushed even when stdout is a pipe.
if LIST_DEVICES:
    list_devices()
    sys.exit(0)

# Define the frequency range of the log-spectrogram.
F_LO = librosa.note_to_hz('C2')
F_HI = librosa.note_to_hz('C9')
# Keyword arguments: recent librosa releases no longer accept these
# positionally, and older releases accept the keywords as well.
M = librosa.filters.mel(sr=RATE, n_fft=N_FFT, n_mels=BAND_RANGE,
                        fmin=F_LO, fmax=F_HI)

r = redis.Redis(host='localhost', port=6379)

p = pyaudio.PyAudio()

# Last tempo estimate; fed back into the beat tracker as its starting guess.
bpm = 120.0


def m_bpm(audio_data):
    """Run the slow analysis on one buffer and save it to Redis.

    Stores onset positions (``onset``), the tempo estimate (``bpm``) and the
    beat times (``beats``) as JSON, and remembers the tempo in the
    module-level ``bpm`` for the next call.

    Args:
        audio_data: audio samples of the current buffer.

    Returns:
        Always True.
    """
    global bpm

    onset = librosa.onset.onset_detect(y=audio_data, sr=RATE)
    new_bpm, beats = librosa.beat.beat_track(
        y=audio_data, sr=RATE, trim=False, start_bpm=bpm, units="time")
    # Normalise to a plain float: the tempo may come back as a numpy scalar
    # or as a one-element array, and json.dumps cannot encode an array.
    new_bpm = float(numpy.ravel(new_bpm)[0])
    print(bpm, new_bpm)

    # Save beat information
    r.set('onset', json.dumps(onset.tolist()))
    r.set('bpm', json.dumps(new_bpm))
    r.set('beats', json.dumps(beats.tolist()))

    bpm = new_bpm
    return True


def m_spectrum(audio_data):
    """Run the fast analysis on one buffer and save it to Redis.

    Stores:
      * ``rms``: string form of a one-element list holding the
        root-mean-square of the mel band magnitudes,
      * ``spectrum``: JSON list with one 0/1 flag per mel band, 1 where the
        band magnitude exceeds ENERGY_THRESHOLD,
      * ``tuning``: index of the strongest band, or -1 if no band is above 0.

    Args:
        audio_data: audio samples of the current buffer.

    Returns:
        Always True.
    """
    # Compute real FFT.
    x_fft = numpy.fft.rfft(audio_data, n=N_FFT)

    # Compute mel spectrum.
    melspectrum = M.dot(numpy.abs(x_fft))

    # RMS over the mel bands, computed directly with numpy because
    # librosa.feature.rmse is no longer available in current librosa.
    rms = numpy.sqrt(
        numpy.mean(numpy.abs(melspectrum) ** 2, axis=0, keepdims=True))

    # One flag per band: 1 where there is energy in that frequency.
    bit_list = (melspectrum > ENERGY_THRESHOLD).astype(int).tolist()

    # Strongest band (first one on ties); -1 when nothing is above zero.
    loudest = int(numpy.argmax(melspectrum))
    highest_index = loudest if melspectrum[loudest] > 0 else -1

    # Save to redis
    r.set('rms', "{}".format(rms.tolist()))
    r.set('spectrum', json.dumps(bit_list))
    r.set('tuning', highest_index)

    return True


def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: analyse one buffer according to MODE.

    Args:
        in_data: raw bytes of the recorded buffer (float32 samples, matching
            the paFloat32 format the stream is opened with).
        frame_count: number of frames in the buffer (unused).
        time_info: PortAudio timing information (unused).
        status: PortAudio status flags (unused).

    Returns:
        ``(in_data, pyaudio.paContinue)`` so the stream keeps running.
    """
    # frombuffer replaces the deprecated binary mode of numpy.fromstring; the
    # copy keeps the array writable and independent of the PortAudio buffer.
    audio_data = numpy.frombuffer(in_data, dtype=numpy.float32).copy()

    start = time.time()
    if MODE == 'spectrum':
        m_spectrum(audio_data)
    elif MODE == 'bpm':
        m_bpm(audio_data)
    else:
        print("Unknown mode. Exiting")
        # os._exit skips the normal interpreter shutdown, so flush by hand.
        # It is kept because the callback runs outside the main thread, where
        # raising SystemExit would not end the process.
        sys.stdout.flush()
        os._exit(2)
    end = time.time()
    print("\rLoop took {:.2}s on {}s ".format(end - start, SAMPLING_FREQUENCY), end="")
    return (in_data, pyaudio.paContinue)


print("\n\nRunning! Using mode {}.\n\n".format(MODE))

try:
    stream = p.open(format=pyaudio.paFloat32,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,   # Do record input.
                    output=False,  # Do not play back output.
                    frames_per_buffer=FRAMES_PER_BUFFER,
                    input_device_index=DEVICE,
                    stream_callback=callback)
    try:
        stream.start_stream()
        # The analysis happens in the callback; the main thread only waits.
        while stream.is_active():
            time.sleep(SAMPLING_FREQUENCY)
    finally:
        # Runs on Ctrl-C and on errors too, so the device is always released.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()