Noise RIR
To create an audio signal with reverberation:
- get the RIR (room impulse response) and resample it to the sample rate of the audio
- convolve the audio with the RIR
Sample code:
import numpy as np
import scipy.signal as s
import torchaudio
import torch
import torchaudio.transforms as T
def reverb_rir(frames, rir):
    """Add reverberation to audio by convolving it with a room impulse response.

    Args:
        frames: clean audio as a numpy array of shape [1, T]; a plain 1-D
            array of shape [T] is accepted as well.
        rir: room impulse response as a numpy array of shape [1, T'] or [T'].
            It should already be at the same sample rate as ``frames``.

    Returns:
        A 1-D numpy array of shape [T]: the reverberated audio, cut to the
        length of the input, so the part of the reverb tail that extends
        past the original duration is dropped.
    """
    frames = np.asarray(frames)
    # The last axis is time for both [1, T] and [T] inputs.
    num_samples = frames.shape[-1]
    # squeeze() turns a single-sample input into a 0-d array; atleast_1d
    # keeps the signal and the kernel sliceable 1-D sequences in that case.
    signal = np.atleast_1d(np.squeeze(frames))
    kernel = np.atleast_1d(np.squeeze(rir))
    wet = s.convolve(signal, kernel)
    # Rescale only when the peak of the full convolution gets close to (or
    # beyond) full scale, so quieter signals keep their original level.
    peak = np.max(np.abs(wet))
    if peak > 0.99:
        wet = (wet / peak) * 0.98
    return wet[:num_samples]
# Input files: the impulse response (noted as 48 kHz in the original notes)
# and the clean utterance (noted as 44.1 kHz).
rir_impulse = "/home/bltang/work/data/impulse/datasets_fullband/impulse_responses/SLR26/simulated_rirs_48k/largeroom/Room002/Room002-00001.wav"
frame_path = "/home/bltang/work/voicefixer_main/test/clean/SSB00050001.wav"

# Read both waveforms together with their native sample rates.
frame, frame_rate = torchaudio.load(frame_path)
rir, rir_rate = torchaudio.load(rir_impulse)
print(f"loaded audio frame: {frame.shape}, sample rate: {frame_rate}")
print(f"loaded rir: {rir.shape}, sample rate: {rir_rate}")

# Bring the RIR to the sample rate of the audio before convolving.
resampler = T.Resample(orig_freq=rir_rate, new_freq=frame_rate, dtype=frame.dtype)
rir = resampler(rir)

# reverb_rir works on numpy arrays, so leave torch for the convolution step.
frame = frame.numpy()
rir = rir.numpy()

# Convolve, then restore the leading channel axis before saving.
output = reverb_rir(frame, rir)
output = torch.unsqueeze(torch.from_numpy(output), 0)
torchaudio.save("output.wav", output, frame_rate)
To perform clipping on the reverberated output:
# Clip the reverberated signal: values are limited to clip_factor times
# the signal's own minimum and maximum.
clip_factor = 0.1
z = output.clamp(
    min=clip_factor * output.min(),
    max=clip_factor * output.max(),
)
print(z.min())
torchaudio.save("clamp.wav", z, frame_rate)