Skip to content

Commit

Permalink
Merge pull request #9 from ViseshXX/all-1.1-dev
Browse files Browse the repository at this point in the history
Added denoising function to the code
  • Loading branch information
sudeeppr1998 authored May 3, 2024
2 parents 62b6bb2 + faba14f commit 4848e7d
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 2 deletions.
124 changes: 124 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import jiwer
import eng_to_ipa as p
from fuzzywuzzy import fuzz
import librosa
import numpy as np
import soundfile as sf
import noisereduce as nr

app = Flask(__name__)

Expand Down Expand Up @@ -67,6 +71,126 @@
"y",
"a", "x", "c"
]
@app.route('/audio_processing', methods=['POST'])
def home():
    """Denoise a base64-encoded audio clip posted as JSON.

    The request body is expected to be a JSON object with an
    ``audio_base64`` field. The decoded bytes are handed to
    ``denoise_audio`` with ``speed_factor=0.75`` and the result is returned,
    re-encoded by ``convert_to_base64``, under ``denoised_audio_base64``.

    Returns:
        A ``(response, status)`` pair: 200 with the denoised audio, or 400
        with an ``error`` message when the body is missing or empty, is not
        a JSON object, lacks ``audio_base64``, or carries invalid base64.
    """
    # silent=True makes a missing or malformed JSON body come back as None,
    # so such requests receive the JSON error below instead of an exception.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No data received."}), 400

    # A JSON array/string/number has no .get(); treat it as a missing field.
    audio_base64 = data.get('audio_base64') if isinstance(data, dict) else None
    if not audio_base64:
        return jsonify({"error": "Missing audio_base64 parameter."}), 400

    try:
        # binascii.Error (bad padding) is a ValueError subclass; TypeError
        # covers a non-string value such as a number or a list.
        audio_data = base64.b64decode(audio_base64)
    except (ValueError, TypeError):
        return jsonify({"error": "Invalid audio_base64 parameter."}), 400

    # The SNR figures returned by denoise_audio are not part of the response.
    denoised_audio, sample_rate, _, _ = denoise_audio(
        BytesIO(audio_data), speed_factor=0.75
    )
    denoised_audio_base64 = convert_to_base64(denoised_audio, sample_rate)

    return jsonify({"denoised_audio_base64": denoised_audio_base64}), 200

def calculate_snr(audio, sr):
    """Estimate the signal-to-noise ratio of *audio* in decibels.

    STFT frames whose mean mel-spectrogram energy is above the clip-wide
    average are counted as signal; all remaining frames are counted as
    noise. The SNR is ``10 * log10`` of the ratio of the mean power of the
    two groups.

    Args:
        audio: Sequence of audio samples passed to ``librosa.stft``.
        sr: Sample rate, forwarded to ``librosa.feature.melspectrogram``.

    Returns:
        The SNR in dB, or ``0.0`` when the audio is empty, no frame rises
        above the average energy, or the signal power is not positive.
    """
    if len(audio) == 0:
        # n_fft would be 0, which is not a usable FFT size.
        return 0.0

    n_fft = min(len(audio), 2048)  # n_fft must not exceed the audio length
    stft = librosa.stft(audio, n_fft=n_fft)
    power = np.abs(stft) ** 2

    mel_spectrogram = librosa.feature.melspectrogram(S=power, sr=sr)
    mel_power = np.mean(mel_spectrogram, axis=0)

    speech_frames = mel_power > np.mean(mel_power)
    noise_frames = ~speech_frames

    # Test the masks themselves: a mean over axis=1 always has one entry per
    # frequency bin, so checking its size cannot detect an empty selection
    # and the mean of an empty selection is NaN.
    if not speech_frames.any():
        return 0.0

    average_signal_power = float(np.mean(power[:, speech_frames]))
    if not average_signal_power > 0:
        return 0.0

    if noise_frames.any():
        average_noise_power = float(np.mean(power[:, noise_frames]))
    else:
        average_noise_power = 0.0
    # Floor the noise power so silent noise frames give a large finite SNR
    # rather than a division by zero.
    average_noise_power = max(average_noise_power, 1e-10)

    return 10 * np.log10(average_signal_power / average_noise_power)

def estimate_noise_floor(audio, sr, frame_length=None, hop_length=512):
    """Estimate a noise-floor energy from the quietest STFT frames.

    Per-frame energy is the sum of the power spectrogram over frequency.
    The returned value is a low percentile of those energies: the 5th when
    fewer than 10% of the frames sit at or below the 10th-percentile
    energy, otherwise the 10th.

    Args:
        audio: Sequence of audio samples passed to ``librosa.stft``.
        sr: Sample rate; accepted but not used by this function.
        frame_length: FFT size; when falsy, ``min(len(audio), 2048)`` is used.
        hop_length: Hop between successive STFT frames.

    Returns:
        The selected percentile of the per-frame energies.
    """
    if not frame_length:
        frame_length = min(len(audio), 2048)

    spectrum = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    frame_energy = (np.abs(spectrum) ** 2).sum(axis=0)

    # Count the frames at or below the 10th-percentile energy.
    tenth_percentile = np.percentile(frame_energy, 10)
    quiet_frame_count = int(np.count_nonzero(frame_energy <= tenth_percentile))

    if quiet_frame_count < len(frame_energy) * 0.1:
        chosen_percentile = 5
    else:
        chosen_percentile = 10

    return np.percentile(frame_energy, chosen_percentile)

def denoise_audio(filepath, speed_factor=1.0):
    """Load audio, optionally time-stretch it, and denoise it per interval.

    Noise reduction is applied separately to each interval returned by
    ``librosa.effects.split(audio, top_db=20)``. A reduced interval replaces
    the original samples only when ``calculate_snr`` reports a higher value
    for it. If no interval improved, or the SNR of the assembled result is
    below the initial SNR, the unreduced audio is used instead. The chosen
    audio is normalized with ``librosa.util.normalize`` before returning.

    Args:
        filepath: Source handed to ``librosa.load``; the route in this file
            passes an in-memory byte stream rather than a path.
        speed_factor: Rate for ``librosa.effects.time_stretch``; stretching
            is skipped when it equals 1.0.

    Returns:
        Tuple ``(normalized_audio, sample_rate, initial_snr, final_snr)``.
    """
    audio, sample_rate = librosa.load(filepath, sr=None)

    # Stretch first so every later measurement sees the final-speed audio.
    if speed_factor != 1.0:
        audio = librosa.effects.time_stretch(audio, rate=speed_factor)

    initial_snr = calculate_snr(audio, sample_rate)

    # The reduction strength depends only on the whole-clip SNR, so it is
    # computed once instead of once per interval.
    reduction_intensity = determine_reduction_intensity(initial_snr)

    vad_intervals = librosa.effects.split(audio, top_db=20)

    noise_reduced_audio = np.copy(audio)
    improved_intervals = False  # Set once any interval's SNR improves

    for start, end in vad_intervals:
        interval_audio = audio[start:end]
        interval_snr = calculate_snr(interval_audio, sample_rate)

        reduced_interval_audio = nr.reduce_noise(
            y=interval_audio, sr=sample_rate, prop_decrease=reduction_intensity
        )

        # Keep the reduced samples only when they measure better.
        reduced_interval_snr = calculate_snr(reduced_interval_audio, sample_rate)
        if reduced_interval_snr > interval_snr:
            noise_reduced_audio[start:end] = reduced_interval_audio
            improved_intervals = True
        else:
            print("No SNR improvement; keeping original audio for this interval.")

    # Fall back to the unreduced audio when the overall result is no better.
    final_snr = calculate_snr(noise_reduced_audio, sample_rate)
    if not improved_intervals or final_snr < initial_snr:
        final_snr = initial_snr
        noise_reduced_audio = audio

    normalized_audio = librosa.util.normalize(noise_reduced_audio)
    return normalized_audio, sample_rate, initial_snr, final_snr

def determine_reduction_intensity(snr):
    """Map an SNR value to a noise-reduction strength.

    Lower SNR gets stronger reduction: below 10 -> 0.7, below 15 -> 0.5,
    below 20 -> 0.22. Every other value (20 and above, or a value that
    compares false against all thresholds such as NaN) gets 0.1.
    """
    # (exclusive upper bound, intensity), checked in ascending order.
    thresholds = (
        (10, 0.7),
        (15, 0.5),
        (20, 0.22),
    )
    for upper_bound, intensity in thresholds:
        if snr < upper_bound:
            return intensity
    return 0.1  # Least aggressive reduction for everything else

def convert_to_base64(audio_data, sample_rate):
    """Return *audio_data* written as WAV and encoded as a base64 string.

    The samples are written with ``soundfile`` into an in-memory buffer at
    the given sample rate; the buffer's bytes are then base64-encoded and
    decoded to a UTF-8 ``str``.
    """
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, audio_data, sample_rate, format='wav')
    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(wav_buffer.getvalue()).decode('utf-8')

def get_error_arrays(alignments, reference, hypothesis, base64string):
insertion = []
Expand Down
45 changes: 43 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,55 @@
audioread==3.0.1
blinker==1.7.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
contourpy==1.1.1
cycler==0.12.1
decorator==5.1.1
eng-to-ipa==0.0.2
Flask==3.0.0
flask==3.0.0
fonttools==4.51.0
fuzzywuzzy==0.18.0
idna==3.7
importlib-metadata==7.1.0
importlib-resources==6.4.0
itsdangerous==2.1.2
Jinja2==3.1.2
jiwer==3.0.3
joblib==1.4.0
kiwisolver==1.4.5
lazy-loader==0.4
Levenshtein==0.24.0
librosa==0.10.1
llvmlite==0.41.1
MarkupSafe==2.1.3
matplotlib==3.7.5
msgpack==1.0.8
noisereduce==3.0.2
numba==0.58.1
numpy==1.24.4
packaging==24.0
pillow==10.3.0
platformdirs==4.2.0
pooch==1.8.1
pycparser==2.22
pydub==0.25.1
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
rapidfuzz==3.6.1
Werkzeug==3.0.1
requests==2.31.0
scikit-learn==1.3.2
scipy==1.10.1
six==1.16.0
soundfile==0.12.1
soxr==0.3.7
threadpoolctl==3.4.0
tqdm==4.66.2
typing-extensions==4.11.0
tzdata==2024.1
urllib3==2.2.1
werkzeug==3.0.1
zipp==3.18.1

0 comments on commit 4848e7d

Please sign in to comment.