Skip to content

Commit 5629cfa

Browse files
committed
Add volume normalization
1 parent 535c954 commit 5629cfa

File tree

3 files changed: 12 additions and 2 deletions.

3 files changed: 12 additions and 2 deletions.

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,7 @@ submission/
120120
submission.zip
121121

122122
# Hydra outputs
123-
outputs/
123+
outputs/
124+
125+
# Tensorboard
126+
tensorboard/

convert.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88
import librosa
99
from tqdm import tqdm
10+
import pyloudnorm
1011

1112
from preprocess import preemphasis
1213
from model import Encoder, Vocoder
@@ -46,11 +47,14 @@ def convert(cfg):
4647
encoder.eval()
4748
vocoder.eval()
4849

50+
meter = pyloudnorm.Meter(cfg.preprocessing.sr)
51+
4952
for wav_path, speaker_id, out_filename in tqdm(synthesis_list):
5053
wav_path = in_dir / wav_path
5154
wav, _ = librosa.load(
5255
wav_path.with_suffix(".wav"),
5356
sr=cfg.preprocessing.sr)
57+
ref_loudness = meter.integrated_loudness(wav)
5458
wav = wav / np.abs(wav).max() * 0.999
5559

5660
mel = librosa.feature.melspectrogram(
@@ -72,6 +76,8 @@ def convert(cfg):
7276
_, _, indices = encoder.encode(mel)
7377
output = vocoder.generate(indices, speaker)
7478

79+
output_loudness = meter.integrated_loudness(output)
80+
output = pyloudnorm.normalize.loudness(output, output_loudness, ref_loudness)
7581
path = out_dir / out_filename
7682
librosa.output.write_wav(path.with_suffix(".wav"), output.astype(np.float32), sr=cfg.preprocessing.sr)
7783

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ numpy>=1.17
22
scipy>=1.4
33
librosa>=0.7
44
tqdm>=4.45.0
5-
hydra-core>=0.11
5+
hydra-core>=0.11
6+
pyloudnorm>=0.1.0

0 commit comments

Comments
 (0)