File tree: 3 files changed, +12 -2 lines changed

Original file line number | Diff line number | Diff line change
@@ -120,4 +120,7 @@ submission/
120 120   submission.zip
121 121
122 122   # Hydra outputs
123     - outputs/
    123 + outputs/
    124 +
    125 + # Tensorboard
    126 + tensorboard/
Original file line number | Diff line number | Diff line change
 7  7   import numpy as np
 8  8   import librosa
 9  9   from tqdm import tqdm
   10 + import pyloudnorm
10 11
11 12   from preprocess import preemphasis
12 13   from model import Encoder, Vocoder
@@ -46,11 +47,14 @@ def convert(cfg):
46 47   encoder.eval()
47 48   vocoder.eval()
48 49
   50 + meter = pyloudnorm.Meter(cfg.preprocessing.sr)
   51 +
49 52   for wav_path, speaker_id, out_filename in tqdm(synthesis_list):
50 53   wav_path = in_dir / wav_path
51 54   wav, _ = librosa.load(
52 55   wav_path.with_suffix(".wav"),
53 56   sr=cfg.preprocessing.sr)
   57 + ref_loudness = meter.integrated_loudness(wav)
54 58   wav = wav / np.abs(wav).max() * 0.999
55 59
56 60   mel = librosa.feature.melspectrogram(
@@ -72,6 +76,8 @@ def convert(cfg):
72 76   _, _, indices = encoder.encode(mel)
73 77   output = vocoder.generate(indices, speaker)
74 78
   79 + output_loudness = meter.integrated_loudness(output)
   80 + output = pyloudnorm.normalize.loudness(output, output_loudness, ref_loudness)
75 81   path = out_dir / out_filename
76 82   librosa.output.write_wav(path.with_suffix(".wav"), output.astype(np.float32), sr=cfg.preprocessing.sr)
77 83
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,5 @@ numpy>=1.17
2 2   scipy>=1.4
3 3   librosa>=0.7
4 4   tqdm>=4.45.0
5   - hydra-core>=0.11
  5 + hydra-core>=0.11
  6 + pyloudnorm>=0.1.0
You can’t perform that action at this time.
0 commit comments