From f0ceffc39b51fbf82be22ae0d6ef695ea8fbfb8d Mon Sep 17 00:00:00 2001
From: alastorid <alastorid@gmail.com>
Date: Thu, 26 Dec 2024 03:20:07 +0800
Subject: [PATCH 1/3] Merge remote-tracking branch 'zgldh/main'

---
 melo/api.py      |  2 +-
 melo/utils.py    | 11 +++++++++++
 requirements.txt |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/melo/api.py b/melo/api.py
index 236ea8f17..567df0678 100644
--- a/melo/api.py
+++ b/melo/api.py
@@ -122,7 +122,7 @@ def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_s
                     )[0][0, 0].data.cpu().float().numpy()
                 del x_tst, tones, lang_ids, bert, ja_bert, x_tst_lengths, speakers
                 # 
-            audio_list.append(audio)
+            audio_list.append(utils.fix_loudness(audio,self.hps.data.sampling_rate))
         torch.cuda.empty_cache()
         audio = self.audio_numpy_concat(audio_list, sr=self.hps.data.sampling_rate, speed=speed)
 
diff --git a/melo/utils.py b/melo/utils.py
index bafca5a6d..bbe3fdeae 100644
--- a/melo/utils.py
+++ b/melo/utils.py
@@ -13,11 +13,22 @@
 from melo.text.cleaner import clean_text
 from melo import commons
 
+import pyloudnorm as pyln
+
 MATPLOTLIB_FLAG = False
 
 logger = logging.getLogger(__name__)
 
+def fix_loudness(input, rate):
+    # 峰值归一化至 -1 dB
+    peak_normalized_audio = pyln.normalize.peak(input, -1.0)
+
+    # 测量响度
+    meter = pyln.Meter(rate)
+    loudness = meter.integrated_loudness(peak_normalized_audio)
 
+    # 响度归一化至 -12 dB LUFS
+    return pyln.normalize.loudness(peak_normalized_audio, loudness, -12.0)
 
 def get_text_for_tts_infer(text, language_str, hps, device, symbol_to_id=None):
     norm_text, phone, tone, word2ph = clean_text(text, language_str)
diff --git a/requirements.txt b/requirements.txt
index a79f61599..1cc2f0086 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,3 +27,4 @@ langid==1.1.6
 tqdm
 tensorboard==2.16.2
 loguru==0.7.2
+pyloudnorm
\ No newline at end of file

From 76c31870b5f4a3584d3bbe3f09a1191766649fa1 Mon Sep 17 00:00:00 2001
From: alastorid <alastorid@gmail.com>
Date: Thu, 26 Dec 2024 04:53:50 +0800
Subject: [PATCH 2/3] Say goodbye to clipping: keep only peak normalization in fix_loudness

---
 melo/utils.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/melo/utils.py b/melo/utils.py
index bbe3fdeae..94ce92946 100644
--- a/melo/utils.py
+++ b/melo/utils.py
@@ -19,16 +19,10 @@
 
 logger = logging.getLogger(__name__)
 
-def fix_loudness(input, rate):
-    # 峰值归一化至 -1 dB
-    peak_normalized_audio = pyln.normalize.peak(input, -1.0)
+def fix_loudness(data, rate):
+    peak_normalized_audio = pyln.normalize.peak(data, -1.0)
 
-    # 测量响度
-    meter = pyln.Meter(rate)
-    loudness = meter.integrated_loudness(peak_normalized_audio)
-
-    # 响度归一化至 -12 dB LUFS
-    return pyln.normalize.loudness(peak_normalized_audio, loudness, -12.0)
+    return peak_normalized_audio
 
 def get_text_for_tts_infer(text, language_str, hps, device, symbol_to_id=None):
     norm_text, phone, tone, word2ph = clean_text(text, language_str)

From df784b7d2615358a30a3dbdb55566e8d386b8184 Mon Sep 17 00:00:00 2001
From: alastorid <alastorid@gmail.com>
Date: Tue, 21 Jan 2025 09:09:40 -0500
Subject: [PATCH 3/3] Rework fix_loudness to normalize_loudness to resolve
 potential issues

---
 melo/api.py      |  2 +-
 melo/utils.py    | 25 +++++++++++++++++++------
 requirements.txt |  1 -
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/melo/api.py b/melo/api.py
index 567df0678..a3a65f574 100644
--- a/melo/api.py
+++ b/melo/api.py
@@ -122,7 +122,7 @@ def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_s
                     )[0][0, 0].data.cpu().float().numpy()
                 del x_tst, tones, lang_ids, bert, ja_bert, x_tst_lengths, speakers
                 # 
-            audio_list.append(utils.fix_loudness(audio,self.hps.data.sampling_rate))
+            audio_list.append(utils.normalize_loudness(audio))
         torch.cuda.empty_cache()
         audio = self.audio_numpy_concat(audio_list, sr=self.hps.data.sampling_rate, speed=speed)
 
diff --git a/melo/utils.py b/melo/utils.py
index 94ce92946..2186f6c45 100644
--- a/melo/utils.py
+++ b/melo/utils.py
@@ -13,16 +13,29 @@
 from melo.text.cleaner import clean_text
 from melo import commons
 
-import pyloudnorm as pyln
-
 MATPLOTLIB_FLAG = False
 
 logger = logging.getLogger(__name__)
 
-def fix_loudness(data, rate):
-    peak_normalized_audio = pyln.normalize.peak(data, -1.0)
-
-    return peak_normalized_audio
+def normalize_loudness(audio):
+    """Scale ``audio`` so its peak absolute sample sits just below 1.0.
+
+    Despite the name, no perceptual loudness is measured: every sample is
+    multiplied by one gain factor, so the relative dynamics are unchanged.
+
+    Args:
+        audio: NumPy array of audio samples.
+
+    Returns:
+        A rescaled copy of ``audio``; the input object itself is returned
+        when it is empty or entirely silent (nothing to scale).
+    """
+    if np.size(audio) == 0:  # np.max raises ValueError on empty input
+        return audio
+    max_val = np.max(np.abs(audio))
+    if max_val == 0:  # all-zero signal: avoid dividing by zero
+        return audio
+    return audio * ((1 - 1e-6) / max_val)  # peak lands just under 1.0
 
 def get_text_for_tts_infer(text, language_str, hps, device, symbol_to_id=None):
     norm_text, phone, tone, word2ph = clean_text(text, language_str)
diff --git a/requirements.txt b/requirements.txt
index 1cc2f0086..a79f61599 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,4 +27,3 @@ langid==1.1.6
 tqdm
 tensorboard==2.16.2
 loguru==0.7.2
-pyloudnorm
\ No newline at end of file