Commit 9c17b96

njzjz and Copilot authored
fix(dpmodel): fix normalize scale of initial parameters (#4774)
The current scale is too large. This PR makes it consistent with PT.

## Summary by CodeRabbit

- **Refactor**
  - Improved the initialization of certain neural network parameters for enhanced stability and consistency. No changes to user-facing functionality.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Signed-off-by: Jinzhe Zeng <njzjz@qq.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 881d95e · commit 9c17b96

File tree

1 file changed (+12 −3 lines changed)

deepmd/dpmodel/utils/network.py

Lines changed: 12 additions & 3 deletions
@@ -105,9 +105,18 @@ def __init__(
         # only use_timestep when skip connection is established.
         use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2)
         rng = np.random.default_rng(seed)
-        self.w = rng.normal(size=(num_in, num_out)).astype(prec)
-        self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None
-        self.idt = rng.normal(size=(num_out,)).astype(prec) if use_timestep else None
+        scale_factor = 1.0 / np.sqrt(num_out + num_in)
+        self.w = rng.normal(size=(num_in, num_out), scale=scale_factor).astype(prec)
+        self.b = (
+            rng.normal(size=(num_out,), scale=scale_factor).astype(prec)
+            if bias
+            else None
+        )
+        self.idt = (
+            rng.normal(size=(num_out,), scale=scale_factor).astype(prec)
+            if use_timestep
+            else None
+        )
         self.activation_function = (
             activation_function if activation_function is not None else "none"
         )
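For context, here is a minimal standalone sketch of why the unit-scale draw was too large. It is not part of the commit; the sizes `num_in = 64`, `num_out = 128`, the batch size, and the seed are illustrative. It shows that a standard-normal weight matrix inflates pre-activations by roughly sqrt(num_in), while the Glorot-style scale 1/sqrt(num_in + num_out) used in the diff keeps them near unit order:

```python
import numpy as np

# Illustrative sizes; not taken from the commit.
num_in, num_out = 64, 128
rng = np.random.default_rng(0)
x = rng.normal(size=(1000, num_in))  # inputs with roughly unit variance

# Old initialization: standard normal, std = 1.
w_old = rng.normal(size=(num_in, num_out))

# New initialization: Glorot-style scale, as in the diff above.
scale_factor = 1.0 / np.sqrt(num_out + num_in)
w_new = rng.normal(size=(num_in, num_out), scale=scale_factor)

# Pre-activation spread: ~sqrt(num_in) = 8 before the fix,
# ~sqrt(num_in / (num_in + num_out)) ~= 0.58 after it.
print((x @ w_old).std())
print((x @ w_new).std())
```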
