
Commit 0ad1ab6

feat(pt): type embedding can still be compress even if attn_layer != 0 (#5066)
Summary by CodeRabbit

* **New Features**
  * Split compression into independent type-embedding (TEBD) and geometric modes, enabling partial compression when attention layer ≠ 0.
* **Documentation**
  * Expanded backend-specific guidance describing full vs partial compression rules and prerequisites (e.g., TEBD input mode).
* **Tests**
  * Added tests covering non-zero attention-layer scenarios to validate partial compression behavior.
* **Bug Fixes**
  * Improved eligibility checks and clearer runtime warnings when geometric compression is skipped.
1 parent a72b3af commit 0ad1ab6

File tree

7 files changed: +314 −71 lines changed

deepmd/pt/model/descriptor/dpa1.py

Lines changed: 40 additions & 32 deletions
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import warnings
 from typing import (
     Any,
     Callable,
@@ -304,7 +305,8 @@ def __init__(
         self.use_econf_tebd = use_econf_tebd
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
-        self.compress = False
+        self.tebd_compress = False
+        self.geo_compress = False
         self.type_embedding = TypeEmbedNet(
             ntypes,
             tebd_dim,
@@ -592,12 +594,17 @@ def enable_compression(
         check_frequency
             The overflow check frequency
         """
-        # do some checks before the mocel compression process
-        if self.compress:
+        # do some checks before the model compression process
+        if self.tebd_compress or self.geo_compress:
             raise ValueError("Compression is already enabled.")
+
+        if self.tebd_input_mode != "strip":
+            raise RuntimeError("Type embedding compression only works in strip mode")
+
         assert not self.se_atten.resnet_dt, (
             "Model compression error: descriptor resnet_dt must be false!"
         )
+
         for tt in self.se_atten.exclude_types:
             if (tt[0] not in range(self.se_atten.ntypes)) or (
                 tt[1] not in range(self.se_atten.ntypes)
@@ -609,6 +616,7 @@ def enable_compression(
                     + str(self.se_atten.ntypes)
                     + "!"
                 )
+
         if (
             self.se_atten.ntypes * self.se_atten.ntypes
             - len(self.se_atten.exclude_types)
@@ -618,38 +626,38 @@ def enable_compression(
                 "Empty embedding-nets are not supported in model compression!"
             )

-        if self.se_atten.attn_layer != 0:
-            raise RuntimeError("Cannot compress model when attention layer is not 0.")
-
-        if self.tebd_input_mode != "strip":
-            raise RuntimeError("Cannot compress model when tebd_input_mode == 'concat'")
-
-        data = self.serialize()
-        self.table = DPTabulate(
-            self,
-            data["neuron"],
-            data["type_one_side"],
-            data["exclude_types"],
-            ActivationFn(data["activation_function"]),
-        )
-        self.table_config = [
-            table_extrapolate,
-            table_stride_1,
-            table_stride_2,
-            check_frequency,
-        ]
-        self.lower, self.upper = self.table.build(
-            min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
-        )
-
-        self.se_atten.enable_compression(
-            self.table.data, self.table_config, self.lower, self.upper
-        )
-
         # Enable type embedding compression
         self.se_atten.type_embedding_compression(self.type_embedding)
+        self.tebd_compress = True
+
+        if self.se_atten.attn_layer == 0:
+            data = self.serialize()
+            self.table = DPTabulate(
+                self,
+                data["neuron"],
+                data["type_one_side"],
+                data["exclude_types"],
+                ActivationFn(data["activation_function"]),
+            )
+            self.table_config = [
+                table_extrapolate,
+                table_stride_1,
+                table_stride_2,
+                check_frequency,
+            ]
+            self.lower, self.upper = self.table.build(
+                min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
+            )

-        self.compress = True
+            self.se_atten.enable_compression(
+                self.table.data, self.table_config, self.lower, self.upper
+            )
+            self.geo_compress = True
+        else:
+            warnings.warn(
+                "Attention layer is not 0, only type embedding is compressed. Geometric part is not compressed.",
+                UserWarning,
+            )

     def forward(
         self,

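Read without the diff markup, the new `enable_compression` flow in `dpa1.py` is: require `tebd_input_mode == "strip"`, always pre-compute the type-embedding (TEBD) tables, and tabulate the geometric filters only when there are no attention layers. A minimal sketch of that control flow follows; `compress_type_embedding` and `tabulate_geometric_filters` are hypothetical stand-ins for the real tabulation steps, not the actual DeePMD-kit API.

```python
import warnings


def enable_compression_flow(descriptor) -> None:
    """Illustrative sketch of the decision logic introduced by this commit."""
    if descriptor.tebd_compress or descriptor.geo_compress:
        raise ValueError("Compression is already enabled.")
    if descriptor.tebd_input_mode != "strip":
        raise RuntimeError("Type embedding compression only works in strip mode")

    # TEBD tables depend only on atom types, so they can always be pre-computed.
    descriptor.compress_type_embedding()  # hypothetical helper
    descriptor.tebd_compress = True

    if descriptor.attn_layer == 0:
        # Without attention layers the geometric filter depends only on the
        # radial input, so it can be tabulated as well.
        descriptor.tabulate_geometric_filters()  # hypothetical helper
        descriptor.geo_compress = True
    else:
        warnings.warn(
            "Attention layer is not 0, only type embedding is compressed. "
            "Geometric part is not compressed.",
            UserWarning,
        )
```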
deepmd/pt/model/descriptor/dpa2.py

Lines changed: 29 additions & 27 deletions
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import warnings
 from typing import (
     Any,
     Callable,
@@ -938,38 +939,39 @@ def enable_compression(
                 "Repinit empty embedding-nets are not supported in model compression!"
             )

-        if self.repinit.attn_layer != 0:
-            raise RuntimeError(
-                "Cannot compress model when repinit attention layer is not 0."
-            )
-
         if self.repinit.tebd_input_mode != "strip":
             raise RuntimeError(
-                "Cannot compress model when repinit tebd_input_mode == 'concat'"
+                "Cannot compress model when repinit tebd_input_mode != 'strip'"
             )

-        # repinit doesn't have a serialize method
-        data = self.serialize()
-        self.table = DPTabulate(
-            self,
-            data["repinit_args"]["neuron"],
-            data["repinit_args"]["type_one_side"],
-            data["exclude_types"],
-            ActivationFn(data["repinit_args"]["activation_function"]),
-        )
-        self.table_config = [
-            table_extrapolate,
-            table_stride_1,
-            table_stride_2,
-            check_frequency,
-        ]
-        self.lower, self.upper = self.table.build(
-            min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
-        )
+        if self.repinit.attn_layer == 0:
+            # repinit doesn't have a serialize method
+            data = self.serialize()
+            self.table = DPTabulate(
+                self,
+                data["repinit_args"]["neuron"],
+                data["repinit_args"]["type_one_side"],
+                data["exclude_types"],
+                ActivationFn(data["repinit_args"]["activation_function"]),
+            )
+            self.table_config = [
+                table_extrapolate,
+                table_stride_1,
+                table_stride_2,
+                check_frequency,
+            ]
+            self.lower, self.upper = self.table.build(
+                min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
+            )

-        self.repinit.enable_compression(
-            self.table.data, self.table_config, self.lower, self.upper
-        )
+            self.repinit.enable_compression(
+                self.table.data, self.table_config, self.lower, self.upper
+            )
+        else:
+            warnings.warn(
+                "Attention layer is not 0, only type embedding is compressed. Geometric part is not compressed.",
+                UserWarning,
+            )

         # Enable type embedding compression for repinit (se_atten)
         self.repinit.type_embedding_compression(self.type_embedding)

deepmd/pt/model/descriptor/se_atten.py

Lines changed: 10 additions & 7 deletions
@@ -275,9 +275,10 @@ def __init__(
         self.filter_layers_strip = filter_layers_strip
         self.stats = None

-        # For geometric compression
-        self.compress = False
+        self.tebd_compress = False
+        self.geo_compress = False
         self.is_sorted = False
+        # For geometric compression
         self.compress_info = nn.ParameterList(
             [nn.Parameter(torch.zeros(0, dtype=self.prec, device="cpu"))]
         )
@@ -452,7 +453,7 @@ def enable_compression(
             device="cpu",
         )
         self.compress_data[0] = table_data[net].to(device=env.DEVICE, dtype=self.prec)
-        self.compress = True
+        self.geo_compress = True

     def type_embedding_compression(self, type_embedding_net: TypeEmbedNet) -> None:
         """Enable type embedding compression for strip mode.
@@ -504,6 +505,8 @@ def type_embedding_compression(self, type_embedding_net: TypeEmbedNet) -> None:
         del self.type_embd_data
         self.register_buffer("type_embd_data", embd_tensor)

+        self.tebd_compress = True
+
     def forward(
         self,
         nlist: torch.Tensor,
@@ -630,7 +633,7 @@ def forward(
             # nf x (nl x nnei)
             nei_type = torch.gather(extended_atype, dim=1, index=nlist_index)
             if self.type_one_side:
-                if self.compress:
+                if self.tebd_compress:
                     tt_full = self.type_embd_data
                 else:
                     # (ntypes+1, tebd_dim) -> (ntypes+1, ng)
@@ -644,7 +647,7 @@ def forward(
                 idx_j = nei_type.view(-1)
                 # (nf x nl x nnei)
                 idx = (idx_i + idx_j).to(torch.long)
-                if self.compress:
+                if self.tebd_compress:
                     # ((ntypes+1)^2, ng)
                     tt_full = self.type_embd_data
                 else:
@@ -671,7 +674,7 @@ def forward(
             gg_t = gg_t.reshape(nfnl, nnei, ng)
             if self.smooth:
                 gg_t = gg_t * sw.reshape(-1, self.nnei, 1)
-            if self.compress:
+            if self.geo_compress:
                 ss = ss.reshape(-1, 1)
                 gg_t = gg_t.reshape(-1, gg_t.size(-1))
                 xyz_scatter = torch.ops.deepmd.tabulate_fusion_se_atten(
@@ -719,7 +722,7 @@ def forward(
         return (
             result.view(nframes, nloc, self.filter_neuron[-1] * self.axis_neuron),
             gg.view(nframes, nloc, self.nnei, self.filter_neuron[-1])
-            if not self.compress
+            if not self.geo_compress
             else None,
             dmatrix.view(nframes, nloc, self.nnei, 4)[..., 1:],
             rot_mat.view(nframes, nloc, self.filter_neuron[-1], 3),

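In the forward pass above, the `tebd_compress` branches replace per-pair evaluation of the strip-mode type-embedding network with a row lookup into the pre-computed `type_embd_data` buffer. A toy, self-contained illustration of that gather, with made-up shapes and simplified index construction (the real code derives the indices from the extended atom types and the neighbor list):

```python
import torch

ntypes, ng = 4, 8
# Pre-computed at compression time: one row per (center type, neighbor type)
# pair, shape ((ntypes + 1) ** 2, ng).
type_embd_data = torch.randn((ntypes + 1) ** 2, ng)

# Simplified per-pair type indices for three example pairs.
center_type = torch.tensor([0, 1, 2])
neighbor_type = torch.tensor([3, 0, 2])
idx = (center_type * (ntypes + 1) + neighbor_type).to(torch.long)

# At inference, the two-body type factor becomes a table gather instead of an
# MLP evaluation.
gg_t = type_embd_data[idx]
print(gg_t.shape)  # torch.Size([3, 8])
```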
deepmd/utils/argcheck.py

Lines changed: 1 addition & 1 deletion
@@ -505,7 +505,7 @@ def descrpt_se_atten_common_args() -> list[Argument]:
     doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
     doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection."
     doc_attn = "The length of hidden vectors in attention layers"
-    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and tebd_input_mode=='strip'"
+    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` works for any attn_layer value (for pytorch backend only, for other backends, attn_layer=0 is still needed to compress) when tebd_input_mode=='strip'. When attn_layer!=0, only type embedding is compressed, geometric parts are not compressed."
     doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
     doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"

doc/model/dpa2.md

Lines changed: 5 additions & 1 deletion
@@ -38,6 +38,10 @@ Type embedding is within this descriptor with the {ref}`tebd_dim <model[standard

 ## Model compression

-Model compression is supported when {ref}`repinit/tebd_input_mode <model[standard]/descriptor[dpa2]/repinit/tebd_input_mode>` is `strip`, but only the `repinit` part is compressed.
+Model compression is supported when {ref}`repinit/tebd_input_mode <model[standard]/descriptor[dpa2]/repinit/tebd_input_mode>` is `strip`.
+
+- If {ref}`repinit/attn_layer <model[standard]/descriptor[dpa2]/repinit/attn_layer>` is `0`, both the type embedding and geometric parts inside `repinit` are compressed.
+- If `repinit/attn_layer` is not `0`, only the type embedding tables are compressed and the geometric attention layers remain as neural networks.
+
 An example is given in `examples/water/dpa2/input_torch_compressible.json`.
 The performance improvement will be limited if other parts are more expensive.

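For illustration, a hypothetical DPA-2 descriptor fragment (written as a Python dict rather than the JSON input, with only compression-relevant keys and everything else omitted) that would be partially compressible under the rules above:

```python
# Hypothetical fragment; not a complete or recommended configuration.
descriptor = {
    "type": "dpa2",
    "repinit": {
        "tebd_input_mode": "strip",  # required for any compression
        "attn_layer": 2,  # non-zero: only the TEBD tables are compressed
        # remaining repinit settings omitted
    },
    # repformer and other settings omitted
}
```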
doc/model/train-se-atten.md

Lines changed: 10 additions & 2 deletions
@@ -134,7 +134,9 @@ You can use descriptor `"se_atten_v2"` and is not allowed to set `tebd_input_mod

 Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.

-Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
+:::{note}
+Model compression support differs across backends. See [Model compression](#model-compression) for backend-specific requirements.
+:::

 ## Type embedding

@@ -182,7 +184,13 @@ DPA-1 supports both the [standard data format](../data/system.md) and the [mixed

 ## Model compression

-Model compression is supported only when there is no attention layer (`attn_layer` is 0) and `tebd_input_mode` is `strip`.
+### TensorFlow {{ tensorflow_icon }}
+
+Model compression is supported only when the descriptor attention depth {ref}`attn_layer <model[standard]/descriptor[se_atten]/attn_layer>` is 0 and {ref}`tebd_input_mode <model[standard]/descriptor[se_atten]/tebd_input_mode>` is `"strip"`. Attention layers higher than 0 cannot be compressed in the TensorFlow implementation because the geometric part is tabulated from the static computation graph.
+
+### PyTorch {{ pytorch_icon }}
+
+Model compression is supported for any {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` value when {ref}`tebd_input_mode <model[standard]/descriptor[se_atten_v2]/tebd_input_mode>` is `"strip"`. When `attn_layer` is 0, both the type embedding and geometric parts are compressed. When `attn_layer` is not 0, only the type embedding is compressed while the geometric part keeps the neural network implementation (a warning is emitted during compression).

 ## Training example

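As a concrete illustration of the PyTorch rule, a hypothetical `se_atten` descriptor fragment like the one below (placeholder values, not a recommended setup) would be fully compressible with `attn_layer` set to 0, and TEBD-only compressible with the non-zero value shown:

```python
# Hypothetical se_atten descriptor fragment (dict form of the JSON input).
descriptor = {
    "type": "se_atten",
    "tebd_input_mode": "strip",  # prerequisite for compression
    "attn_layer": 2,  # PyTorch: TEBD compressed, geometric part left uncompressed
    "rcut": 6.0,
    "rcut_smth": 0.5,
    "sel": 120,
    "neuron": [25, 50, 100],
    "axis_neuron": 16,
}
```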