
Commit d70a397

refactor(pt): Streamline type embedding compression logic in dpa1 and se_atten
- Simplified the type embedding compression process by consolidating methods and removing unnecessary conditions.
- Enhanced clarity in the handling of one-side and two-side type embeddings, ensuring consistent functionality across both modes.
- Updated comments for better understanding of the compression logic and its implications on performance.

These changes contribute to cleaner code and improved maintainability of the descriptor model.
1 parent f5f8185 commit d70a397

File tree: 2 files changed (+41, -47 lines)


deepmd/pt/model/descriptor/dpa1.py

Lines changed: 2 additions & 4 deletions
@@ -646,10 +646,8 @@ def enable_compression(
             self.table.data, self.table_config, self.lower, self.upper
         )

-        # Enable type embedding compression only for two-side mode
-        # TODO: why not enable for one-side mode? (do not consider this for now)
-        if not self.se_atten.type_one_side:
-            self.se_atten.enable_type_embedding_compression(self.type_embedding)
+        # Enable type embedding compression
+        self.se_atten.type_embedding_compression(self.type_embedding)

         self.compress = True

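Because the new type_embedding_compression handles both modes internally, the descriptor's call site no longer branches on type_one_side. A minimal, hypothetical sketch of that dispatch pattern (ToyAtten and ToyDescriptor are illustrative stand-ins, not deepmd-kit classes):

from typing import Optional, Sequence


class ToyAtten:
    """Illustrative stand-in for the se_atten block (not a deepmd-kit class)."""

    def __init__(self, type_one_side: bool) -> None:
        self.type_one_side = type_one_side
        self.type_embd_data: Optional[list] = None

    def type_embedding_compression(self, type_embedding: Sequence) -> None:
        # The one-side/two-side branch lives in the callee, so callers
        # never need to inspect type_one_side.
        n = len(type_embedding)
        rows = n if self.type_one_side else n * n
        self.type_embd_data = [f"row {i}" for i in range(rows)]  # placeholder table


class ToyDescriptor:
    """Illustrative stand-in for the dpa1 descriptor."""

    def __init__(self, se_atten: ToyAtten, type_embedding: Sequence) -> None:
        self.se_atten = se_atten
        self.type_embedding = type_embedding
        self.compress = False

    def enable_compression(self) -> None:
        # Mirrors the dpa1.py hunk above: one unconditional call for either mode.
        self.se_atten.type_embedding_compression(self.type_embedding)
        self.compress = True


desc = ToyDescriptor(ToyAtten(type_one_side=True), type_embedding=["H", "O", "pad"])
desc.enable_compression()  # works identically with type_one_side=False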
deepmd/pt/model/descriptor/se_atten.py

Lines changed: 39 additions & 43 deletions
@@ -275,7 +275,7 @@ def __init__(
         self.filter_layers_strip = filter_layers_strip
         self.stats = None

-        # add for compression
+        # For geometric compression
         self.compress = False
         self.is_sorted = False
         self.compress_info = nn.ParameterList(
@@ -284,9 +284,8 @@ def __init__(
         self.compress_data = nn.ParameterList(
             [nn.Parameter(torch.zeros(0, dtype=self.prec, device=env.DEVICE))]
         )
-        # For type embedding compression (strip mode, two-side only)
-        self.compress_type_embd = False
-        self.two_side_embd_data = None
+        # For type embedding compression
+        self.type_embd_data = None

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
@@ -453,49 +452,44 @@ def enable_compression(
         self.compress_data[0] = table_data[net].to(device=env.DEVICE, dtype=self.prec)
         self.compress = True

-    def enable_type_embedding_compression(
-        self, type_embedding_net: TypeEmbedNet
-    ) -> None:
-        """Enable type embedding compression for strip mode (two-side only).
-
-        This method precomputes the type embedding network outputs for all possible
-        type pairs, following the same approach as TF backend's compression:
-
-        TF approach:
-        1. get_two_side_type_embedding(): creates (ntypes+1)^2 type pair combinations
-        2. make_data(): applies embedding network to get precomputed outputs
-        3. In forward: lookup precomputed values instead of real-time computation
+    def type_embedding_compression(self, type_embedding_net: TypeEmbedNet) -> None:
+        """Enable type embedding compression for strip mode.

-        PyTorch implementation:
-        - Precomputes all (ntypes+1)^2 type pair embedding network outputs
-        - Stores in buffer for proper serialization and device management
-        - Uses lookup during inference to avoid redundant computations
+        Precomputes embedding network outputs for all type combinations:
+        - One-side: (ntypes+1) combinations (neighbor types only)
+        - Two-side: (ntypes+1)² combinations (neighbor x center type pairs)

         Parameters
         ----------
         type_embedding_net : TypeEmbedNet
             The type embedding network that provides get_full_embedding() method
         """
         with torch.no_grad():
-            # Get full type embedding: (ntypes+1) x t_dim
+            # Get full type embedding: (ntypes+1) x tebd_dim
             full_embd = type_embedding_net.get_full_embedding(env.DEVICE)
             nt, t_dim = full_embd.shape

-            # Create all type pair combinations [neighbor, center]
-            # for a fixed row i, all columns j have different neighbor types
-            embd_nei = full_embd.view(1, nt, t_dim).expand(nt, nt, t_dim)
-            # for a fixed row i, all columns j share the same center type i
-            embd_center = full_embd.view(nt, 1, t_dim).expand(nt, nt, t_dim)
-            two_side_embd = torch.cat([embd_nei, embd_center], dim=-1).reshape(
-                -1, t_dim * 2
-            )
-
-            # Apply strip embedding network and store
-            # index logic: index = center_type * nt + neighbor_type
-            self.two_side_embd_data = self.filter_layers_strip.networks[0](
-                two_side_embd
-            ).detach()
-        self.compress_type_embd = True
+            if self.type_one_side:
+                # One-side: only neighbor types, much simpler!
+                # Precompute for all (ntypes+1) neighbor types
+                self.type_embd_data = self.filter_layers_strip.networks[0](
+                    full_embd
+                ).detach()
+            else:
+                # Two-side: all (ntypes+1)² type pair combinations
+                # Create [neighbor, center] combinations
+                # for a fixed row i, all columns j have different neighbor types
+                embd_nei = full_embd.view(1, nt, t_dim).expand(nt, nt, t_dim)
+                # for a fixed row i, all columns j share the same center type i
+                embd_center = full_embd.view(nt, 1, t_dim).expand(nt, nt, t_dim)
+                two_side_embd = torch.cat([embd_nei, embd_center], dim=-1).reshape(
+                    -1, t_dim * 2
+                )
+                # Precompute for all type pairs
+                # Index formula: idx = center_type * nt + neighbor_type
+                self.type_embd_data = self.filter_layers_strip.networks[0](
+                    two_side_embd
+                ).detach()

     def forward(
         self,
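The two-side branch above amounts to building a lookup table once and indexing it later. A minimal, self-contained sketch of the same construction, with a plain torch.nn.Linear standing in for filter_layers_strip.networks[0] and arbitrary toy sizes (nt, t_dim, ng) rather than deepmd-kit values:

import torch

torch.manual_seed(0)
nt, t_dim, ng = 4, 3, 6                       # toy sizes: (ntypes+1), tebd_dim, output width
strip_net = torch.nn.Linear(2 * t_dim, ng)    # stand-in for the strip embedding network
full_embd = torch.randn(nt, t_dim)            # stand-in for get_full_embedding()

with torch.no_grad():
    # Row i, column j pairs neighbor type j with center type i.
    embd_nei = full_embd.view(1, nt, t_dim).expand(nt, nt, t_dim)
    embd_center = full_embd.view(nt, 1, t_dim).expand(nt, nt, t_dim)
    two_side_embd = torch.cat([embd_nei, embd_center], dim=-1).reshape(-1, 2 * t_dim)
    table = strip_net(two_side_embd)          # (nt*nt, ng) precomputed outputs

# Index formula from the diff: idx = center_type * nt + neighbor_type.
center, neighbor = 2, 1
idx = center * nt + neighbor
direct = strip_net(torch.cat([full_embd[neighbor], full_embd[center]]))
assert torch.allclose(table[idx], direct)     # table lookup == on-the-fly evaluation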
@@ -622,22 +616,24 @@ def forward(
             nlist_index = nlist.reshape(nb, nloc * nnei)
             # nf x (nl x nnei)
             nei_type = torch.gather(extended_atype, dim=1, index=nlist_index)
-            # (nf x nl x nnei) x ng
-            nei_type_index = nei_type.view(-1, 1).expand(-1, ng).type(torch.long)
             if self.type_one_side:
-                tt_full = self.filter_layers_strip.networks[0](type_embedding)
-                # (nf x nl x nnei) x ng
-                gg_t = torch.gather(tt_full, dim=0, index=nei_type_index)
+                if self.type_embd_data is not None:
+                    tt_full = self.type_embd_data
+                else:
+                    # (ntypes+1, tebd_dim) -> (ntypes+1, ng)
+                    tt_full = self.filter_layers_strip.networks[0](type_embedding)
+                # (nf*nl*nnei,) -> (nf*nl*nnei, ng)
+                gg_t = tt_full[nei_type.view(-1).type(torch.long)]
             else:
                 idx_i = torch.tile(
                     atype.reshape(-1, 1) * ntypes_with_padding, [1, nnei]
                 ).view(-1)
                 idx_j = nei_type.view(-1)
                 # (nf x nl x nnei)
                 idx = (idx_i + idx_j).to(torch.long)
-                if self.compress_type_embd and self.two_side_embd_data is not None:
+                if self.type_embd_data is not None:
                     # (ntypes^2, ng)
-                    tt_full = self.two_side_embd_data
+                    tt_full = self.type_embd_data
                 else:
                     # (ntypes) * ntypes * nt
                     type_embedding_nei = torch.tile(
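At inference time the precomputed table turns the strip-network evaluation into plain indexing. A hedged sketch of the lookup for both modes, using random placeholder tables (table_one, table_two) in place of the real precomputed type_embd_data and toy shapes for atype / nei_type:

import torch

torch.manual_seed(0)
nt, ng = 4, 6                 # (ntypes+1) including the padding type, output width
nloc, nnei = 5, 3             # toy numbers of local atoms and neighbors

atype = torch.randint(0, nt, (1, nloc))            # center types, nf = 1
nei_type = torch.randint(0, nt, (1, nloc * nnei))  # neighbor types per center atom

# Placeholder tables standing in for the precomputed strip-network outputs.
table_one = torch.randn(nt, ng)        # one-side: one row per neighbor type
table_two = torch.randn(nt * nt, ng)   # two-side: one row per (center, neighbor) pair

# One-side lookup: index rows by neighbor type only.
gg_t_one = table_one[nei_type.view(-1).long()]               # (nloc*nnei, ng)

# Two-side lookup: idx = center_type * nt + neighbor_type.
idx_i = torch.tile(atype.reshape(-1, 1) * nt, [1, nnei]).view(-1)
idx_j = nei_type.view(-1)
idx = (idx_i + idx_j).long()
gg_t_two = table_two[idx]                                    # (nloc*nnei, ng)

print(gg_t_one.shape, gg_t_two.shape)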
