Fix formatting issues: wrap long lines and call slogdet via np.linalg

martinalex000 · martinalex000 · commit 9e6c6c86fae2 · 2025-09-01T10:25:15.000+02:00
diff --git a/bhc/api.py b/bhc/api.py
@@ -45,7 +45,9 @@ class AbstractHierarchicalClustering(ABC):
     def build(self): ...
 
 
-class AbstractBayesianBasedHierarchicalClustering(AbstractHierarchicalClustering, ABC):
+class AbstractBayesianBasedHierarchicalClustering(
+    AbstractHierarchicalClustering, ABC
+):
     def __init__(self, data, model, alpha, cut_allowed):
         self.data = data
         self.model = model
diff --git a/bhc/core/bhc.py b/bhc/core/bhc.py
@@ -8,7 +8,9 @@
 import bhc.api as api
 
 
-class BayesianHierarchicalClustering(api.AbstractBayesianBasedHierarchicalClustering):
+class BayesianHierarchicalClustering(
+    api.AbstractBayesianBasedHierarchicalClustering
+):
     """
     Reference: HELLER, Katherine A.; GHAHRAMANI, Zoubin.
                Bayesian hierarchical clustering.
@@ -52,7 +54,9 @@ def build(self):
         tmp_merge = np.empty((pair_count, 5), dtype=float)
         row = 0
         for i in range(n_objects):
-            log_p_k_row = self.model.row_of_log_likelihood_for_pairs(self.data, i)
+            log_p_k_row = self.model.row_of_log_likelihood_for_pairs(
+                self.data, i
+            )
             for j in range(i + 1, n_objects):
                 # compute log(d_k)
                 n_ch = n[i] + n[j]
@@ -78,7 +82,9 @@ def build(self):
         data_per_cluster = [np.array([self.data[i]]) for i in range(n_objects)]
         while active_nodes.size > 1:
             # find i, j with the highest probability of the merged hypothesis
-            position = np.argmax(tmp_merge[:, 2])  # returns the first occurrence
+            position = np.argmax(
+                tmp_merge[:, 2]
+            )  # returns the first occurrence
             i, j, log_r, r1, r2 = tmp_merge[position]
             i = int(i)
             j = int(j)
@@ -131,7 +137,9 @@ def build(self):
             log_p = np.append(log_p, log_p_ij)
 
             # for every pair ij x active
-            collected_merge_info = np.empty((len(active_nodes) - 1, 5), dtype=float)
+            collected_merge_info = np.empty(
+                (len(active_nodes) - 1, 5), dtype=float
+            )
             for k in range(active_nodes.size - 1):
                 # compute log(d_k)
                 n_ch = n[k] + n[ij]
diff --git a/bhc/core/prior.py b/bhc/core/prior.py
@@ -16,7 +16,8 @@ class NormalInverseWishart(AbstractPrior):
     Reference: MURPHY, Kevin P.
                Conjugate Bayesian analysis of the Gaussian distribution.
                def, v. 1, n. 2σ2, p. 16, 2007.
-               https://www.cse.iitk.ac.in/users/piyush/courses/tpmi_winter19/readings/bayesGauss.pdf
+               https://www.cse.iitk.ac.in/users/piyush/courses/
+               tpmi_winter19/readings/bayesGauss.pdf
     """
 
     def __init__(self, s_mat, r, v, m):
@@ -42,13 +43,16 @@ def row_of_log_likelihood_for_pairs(
         i,  # index of the row you want (int)
     ):
         """
-        Returns 1D array containing the log-likelihoods for pairs of points needed for the
-        initialization of bhc. This function combines i with all other points j > i and returns
-        the log-likelihood of those clusters (containing two points each).
+        Returns 1D array containing the log-likelihoods for pairs of points
+        needed for the initialization of bhc. This function combines i with
+        all other points j > i and returns the log-likelihood of those
+        clusters (containing two points each).
         """
         N, d = X.shape
         if d != self.s_mat.shape[0]:
-            raise ValueError("data dimension and prior scale matrix do not match")
+            raise ValueError(
+                "data dimension and prior scale matrix do not match"
+            )
 
         # ------------------------------------------------------------------
         # Pairwise sufficient statistics – only for j > i (batched)
@@ -72,7 +76,7 @@ def row_of_log_likelihood_for_pairs(
         # ------------------------------------------------------------------
         rp = self.r + 2.0  # each cluster has two points
         vp = self.v + 2.0
-        sign, logdet = slogdet(s_mat_p)  # (N-i-1,)
+        sign, logdet = np.linalg.slogdet(s_mat_p)  # (N-i-1,)
         log_prior_post = (
             LOG2 * (vp * d / 2.0)
             + (d / 2.0) * np.log(2.0 * np.pi / rp)
@@ -87,7 +91,9 @@ def row_of_log_likelihood_for_pairs(
     def __calc_log_prior(s_mat, r, v):
         d = s_mat.shape[0]
         log_prior = LOG2 * (v * d / 2.0) + (d / 2.0) * np.log(2.0 * np.pi / r)
-        log_prior += multigammaln(v / 2.0, d) - (v / 2.0) * np.log(np.linalg.det(s_mat))
+        log_prior += multigammaln(v / 2.0, d) - (v / 2.0) * np.log(
+            np.linalg.det(s_mat)
+        )
         return log_prior
 
     @staticmethod
@@ -96,7 +102,9 @@ def __calc_posterior(x_mat, s_mat, r, v, m):
         x_bar = np.mean(x_mat, axis=0)
         rp = r + n
         vp = v + n
-        s_mat_t = np.zeros(s_mat.shape) if n == 1 else (n - 1) * np.cov(x_mat.T)
+        s_mat_t = (
+            np.zeros(s_mat.shape) if n == 1 else (n - 1) * np.cov(x_mat.T)
+        )
         dt = (x_bar - m)[np.newaxis]
         s_mat_p = s_mat + s_mat_t + (r * n / rp) * np.dot(dt.T, dt)
         return s_mat_p, rp, vp