Skip to content

Commit 9e6c6c8

Browse files
committed
formatting issues
1 parent b169084 commit 9e6c6c8

File tree

3 files changed

+31
-13
lines changed

3 files changed

+31
-13
lines changed

bhc/api.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ class AbstractHierarchicalClustering(ABC):
4545
def build(self): ...
4646

4747

48-
class AbstractBayesianBasedHierarchicalClustering(AbstractHierarchicalClustering, ABC):
48+
class AbstractBayesianBasedHierarchicalClustering(
49+
AbstractHierarchicalClustering, ABC
50+
):
4951
def __init__(self, data, model, alpha, cut_allowed):
5052
self.data = data
5153
self.model = model

bhc/core/bhc.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import bhc.api as api
99

1010

11-
class BayesianHierarchicalClustering(api.AbstractBayesianBasedHierarchicalClustering):
11+
class BayesianHierarchicalClustering(
12+
api.AbstractBayesianBasedHierarchicalClustering
13+
):
1214
"""
1315
Reference: HELLER, Katherine A.; GHAHRAMANI, Zoubin.
1416
Bayesian hierarchical clustering.
@@ -52,7 +54,9 @@ def build(self):
5254
tmp_merge = np.empty((pair_count, 5), dtype=float)
5355
row = 0
5456
for i in range(n_objects):
55-
log_p_k_row = self.model.row_of_log_likelihood_for_pairs(self.data, i)
57+
log_p_k_row = self.model.row_of_log_likelihood_for_pairs(
58+
self.data, i
59+
)
5660
for j in range(i + 1, n_objects):
5761
# compute log(d_k)
5862
n_ch = n[i] + n[j]
@@ -78,7 +82,9 @@ def build(self):
7882
data_per_cluster = [np.array([self.data[i]]) for i in range(n_objects)]
7983
while active_nodes.size > 1:
8084
# find i, j with the highest probability of the merged hypothesis
81-
position = np.argmax(tmp_merge[:, 2]) # returns the first occurrence
85+
position = np.argmax(
86+
tmp_merge[:, 2]
87+
) # returns the first occurrence
8288
i, j, log_r, r1, r2 = tmp_merge[position]
8389
i = int(i)
8490
j = int(j)
@@ -131,7 +137,9 @@ def build(self):
131137
log_p = np.append(log_p, log_p_ij)
132138

133139
# for every pair ij x active
134-
collected_merge_info = np.empty((len(active_nodes) - 1, 5), dtype=float)
140+
collected_merge_info = np.empty(
141+
(len(active_nodes) - 1, 5), dtype=float
142+
)
135143
for k in range(active_nodes.size - 1):
136144
# compute log(d_k)
137145
n_ch = n[k] + n[ij]

bhc/core/prior.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ class NormalInverseWishart(AbstractPrior):
1616
Reference: MURPHY, Kevin P.
1717
Conjugate Bayesian analysis of the Gaussian distribution.
1818
Technical report, University of British Columbia, 2007.
19-
https://www.cse.iitk.ac.in/users/piyush/courses/tpmi_winter19/readings/bayesGauss.pdf
19+
https://www.cse.iitk.ac.in/users/piyush/courses/
20+
tpmi_winter19/readings/bayesGauss.pdf
2021
"""
2122

2223
def __init__(self, s_mat, r, v, m):
@@ -42,13 +43,16 @@ def row_of_log_likelihood_for_pairs(
4243
i, # index of the row you want (int)
4344
):
4445
"""
45-
Returns 1D array containing the log-likelihoods for pairs of points needed for the
46-
initialization of bhc. This function combines i with all other points j > i and returns
47-
the log-likelihood of those clusters (containing two points each).
46+
Returns 1D array containing the log-likelihoods for pairs of points
47+
needed for the initialization of bhc. This function combines i with
48+
all other points j > i and returns the log-likelihood of those
49+
clusters (containing two points each).
4850
"""
4951
N, d = X.shape
5052
if d != self.s_mat.shape[0]:
51-
raise ValueError("data dimension and prior scale matrix do not match")
53+
raise ValueError(
54+
"data dimension and prior scale matrix do not match"
55+
)
5256

5357
# ------------------------------------------------------------------
5458
# Pairwise sufficient statistics – only for j > i (batched)
@@ -72,7 +76,7 @@ def row_of_log_likelihood_for_pairs(
7276
# ------------------------------------------------------------------
7377
rp = self.r + 2.0 # each cluster has two points
7478
vp = self.v + 2.0
75-
sign, logdet = slogdet(s_mat_p) # (N-i-1,)
79+
sign, logdet = np.linalg.slogdet(s_mat_p) # (N-i-1,)
7680
log_prior_post = (
7781
LOG2 * (vp * d / 2.0)
7882
+ (d / 2.0) * np.log(2.0 * np.pi / rp)
@@ -87,7 +91,9 @@ def row_of_log_likelihood_for_pairs(
8791
def __calc_log_prior(s_mat, r, v):
8892
d = s_mat.shape[0]
8993
log_prior = LOG2 * (v * d / 2.0) + (d / 2.0) * np.log(2.0 * np.pi / r)
90-
log_prior += multigammaln(v / 2.0, d) - (v / 2.0) * np.log(np.linalg.det(s_mat))
94+
log_prior += multigammaln(v / 2.0, d) - (v / 2.0) * np.log(
95+
np.linalg.det(s_mat)
96+
)
9197
return log_prior
9298

9399
@staticmethod
@@ -96,7 +102,9 @@ def __calc_posterior(x_mat, s_mat, r, v, m):
96102
x_bar = np.mean(x_mat, axis=0)
97103
rp = r + n
98104
vp = v + n
99-
s_mat_t = np.zeros(s_mat.shape) if n == 1 else (n - 1) * np.cov(x_mat.T)
105+
s_mat_t = (
106+
np.zeros(s_mat.shape) if n == 1 else (n - 1) * np.cov(x_mat.T)
107+
)
100108
dt = (x_bar - m)[np.newaxis]
101109
s_mat_p = s_mat + s_mat_t + (r * n / rp) * np.dot(dt.T, dt)
102110
return s_mat_p, rp, vp

0 commit comments

Comments
 (0)