Skip to content

Commit 67c2e9b

Browse files
committed
changes for AMIA
1 parent 858ac89 commit 67c2e9b

File tree

10 files changed

+143
-23
lines changed

10 files changed

+143
-23
lines changed

.pypirc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
[distutils]
index-servers =
    pypi
    testpypi

[testpypi]
# Upload endpoint for TestPyPI. The /simple/ URL is the download index and
# does not accept uploads.
repository: https://test.pypi.org/legacy/
username: Max_JJ
# SECURITY: no password is stored here on purpose. The password that was
# previously committed in this file must be treated as compromised and rotated.
# Supply the secret at upload time instead (interactive prompt, keyring, or
# the TWINE_PASSWORD environment variable), preferably as an API token.

Vampyr_MTL/evaluations/utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from sklearn.model_selection import train_test_split
22
import numpy as np
33
import pandas as pd
4+
from collections import defaultdict, OrderedDict
5+
import plotly.figure_factory as ff
46

57
class opts:
68
def __init__(self, maxIter, init):
@@ -48,3 +50,74 @@ def MTL_data_extract(df, task_feat, target):
4850
y = tmp1.loc[:, df.columns == target].values
4951
Y.append(np.array(y))
5052
return X, Y
53+
54+
def RFA(df, task, target, top=10, mtl_clf=None):
    """Plot which tasks rank each feature among their ``top`` largest weights.

    For every task (column of ``mtl_clf.W``) the ``top`` features with the
    largest signed weight are selected. Each selected feature becomes one
    heatmap row listing the numbers of the tasks that selected it; rows are
    ordered by how many tasks selected the feature, fewest first.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns, minus ``task`` and ``target``, name the features.
        Presumably the frame the model was trained from, so that the column
        order matches the rows of ``mtl_clf.W`` -- confirm with the caller.
    task : str
        Name of the task column; excluded from the feature list.
    target : str
        Name of the target column; excluded from the feature list.
    top : int, default 10
        Number of highest-weighted features kept per task.
    mtl_clf : object, optional
        Fitted model exposing a 2-D ``W`` indexed as (feature, task).
        Declared optional only to keep the positional signature unchanged;
        it must be supplied.

    Returns
    -------
    The figure returned by ``ff.create_annotated_heatmap``, with x tick
    labels and x grid hidden.

    Raises
    ------
    ValueError
        If ``mtl_clf`` is not supplied.
    """
    if mtl_clf is None:
        raise ValueError(
            "RFA needs the fitted model: call RFA(df, task, target, mtl_clf=model)"
        )

    def reformat(cols, w, top=10):
        """Return a frame that lists, per selected feature, the tasks that picked it."""
        cols = np.array(cols)
        n_tasks = w.shape[1]

        # task number (1-based) -> set of its top feature names
        per_task = OrderedDict()
        selected = set()
        for t in range(n_tasks):
            weights = w[:, t].flatten()
            # Stable descending sort on the signed weight.
            best = sorted(
                range(len(weights)), key=lambda r: weights[r], reverse=True
            )[:top]
            chosen = set(cols[best])
            per_task[t + 1] = chosen
            selected |= chosen
        print("all top {} colns are {}".format(top, selected))

        features = list(selected)
        # Column "k" holds the k-th task (in task order) that selected the
        # feature, so each row is left-packed and padded with missing values.
        slots = pd.DataFrame(
            None, index=features, columns=[str(p + 1) for p in range(n_tasks)]
        )
        for feat in features:
            slot = 1
            for number, members in per_task.items():
                if feat in members:
                    # .loc instead of chained indexing, which may write to a copy.
                    slots.loc[feat, str(slot)] = number
                    slot += 1
        return slots

    def sort_df(frame):
        """Order rows by the number of filled cells, fewest first (stable)."""
        filled = frame.notna().sum(axis=1).tolist()
        order = sorted(
            (row for row in range(len(filled)) if filled[row] > 0),
            key=lambda row: filled[row],
        )
        return frame.iloc[order]

    def get_z_text(z, mp):
        """Translate the task numbers in ``z`` to labels; missing cells become ''."""
        return [["" if pd.isna(cell) else mp[cell] for cell in row] for row in z]

    feature_cols = df.loc[
        :, (df.columns != target) & (df.columns != task)
    ].columns.tolist()
    weight_matrix = mtl_clf.W
    slots = reformat(feature_cols, weight_matrix, top=top)

    # NOTE(review): stored task numbers are 1-based while the labels are
    # 0-based ("Task_0" annotates task number 1); kept as in the original --
    # confirm this is the intended labelling.
    labels = {i + 1: "Task_{}".format(i) for i in range(weight_matrix.shape[1])}

    ordered = sort_df(slots)
    z_text = get_z_text(ordered.values, labels)
    fig = ff.create_annotated_heatmap(
        z=ordered.values.tolist(), annotation_text=z_text, y=list(ordered.index)
    )
    fig.update_xaxes(showticklabels=False, showgrid=False)
    return fig
123+

Vampyr_MTL/functions/MTL_Cluster_Least_L21.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def fit(self, X, Y, **kwargs):
9393
gamma = 1.0
9494
gamma_inc = 2
9595

96-
for it in trange(self.opts.maxIter, file=sys.stdout, desc='outer loop'):
96+
for it in trange(self.opts.maxIter, file=sys.stdout, desc='Training'):
9797
alpha = (t_old - 1)/t
9898
Ws = (1 + alpha) * Wz - alpha * Wz_old
9999
if(isspmatrix(Mz)):
@@ -106,7 +106,7 @@ def fit(self, X, Y, **kwargs):
106106

107107
in_it = 0
108108
# for in_it in trange(2,file=sys.stdout, leave=False, unit_scale=True, desc='inner loop'):
109-
for in_it in trange(1000,file=sys.stdout, leave=False, unit_scale=True, desc='inner loop'):
109+
while True:
110110
Wzp = Ws - gWs/gamma
111111
Mzp, Mzp_Pz, Mzp_DiagSigz = self.singular_projection (Ms - gMs/gamma, self.k)
112112
Fzp = self.funVal_eval(Wzp, Mzp_Pz, Mzp_DiagSigz)

Vampyr_MTL/functions/MTL_Least_L21.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ def fit(self, X, Y, **kwargs):
7979
gamma = 1
8080
gamma_inc = 2
8181

82-
for it in trange(self.opts.maxIter, file=sys.stdout, desc='outer loop'):
82+
for it in trange(self.opts.maxIter, file=sys.stdout, desc='Training'):
8383
alpha = (t_old - 1)/t
8484
Ws = (1 + alpha) * Wz - alpha * Wz_old
8585
# compute function value and gradients of the search point
8686
gWs = self.gradVal_eval(Ws)
8787
Fs = self.funVal_eval(Ws)
8888
in_it = 0
8989

90-
for in_it in trange(1000,file=sys.stdout, leave=False, unit_scale=True, desc='inner loop'):
90+
while True:
9191
Wzp = self.FGLasso_projection(Ws - gWs/gamma, self.rho1 / gamma)
9292
Fzp = self.funVal_eval(Wzp)
9393
delta_Wzp = Wzp - Ws

Vampyr_MTL/functions/MTL_Logistic_L21.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,15 @@ def fit(self, X, Y, **kwargs):
9999
gamma = 1
100100
gamma_inc = 2
101101

102-
for it in trange(self.opts.maxIter, file=sys.stdout, desc='outer loop'):
102+
for it in trange(self.opts.maxIter, file=sys.stdout, desc='Training'):
103103
alpha = (t_old - 1)/t
104104

105105
Ws = (1 + alpha) * Wz - alpha * Wz_old
106106
Cs = (1 + alpha) * Cz - alpha * Cz_old
107107

108108
gWs, gCs, Fs = self.gradVal_eval(Ws, Cs)
109109

110-
for in_it in trange(1000,file=sys.stdout, leave=False, unit_scale=True, desc='inner loop'):
110+
while True:
111111
Wzp = self.FGLasso_projection(Ws - gWs/gamma, self.rho1 / gamma)
112112
Czp = Cs - gCs/gamma
113113
Fzp = self.funVal_eval(Wzp, Czp)

Vampyr_MTL/functions/MTL_Softmax_L21.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,15 @@ def fit(self, X, Y, **kwargs):
136136
gamma = 1
137137
gamma_inc = 2
138138

139-
for it in trange(self.opts.maxIter, file=sys.stdout, desc='outer loop'):
139+
for it in trange(self.opts.maxIter, file=sys.stdout, desc='Training'):
140140
alpha = (t_old - 1)/t
141141

142142
Ws = (1 + alpha) * Wz - alpha * Wz_old
143143
Cs = (1 + alpha) * Cz - alpha * Cz_old
144144

145145
gWs, gCs, Fs = self.gradVal_eval(Ws, Cs)
146146

147-
for in_it in trange(1000,file=sys.stdout, leave=False, unit_scale=True, desc='inner loop'):
147+
while True:
148148
Wzp = self.FGLasso_projection(Ws - gWs/gamma, self.rho1 / gamma)
149149
Czp = Cs - gCs/gamma
150150
Fzp = self.funVal_eval(Wzp, Czp)
@@ -156,7 +156,7 @@ def fit(self, X, Y, **kwargs):
156156
r_sum = (nrm_delta_Czp + nrm_delta_Wzp)/2
157157

158158
Fzp_gamma = Fs + np.sum(delta_Wzp*gWs) + np.sum(delta_Czp*gCs)+ gamma/2 * r_sum*2
159-
if (r_sum <=1e-20):
159+
if (r_sum <=1e-28):
160160
bFlag=1 # this shows that, the gradient step makes little improvement
161161
break
162162
if (Fzp <= Fzp_gamma):
@@ -178,19 +178,23 @@ def fit(self, X, Y, **kwargs):
178178
if(self.opts.tFlag == 0):
179179
if it>=2:
180180
if (abs( funcVal[-1] - funcVal[-2]) <= self.opts.tol):
181+
print("Terminate 0")
181182
break
182183

183184
elif(self.opts.tFlag == 1):
184185
if it>=2:
185186
if (abs( funcVal[-1] - funcVal[-2] ) <= self.opts.tol* funcVal[-2]):
187+
print("Terminate 1")
186188
break
187189

188190
elif(self.opts.tFlag == 2):
189191
if ( funcVal[-1]<= self.opts.tol):
192+
print("Terminate 2")
190193
break
191194

192195
elif(self.opts.tFlag == 3):
193196
if it>=self.opts.maxIter:
197+
print("Terminate 3")
194198
break
195199

196200
t_old = t
@@ -322,7 +326,7 @@ def unit_funcVal_eval(self, w, c, task_idx):
322326
weight = np.ones((1, self.Y[task_idx].shape[0]))/self.task_num
323327
z = -self.Y[task_idx]*(np.transpose(self.X[task_idx])@w + c)
324328
hinge = np.maximum(z, 0)
325-
funcVal = np.sum(weight @ (np.log(np.exp(-hinge)+np.exp(z-hinge))+hinge))
329+
funcVal = np.sum(weight @ (np.log(np.exp((-hinge).astype(np.float))+np.exp((z-hinge).astype(np.float)))+hinge))
326330
return funcVal
327331

328332
def get_params(self, deep = False):

Vampyr_MTL/functions/init_opts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def init_opts(opts):
88

99
# Default values
1010
DEFAULT_MAX_ITERATION = 1000
11-
DEFAULT_TOLERANCE = 1e-4
11+
DEFAULT_TOLERANCE = 1e-10
1212
MINIMUM_TOLERANCE = eps * 100
1313
DEFAULT_TERMINATION_COND = 1
1414
DEFAULT_INIT = 0

Vampyr_MTL/functions/tests/test_softmax_L21_hinge.py

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,11 @@
22
import numpy as np
33
import pandas as pd
44
from sklearn import datasets
5+
from sklearn import preprocessing
56
from ...evaluations.utils import MTL_data_extract, MTL_data_split, opts
67
from .test_data import get_data
7-
8-
# class opts:
9-
# def __init__(self, maxIter, init):
10-
# self.maxIter = maxIter
11-
# self.init = init
12-
# self.pFlag = False
8+
from sklearn.linear_model import LogisticRegression
9+
import os
1310

1411
opts = opts(1000,2)
1512

@@ -22,8 +19,53 @@
2219
X_i, Y_i = MTL_data_extract(df2, 'cat2', 'target')
2320
X_train_c, X_test_c, Y_train_c, Y_test_c = MTL_data_split(X_i, Y_i, test_size=0.4)
2421

22+
print(os.getcwd())
23+
print('???????????????')
24+
df3 = pd.read_csv('./cleaned_BRFSS.csv')
25+
26+
def normalize(X):
    """Min-max scale every entry of ``X`` in place and return ``X``.

    A fresh ``MinMaxScaler`` is fitted on each entry separately, so each
    entry is scaled independently of the others.
    """
    for position, block in enumerate(X):
        scaler = preprocessing.MinMaxScaler()
        X[position] = scaler.fit_transform(block)
    return X
2531

2632
class Test_softmax_classification(object):
33+
def test_real_data(self):
    """Fit MTL_Softmax_L21 on the BRFSS extract and report accuracy.

    Rows with ``ADDEPEV2`` equal to 1 or 2 are split into tasks by that
    column with ``_BMI5CAT`` as the target. Per-task and overall accuracy
    are printed, followed by a per-task scikit-learn LogisticRegression
    score for comparison.
    """
    df4 = df3[(df3['ADDEPEV2'] == 2) | (df3['ADDEPEV2'] == 1)]
    X, Y = MTL_data_extract(df4, "ADDEPEV2", "_BMI5CAT")

    X_train, X_test, Y_train, Y_test = MTL_data_split(X, Y, test_size=0.998)
    X_train = normalize(X_train)
    X_test = normalize(X_test)
    for idx, labels in enumerate(Y_train):
        Y_train[idx] = labels.astype(int)

    mtl_model = MTL_Softmax_L21(opts)
    mtl_model.fit(X_train, Y_train)
    pred = mtl_model.predict(X_test)

    task_accuracy = []
    correct_total = 0
    sample_total = 0
    for task_pred, task_truth in zip(pred, Y_test):
        correct = np.sum(task_pred == task_truth)
        count = len(task_pred)
        task_accuracy.append(correct / count * 100)
        sample_total += count
        correct_total += correct

    assert sample_total > 0, "no test samples were predicted"
    overall = correct_total / sample_total * 100

    for number, accuracy in enumerate(task_accuracy, start=1):
        print("accurcy for task {} is {}%".format(number, accuracy))
    print("total accuracy is {}%".format(overall))

    for idx in range(len(pred)):
        baseline = LogisticRegression(random_state=0).fit(X_train[idx], Y_train[idx])
        score = baseline.score(X_test[idx], Y_test[idx])
        print("SKLearn accuracy for task {} is {}%".format(idx, score * 100))

    # The previous assertion required the accuracy to be exactly 0, which
    # fails as soon as a single prediction is correct. Check only that the
    # metric is a valid percentage.
    assert 0 <= overall <= 100
68+
2769
def test_soft_numerical_accuracy(self):
2870
ult_thres = 0.5
2971
thres = 0.9
15.7 KB
Binary file not shown.
8.31 KB
Binary file not shown.

0 commit comments

Comments
 (0)