
Commit 1471a22

trying to add my attack. add requirements.txt and edit .gitignore
not running yet
1 parent 8e8c16d commit 1471a22

5 files changed: +194, -138 lines changed

.gitignore

Lines changed: 6 additions & 1 deletion

@@ -35,4 +35,9 @@ plot/figures/
 
 plot/
 plot/figures/
-# *.sh
+# *.sh
+
+# Add new
+MUSE/data/
+MUSE/simnpo_news_eval.csv
+MUSE/temp/

MUSE/constants.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-SUPPORTED_METRICS = ['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r']
+SUPPORTED_METRICS = ['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r', 'loss_landscape']
 
 CORPORA = ['news', 'books']
 
MUSE/eval.py

Lines changed: 71 additions & 14 deletions

@@ -1,4 +1,4 @@
-from metrics.verbmem import eval as eval_ve rbmem
+from metrics.verbmem import eval as eval_verbmem
 from metrics.privleak import eval as eval_privleak
 from metrics.knowmem import eval as eval_knowmem
 from utils import load_model, load_tokenizer, write_csv, read_json, write_json
@@ -9,9 +9,24 @@
 from typing import List, Dict, Literal
 from pandas import DataFrame
 
+import importlib.util
+import os
+
+input_loss_landscape_utils_path = os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'src', 'input_loss_landscape', 'utils.py'))
+spec = importlib.util.spec_from_file_location("input_loss_landscape_utils", input_loss_landscape_utils_path)
+input_loss_landscape_utils = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(input_loss_landscape_utils)
+
+input_loss_landscape_eval = input_loss_landscape_utils.input_loss_landscape_eval
+print(f"Current working directory: {os.getcwd()}") # Ensure the current working directory is set correctly
+
+
+
+
+
 
 def eval_model(
-    model: LlamaForCausalLM,
+    model,
     tokenizer: LlamaTokenizer = LLAMA_DIR,
     metrics: List[str] = SUPPORTED_METRICS,
     corpus: Literal['news', 'books'] | None = None,
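Note on the new import block above: it loads `utils.py` from a sibling `src/input_loss_landscape/` tree by file path with `importlib.util`, sidestepping `sys.path`. Because the path is built from `os.getcwd()`, the import only resolves when the script is launched from the expected directory, which is presumably why the working directory is printed. A minimal, self-contained sketch of the same pattern (`module.py` and `helper` are hypothetical placeholders, not files from this repo):

    import importlib.util
    import os

    # Resolve the target file relative to the current working directory,
    # exactly as the diff does -- fragile if the script is launched elsewhere.
    path = os.path.abspath("module.py")  # hypothetical module file

    # Build a module object from the file path and execute its top level.
    spec = importlib.util.spec_from_file_location("loaded_module", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    helper = module.helper  # pull out the one function needed (hypothetical)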
@@ -30,13 +45,13 @@ def eval_model(
     knowmem_retain_qa_icl_file: str | None = None,
     temp_dir: str | None = None,
     DEBUG: bool = False,
-) -> Dict[str, float]:
+): # -> Dict[str, float]:
     # Argument sanity check
     if not metrics:
         raise ValueError(f"Specify `metrics` to be a non-empty list.")
     for metric in metrics:
         if metric not in SUPPORTED_METRICS:
-            raise ValueError(f"Given metric {metric} is not supported.")
+            raise ValueError(f"Given metric {metric} is not supported. supported metrics are: {SUPPORTED_METRICS=}.")
     if corpus is not None and corpus not in CORPORA:
         raise ValueError(f"Invalid corpus. `corpus` should be either 'news' or 'books'.")
     if corpus is not None:
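The sharpened error message leans on the f-string `=` specifier (Python 3.8+), which renders the expression together with its value. For example:

    SUPPORTED_METRICS = ['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r', 'loss_landscape']
    print(f"{SUPPORTED_METRICS=}")
    # SUPPORTED_METRICS=['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r', 'loss_landscape']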
@@ -51,7 +66,9 @@ def eval_model(
 
     out = {}
     model = model.to('cuda')
-    debug_subset_len = 3 if DEBUG else None
+    debug_subset_len = 2 if DEBUG else None
+    print(f"{DEBUG=}")
+    plots = {}
 
     # 1. verbmem_f
     if 'verbmem_f' in metrics:
@@ -78,16 +95,20 @@ def eval_model(
             forget_data = forget_data[:debug_subset_len]
             retain_data = retain_data[:debug_subset_len]
             holdout_data = holdout_data[:debug_subset_len]
-        auc, log = eval_privleak(
+
+        privleak_output_dir = os.path.abspath(os.path.join(temp_dir, "privleak") if temp_dir is not None else None)
+        auc, log, privleak_plots = eval_privleak(
             forget_data=forget_data,
             retain_data=retain_data,
             holdout_data=holdout_data,
-            model=model, tokenizer=tokenizer
+            model=model, tokenizer=tokenizer,
+            plot_dir=privleak_output_dir
         )
         if temp_dir is not None:
             write_json(auc, os.path.join(temp_dir, "privleak/auc.json"))
             write_json(log, os.path.join(temp_dir, "privleak/log.json"))
         out['privleak'] = (auc[privleak_auc_key] - AUC_RETRAIN[corpus][privleak_auc_key]) / AUC_RETRAIN[corpus][privleak_auc_key] * 100
+        plots['privleak'] = privleak_plots
 
     # 3. knowmem_f
     if 'knowmem_f' in metrics:
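For reference, the unchanged `out['privleak']` line scores leakage as the percentage deviation of the measured AUC from the retrain baseline `AUC_RETRAIN[corpus][privleak_auc_key]`. A worked sketch with made-up numbers:

    # Hypothetical values, chosen only to illustrate the formula's shape.
    auc = 0.75          # AUC of the model under evaluation
    auc_retrain = 0.50  # baseline AUC of the retrained model
    privleak = (auc - auc_retrain) / auc_retrain * 100
    print(privleak)     # 50.0 -> 50% above the retrain baseline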
@@ -129,7 +150,37 @@ def eval_model(
             write_json(log, os.path.join(temp_dir, "knowmem_r/log.json"))
         out['knowmem_r'] = agg[knowmem_agg_key] * 100
 
-    return out
+    # 5. loss_landscape
+    if 'loss_landscape' in metrics:
+        forget_data = read_json(privleak_forget_file)
+        retain_data = read_json(privleak_retain_file)
+        holdout_data = read_json(privleak_holdout_file)
+        if DEBUG:
+            forget_data = forget_data[:debug_subset_len]
+            retain_data = retain_data[:debug_subset_len]
+            holdout_data = holdout_data[:debug_subset_len]
+
+        loss_landscape = os.path.abspath(os.path.join(temp_dir, "loss_landscape") if temp_dir is not None else None)
+
+        return forget_data, retain_data, holdout_data, model, tokenizer, loss_landscape
+
+        # auc, log, loss_landscape_plots = input_loss_landscape_eval(
+        #     forget_data=forget_data,
+        #     retain_data=retain_data,
+        #     holdout_data=holdout_data,
+        #     model=model, tokenizer=tokenizer,
+        #     plot_dir=loss_landscape,
+        #     model_name='distilgpt2-finetuned-wikitext2',
+        #     create_new_file=True,
+        # )
+        # if temp_dir is not None:
+        #     write_json(auc, os.path.join(temp_dir, "loss_landscape/auc.json"))
+        #     write_json(log, os.path.join(temp_dir, "loss_landscape/log.json"))
+
+        # out['loss_landscape'] = auc
+        # plots['loss_landscape'] = loss_landscape_plots
+
+    # return out, plots
 
 
 def load_then_eval_models(
@@ -141,7 +192,7 @@ def load_then_eval_models(
     metrics: List[str] = SUPPORTED_METRICS,
     temp_dir: str = "temp",
     DEBUG: bool = False,
-) -> DataFrame:
+): # -> DataFrame:
    print(out_file)
    # Argument sanity check
    if not model_dirs:
@@ -156,15 +207,21 @@ def load_then_eval_models(
    for model_dir, name in zip(model_dirs, names):
        model = load_model(model_dir)
        tokenizer = load_tokenizer(tokenizer_dir)
-        res = eval_model(
+
+        return eval_model(
            model, tokenizer, metrics, corpus,
            temp_dir=os.path.join(temp_dir, name),
            DEBUG=DEBUG
        )
-        out.append({'name': name} | res)
-        if out_file is not None: write_csv(out, out_file)
-        # DataFrame(out).to_csv(out_file, index=False)
-    return DataFrame(out)
+        # res, plots = eval_model(
+        #     model, tokenizer, metrics, corpus,
+        #     temp_dir=os.path.join(temp_dir, name),
+        #     DEBUG=DEBUG
+        # )
+        # out.append({'name': name} | res)
+        # if out_file is not None: write_csv(out, out_file)
+        # # DataFrame(out).to_csv(out_file, index=False)
+    # return DataFrame(out), plots
 
 
 if __name__ == '__main__':
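As committed, both functions now short-circuit: `eval_model` returns raw data from inside the `loss_landscape` branch, and `load_then_eval_models` returns on the first iteration of its loop, so only the first entry of `model_dirs` is evaluated, which is consistent with the "not running yet" commit message. A hedged sketch of how this intermediate code path might be driven (the model path and name are placeholders, and the full parameter list of `load_then_eval_models` is not visible in this diff, so keyword arguments are assumed):

    # Hypothetical driver, assumed to run from inside MUSE/.
    from eval import load_then_eval_models

    # With metrics=['loss_landscape'], eval_model currently returns its
    # raw inputs rather than a metrics dict; this call mirrors that shape.
    forget, retain, holdout, model, tokenizer, out_dir = load_then_eval_models(
        model_dirs=['path/to/unlearned_model'],  # placeholder checkpoint path
        names=['target'],                        # placeholder run name
        corpus='news',
        metrics=['loss_landscape'],
        temp_dir='temp',
    )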
