Changes from all commits (20 commits)
47abb2a  log saved files with abs path (liranc6, Jul 28, 2025)
8e8c16d  Fix HF warnings (attention_mask, pad_token_id) and add DEBUG mode for… (liranc6, Jul 28, 2025)
1471a22  trying to add my attack. add requirements.txt and edit .gitignore (liranc6, Jul 31, 2025)
e1f241b  add ipynb file (liranc6, Jul 31, 2025)
9bc0ffb  Update: Improve evaluation flexibility and debug workflows (liranc6, Aug 11, 2025)
aba8ff4  add notebooks and update .gitignore (liranc6, Aug 11, 2025)
ff7a134  update notes (liranc6, Aug 11, 2025)
5d32603  update notes (liranc6, Aug 11, 2025)
226d7ed  end of day (liranc6, Aug 11, 2025)
72db95b  Update: Enhance ILL analysis and add classifier exploration tools (liranc6, Aug 12, 2025)
f61a5a1  Fix: Resolve import issues and implement working ILL feature computat… (liranc6, Aug 28, 2025)
b2999c6  Refactor: Analyze loss landscape of first neighbor sentences (liranc6, Aug 28, 2025)
99b3126  Fix: Ensure plot directory exists before saving heatmap (liranc6, Aug 28, 2025)
733fba0  Add: Jupyter notebook for ILL evaluation on TOFU dataset (liranc6, Aug 28, 2025)
026ab42  finish for today, working mostly (liranc6, Sep 14, 2025)
993f9b0  after submission and then add wandb (liranc6, Nov 15, 2025)
9ebce7b  Add: Comprehensive experiment and analysis pipeline for unlearning ev… (liranc6, Nov 17, 2025)
c376a3f  Add: Initial ablation analysis notebooks for parameter sweeps and res… (liranc6, Nov 17, 2025)
a6ce7a5  Update: Enhance ablation results notebooks with updated command gener… (liranc6, Nov 23, 2025)
6d1563d  Add: Add notebook to scrape results and sync ablations notebooks (liranc6, Nov 24, 2025)
9 changes: 8 additions & 1 deletion .gitignore
@@ -35,4 +35,11 @@ plot/figures/

plot/
plot/figures/
# *.sh
# *.sh

# Add new
MUSE/data/
MUSE/simnpo_news_eval.csv
MUSE/temp/

**/*plots*/
2 changes: 1 addition & 1 deletion MUSE/constants.py
@@ -1,4 +1,4 @@
SUPPORTED_METRICS = ['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r']
SUPPORTED_METRICS = ['verbmem_f', 'privleak', 'knowmem_f', 'knowmem_r', 'loss_landscape']

CORPORA = ['news', 'books']

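The new 'loss_landscape' entry only takes effect through the metric check in eval.py. Below is a minimal sketch of that validation, assuming constants.py is importable from the MUSE directory; 'typo_metric' is a made-up name used only to show the failure path.

```python
# Minimal sketch, assuming this runs from MUSE/ so constants.py is importable.
# Mirrors the sharpened error message added in eval.py below.
from constants import SUPPORTED_METRICS

def validate_metrics(metrics):
    if not metrics:
        raise ValueError("Specify `metrics` to be a non-empty list.")
    for metric in metrics:
        if metric not in SUPPORTED_METRICS:
            raise ValueError(
                f"Given metric {metric} is not supported. "
                f"supported metrics are: {SUPPORTED_METRICS=}."
            )

validate_metrics(['verbmem_f', 'loss_landscape'])  # passes with the new entry
# validate_metrics(['typo_metric'])                # would raise ValueError
```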
188 changes: 160 additions & 28 deletions MUSE/eval.py
@@ -9,9 +9,33 @@
from typing import List, Dict, Literal
from pandas import DataFrame

import sys

curr_dir = os.path.dirname(__file__)
PROJECT_DIR = os.path.abspath(os.path.join(curr_dir, '..', '..'))
Unlearn_Simple_DIR = os.path.join(PROJECT_DIR, 'Unlearn-Simple')
MUSE_DIR = os.path.join(Unlearn_Simple_DIR, 'MUSE')

sys.path.append(os.path.join(PROJECT_DIR, 'src'))

# print sys paths that are being used for importing
# print("Current sys.path:")
# for path in sys.path:
# print(path)
# sys.path.append(os.path.join(PROJECT_DIR, 'src'))
import input_loss_landscape.utils as input_loss_landscape_utils

input_loss_landscape_eval = input_loss_landscape_utils.input_loss_landscape_eval

input_loss_landscape_eval = input_loss_landscape_utils.input_loss_landscape_eval






def eval_model(
model: LlamaForCausalLM,
model,
tokenizer: LlamaTokenizer = LLAMA_DIR,
metrics: List[str] = SUPPORTED_METRICS,
corpus: Literal['news', 'books'] | None = None,
@@ -29,17 +29,53 @@ def eval_model(
knowmem_retain_qa_file: str | None = None,
knowmem_retain_qa_icl_file: str | None = None,
temp_dir: str | None = None,
) -> Dict[str, float]:
DEBUG: bool = False,
kwargs: dict = {},
): # -> Dict[str, float]:
# Argument sanity check
if not metrics:
raise ValueError(f"Specify `metrics` to be a non-empty list.")
for metric in metrics:
if metric not in SUPPORTED_METRICS:
raise ValueError(f"Given metric {metric} is not supported.")
raise ValueError(f"Given metric {metric} is not supported. supported metrics are: {SUPPORTED_METRICS=}.")
if corpus is not None and corpus not in CORPORA:
raise ValueError(f"Invalid corpus. `corpus` should be either 'news' or 'books'.")
if corpus is not None:
verbmem_forget_file = DEFAULT_DATA[corpus]['verbmem_forget_file'] if verbmem_forget_file is None else verbmem_forget_file
print(f"{privleak_forget_file=}, {privleak_retain_file=}, {privleak_holdout_file=}")
privleak_forget_file = DEFAULT_DATA[corpus]['privleak_forget_file'] if privleak_forget_file is None else privleak_forget_file
privleak_retain_file = DEFAULT_DATA[corpus]['privleak_retain_file'] if privleak_retain_file is None else privleak_retain_file
privleak_holdout_file = DEFAULT_DATA[corpus]['privleak_holdout_file'] if privleak_holdout_file is None else privleak_holdout_file
@@ -50,10 +50,15 @@ def eval_model(

out = {}
model = model.to('cuda')
debug_subset_len = 50 if DEBUG else None
print(f"{DEBUG=}")
plots = {}

# 1. verbmem_f
if 'verbmem_f' in metrics:
data = read_json(verbmem_forget_file)
data = read_json(os.path.join(MUSE_DIR, verbmem_forget_file))
if DEBUG:
data = data[:debug_subset_len]
agg, log = eval_verbmem(
prompts=[d['prompt'] for d in data],
gts=[d['gt'] for d in data],
@@ -67,21 +67,60 @@

# 2. privleak
if 'privleak' in metrics:
auc, log = eval_privleak(
forget_data=read_json(privleak_forget_file),
retain_data=read_json(privleak_retain_file),
holdout_data=read_json(privleak_holdout_file),
model=model, tokenizer=tokenizer
)
if temp_dir is not None:
write_json(auc, os.path.join(temp_dir, "privleak/auc.json"))
write_json(log, os.path.join(temp_dir, "privleak/log.json"))
forget_data = read_json(os.path.join(MUSE_DIR, privleak_forget_file))
retain_data = read_json(os.path.join(MUSE_DIR, privleak_retain_file))
holdout_data = read_json(os.path.join(MUSE_DIR, privleak_holdout_file))
if DEBUG:
forget_data = forget_data[:debug_subset_len]
retain_data = retain_data[:debug_subset_len]
holdout_data = holdout_data[:debug_subset_len]

privleak_output_dir = os.path.abspath(os.path.join(temp_dir, "privleak") if temp_dir is not None else None)
create_new_files = kwargs.get('create_new_files', {})
create_new_privleak_files = create_new_files.get('privleak', True)
auc_path = os.path.join(privleak_output_dir, "auc.json")
log_path = os.path.join(privleak_output_dir, "log.json")
plots_dir = os.path.join(privleak_output_dir, "plots")

if create_new_privleak_files:
auc, log, privleak_plots = eval_privleak(
forget_data=forget_data,
retain_data=retain_data,
holdout_data=holdout_data,
model=model, tokenizer=tokenizer,
plot_dir=privleak_output_dir
)
if temp_dir is not None:
write_json(auc, auc_path)
write_json(log, log_path)
# save plots
os.makedirs(plots_dir, exist_ok=True)
for plot_name, plot_obj in privleak_plots.items():
plot_path = os.path.join(plots_dir, f"{plot_name}.png")
plot_obj.savefig(plot_path)
plot_obj.clf()

else:
# load auc, log, privleak_plots
auc = read_json(auc_path) if os.path.exists(auc_path) else {}
log = read_json(log_path) if os.path.exists(log_path) else {}
privleak_plots = {}
if os.path.isdir(plots_dir):
for plot_file in os.listdir(plots_dir):
if plot_file.endswith(".png"):
privleak_plots[os.path.splitext(plot_file)[0]] = os.path.join(plots_dir, plot_file)


out['privleak'] = (auc[privleak_auc_key] - AUC_RETRAIN[corpus][privleak_auc_key]) / AUC_RETRAIN[corpus][privleak_auc_key] * 100
plots['privleak'] = privleak_plots

# 3. knowmem_f
if 'knowmem_f' in metrics:
qa = read_json(knowmem_forget_qa_file)
icl = read_json(knowmem_forget_qa_icl_file)
qa = read_json(os.path.join(MUSE_DIR, knowmem_forget_qa_file))
icl = read_json(os.path.join(MUSE_DIR, knowmem_forget_qa_icl_file))
if DEBUG:
qa = qa[:debug_subset_len]
icl = icl[:debug_subset_len]
agg, log = eval_knowmem(
questions=[d['question'] for d in qa],
answers=[d['answer'] for d in qa],
@@ -97,8 +97,11 @@

# 4. knowmem_r
if 'knowmem_r' in metrics:
qa = read_json(knowmem_retain_qa_file)
icl = read_json(knowmem_retain_qa_icl_file)
qa = read_json(os.path.join(MUSE_DIR, knowmem_retain_qa_file))
icl = read_json(os.path.join(MUSE_DIR, knowmem_retain_qa_icl_file))
if DEBUG:
qa = qa[:debug_subset_len]
icl = icl[:debug_subset_len]
agg, log = eval_knowmem(
questions=[d['question'] for d in qa],
answers=[d['answer'] for d in qa],
@@ -112,7 +112,38 @@
write_json(log, os.path.join(temp_dir, "knowmem_r/log.json"))
out['knowmem_r'] = agg[knowmem_agg_key] * 100

return out
# 5. loss_landscape
if 'loss_landscape' in metrics:
print(f"{os.path.abspath(privleak_forget_file)=}")
forget_data = read_json(os.path.join(MUSE_DIR, privleak_forget_file))
retain_data = read_json(os.path.join(MUSE_DIR, privleak_retain_file))
holdout_data = read_json(os.path.join(MUSE_DIR, privleak_holdout_file))
if DEBUG:
forget_data = forget_data[:debug_subset_len]
retain_data = retain_data[:debug_subset_len]
holdout_data = holdout_data[:debug_subset_len]

loss_landscape = os.path.abspath(os.path.join(temp_dir, "loss_landscape") if temp_dir is not None else None)

return forget_data, retain_data, holdout_data, model, tokenizer, loss_landscape

# auc, log, loss_landscape_plots = input_loss_landscape_eval(
# forget_data=forget_data,
# retain_data=retain_data,
# holdout_data=holdout_data,
# model=model, tokenizer=tokenizer,
# plot_dir=loss_landscape,
# model_name='distilgpt2-finetuned-wikitext2',
# create_new_file=True,
# )
# if temp_dir is not None:
# write_json(auc, os.path.join(temp_dir, "loss_landscape/auc.json"))
# write_json(log, os.path.join(temp_dir, "loss_landscape/log.json"))

# out['loss_landscape'] = auc
# plots['loss_landscape'] = loss_landscape_plots

# return out, plots


def load_then_eval_models(
@@ -122,8 +122,10 @@
tokenizer_dir: str = LLAMA_DIR,
out_file: str | None = None,
metrics: List[str] = SUPPORTED_METRICS,
temp_dir: str = "temp"
) -> DataFrame:
temp_dir: str = "temp",
DEBUG: bool = False,
kwargs: dict = {},
): # -> DataFrame:
print(out_file)
# Argument sanity check
if not model_dirs:
@@ -138,14 +138,38 @@
for model_dir, name in zip(model_dirs, names):
model = load_model(model_dir)
tokenizer = load_tokenizer(tokenizer_dir)
res = eval_model(

privleak_files = kwargs.get('privleak_files', {})
if privleak_files:
privleak_forget_file = privleak_files.get('privleak_forget_file', None)
privleak_retain_file = privleak_files.get('privleak_retain_file', None)
privleak_holdout_file = privleak_files.get('privleak_holdout_file', None)

return eval_model(
model, tokenizer, metrics, corpus,
temp_dir=os.path.join(temp_dir, name),
DEBUG=DEBUG,
privleak_forget_file = privleak_forget_file,
privleak_retain_file=privleak_retain_file,
privleak_holdout_file=privleak_holdout_file,
kwargs=kwargs,
)

else:
return eval_model(
model, tokenizer, metrics, corpus,
temp_dir=os.path.join(temp_dir, name)
)
out.append({'name': name} | res)
if out_file is not None: write_csv(out, out_file)
# DataFrame(out).to_csv(out_file, index=False)
return DataFrame(out)
temp_dir=os.path.join(temp_dir, name),
DEBUG=DEBUG
)
# res, plots = eval_model(
# model, tokenizer, metrics, corpus,
# temp_dir=os.path.join(temp_dir, name),
# DEBUG=DEBUG
# )
# out.append({'name': name} | res)
# if out_file is not None: write_csv(out, out_file)
# # DataFrame(out).to_csv(out_file, index=False)
# return DataFrame(out), plots


if __name__ == '__main__':
@@ -159,4 +159,5 @@
parser.add_argument('--metrics', type=str, nargs='+', default=SUPPORTED_METRICS)
args = parser.parse_args()

load_then_eval_models(**vars(args))
load_then_eval_models(**vars(args))
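A hedged usage sketch of the reworked entry point follows. Checkpoint paths and file names are placeholders; note that with the 'loss_landscape' metric the current code returns the raw forget/retain/holdout splits plus the model, tokenizer, and output directory instead of a DataFrame, matching the early return above.

```python
# Sketch only: paths are placeholders and the return shape follows the
# work-in-progress 'loss_landscape' branch, which returns raw data early.
from eval import load_then_eval_models

result = load_then_eval_models(
    model_dirs=['checkpoints/unlearned-news'],   # hypothetical checkpoint dir
    names=['unlearned-news'],
    corpus='news',
    metrics=['loss_landscape'],
    temp_dir='temp',
    DEBUG=True,                                  # caps each split at 50 examples
    kwargs={
        'create_new_files': {'privleak': True},
        'privleak_files': {                      # optional per-split overrides
            'privleak_forget_file': 'data/news/privleak/forget.json',
            'privleak_retain_file': 'data/news/privleak/retain.json',
            'privleak_holdout_file': 'data/news/privleak/holdout.json',
        },
    },
)

# With only 'loss_landscape' requested, the returned tuple unpacks as:
forget_data, retain_data, holdout_data, model, tokenizer, landscape_dir = result
```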

31 changes: 24 additions & 7 deletions MUSE/load_data.py
@@ -12,7 +12,9 @@
{'question': question, 'answer': answer}
for question, answer in zip(questions, answers)
]
write_json(knowmem, f"data/{corpus}/knowmem/{split}.json")
full_path = os.path.abspath(f"data/{corpus}/knowmem/{split}.json")
write_json(knowmem, full_path)
print(f"Saved {len(knowmem)} knowmem entries to {full_path}")

for split in ['forget']:
data = load_dataset(f"muse-bench/MUSE-{Corpus}", 'verbmem', split='forget')
@@ -21,20 +21,35 @@
{'prompt': prompt, 'gt': gt}
for prompt, gt in zip(prompts, gts)
]
write_json(verbmem, f"data/{corpus}/verbmem/forget.json")
full_path = os.path.abspath(f"data/{corpus}/verbmem/{split}.json")
write_json(verbmem, full_path)
print(f"Saved {len(verbmem)} verbmem entries to {full_path}")

for split in ['forget', 'retain', 'holdout']:
privleak = load_dataset(f"muse-bench/MUSE-{Corpus}", 'privleak', split=split)['text']
write_json(privleak, f"data/{corpus}/privleak/{split}.json")
privleak = list(privleak)
path = f"data/{corpus}/privleak/{split}.json"
full_path = os.path.abspath(path)
write_json(privleak, full_path)
print(f"Saved {len(privleak)} privleak entries to {full_path}")

for split in ['forget', 'holdout', 'retain1', 'retain2']:
raw = load_dataset(f"muse-bench/MUSE-{Corpus}", 'raw', split=split)['text']
write_json(raw, f"data/{corpus}/raw/{split}.json")
write_text("\n\n".join(raw), f"data/{corpus}/raw/{split}.txt")
raw = list(raw)
full_path_json = os.path.abspath(f"data/{corpus}/raw/{split}.json")
write_json(raw, full_path_json)
print(f"Saved {len(raw)} raw entries to {full_path_json}")
full_path_txt = os.path.abspath(f"data/{corpus}/raw/{split}.txt")
write_text("\n\n".join(raw), full_path_txt)
print(f"Saved {len(raw)} raw entries to {full_path_txt}")


for crit in ['scal', 'sust']:
for fold in range(1, 5):
data = load_dataset(f"muse-bench/MUSE-News", crit, split=f"forget_{fold}")['text']
write_json(data, f"data/news/{crit}/forget_{fold}.json")
write_text("\n\n".join(data), f"data/news/{crit}/forget_{fold}.txt")
data = list(data) # Convert to list
full_path_json = os.path.abspath(f"data/news/{crit}/forget_{fold}.json")
write_json(data, full_path_json)
full_path_txt = os.path.abspath(f"data/news/{crit}/forget_{fold}.txt")
write_text("\n\n".join(data), full_path_txt)
print(f"Saved {len(data)} {crit} entries to {full_path_txt}")
21 changes: 14 additions & 7 deletions MUSE/metrics/knowmem.py
@@ -11,8 +11,10 @@ def get_prefix_before_words_occur(string: str, words: List[str]) -> str:

def eval(
model, tokenizer,
questions: List[str], answers: List[str],
icl_qs: List[str] = [], icl_as: List[str] = [],
questions: List[str],
answers: List[str],
icl_qs: List[str] = [],
icl_as: List[str] = [],
max_new_tokens : int = 32
):
assert len(questions) == len(answers)
@@ -28,18 +28,23 @@
for question, answer in tzip(questions, answers):
prompt = general_prompt + f"Question: {question}\nAnswer: "

# Encode the `prompt` into `input_ids`
input_ids = tokenizer(
# Encode the `prompt` into `input_ids` and `attention_mask`
inputs = tokenizer(
prompt,
return_tensors='pt',
add_special_tokens=True).input_ids
add_special_tokens=True
)
input_ids = inputs.input_ids
attention_mask = inputs.attention_mask

# Use the `model` to generate the continuation of the `input_ids`.
output_ids = model.generate(
input_ids.to(model.device),
input_ids=input_ids.to(model.device),
attention_mask=attention_mask.to(model.device),
max_new_tokens=max_new_tokens,
do_sample=False,
pad_token_id=tokenizer.pad_token_id)
pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
)
output_ids = output_ids[:, len(input_ids[0]):]

output = tokenizer.batch_decode(
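The knowmem.py change addresses the usual Hugging Face generate() warnings by passing an explicit attention_mask and by falling back to eos_token_id when the tokenizer has no pad token. A standalone sketch of the same pattern, with distilgpt2 used purely as a small stand-in checkpoint:

```python
# Standalone sketch of the attention_mask / pad_token_id pattern from the
# knowmem.py change; 'distilgpt2' is just a small stand-in model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

prompt = "Question: Who wrote the article?\nAnswer: "
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)

with torch.no_grad():
    output_ids = model.generate(
        input_ids=inputs.input_ids.to(model.device),
        attention_mask=inputs.attention_mask.to(model.device),  # silences the mask warning
        max_new_tokens=32,
        do_sample=False,
        # GPT-2-style tokenizers have no pad token, so fall back to EOS:
        pad_token_id=tokenizer.pad_token_id
        if tokenizer.pad_token_id is not None
        else tokenizer.eos_token_id,
    )

# Decode only the newly generated continuation, as eval() does above.
continuation = tokenizer.batch_decode(
    output_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
)[0]
print(continuation)
```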