
Commit 3b5ce13

Add OLMo2 models.
1 parent 0d3f1f8 commit 3b5ce13

3 files changed: +101 -2 lines changed

pyvene/models/intervenable_modelcard.py

Lines changed: 7 additions & 2 deletions
@@ -13,6 +13,7 @@
 from .backpack_gpt2.modelings_intervenable_backpack_gpt2 import *
 from .llava.modelings_intervenable_llava import *
 from .olmo.modelings_intervenable_olmo import *
+from .olmo2.modelings_intervenable_olmo2 import *


 #########################################################################
 """
@@ -62,7 +63,9 @@
     hf_models.gemma2.modeling_gemma2.Gemma2Model: gemma2_type_to_module_mapping,
     hf_models.gemma2.modeling_gemma2.Gemma2ForCausalLM: gemma2_lm_type_to_module_mapping,
     hf_models.olmo.modeling_olmo.OlmoModel: olmo_type_to_module_mapping,
-    hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_module_mapping,
+    hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_module_mapping,
+    hf_models.olmo2.modeling_olmo2.Olmo2Model: olmo2_type_to_module_mapping,
+    hf_models.olmo2.modeling_olmo2.Olmo2ForCausalLM: olmo2_lm_type_to_module_mapping,
     hf_models.blip.modeling_blip.BlipForQuestionAnswering: blip_type_to_module_mapping,
     hf_models.blip.modeling_blip.BlipForImageTextRetrieval: blip_itm_type_to_module_mapping,
     MLPModel: mlp_type_to_module_mapping,
@@ -97,7 +100,9 @@
     hf_models.gemma2.modeling_gemma2.Gemma2Model: gemma2_type_to_dimension_mapping,
     hf_models.gemma2.modeling_gemma2.Gemma2ForCausalLM: gemma2_lm_type_to_dimension_mapping,
     hf_models.olmo.modeling_olmo.OlmoModel: olmo_type_to_dimension_mapping,
-    hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_dimension_mapping,
+    hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_dimension_mapping,
+    hf_models.olmo2.modeling_olmo2.Olmo2Model: olmo2_type_to_dimension_mapping,
+    hf_models.olmo2.modeling_olmo2.Olmo2ForCausalLM: olmo2_lm_type_to_dimension_mapping,
     hf_models.blip.modeling_blip.BlipForQuestionAnswering: blip_type_to_dimension_mapping,
     hf_models.blip.modeling_blip.BlipForImageTextRetrieval: blip_itm_type_to_dimension_mapping,
     MLPModel: mlp_type_to_dimension_mapping,
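
The two registries edited above are keyed by the concrete Hugging Face model class, so an Olmo2Model or Olmo2ForCausalLM instance now resolves to the OLMo2-specific module and dimension mappings at intervention time. A minimal sketch of that lookup pattern follows; the names MODULE_REGISTRY and resolve_module_mapping are illustrative placeholders, not pyvene's actual identifiers:

    # Illustrative sketch of a class-keyed registry lookup. MODULE_REGISTRY and
    # resolve_module_mapping are placeholder names; pyvene's real registry lives
    # in intervenable_modelcard.py as shown in the diff above.
    MODULE_REGISTRY = {
        # e.g. Olmo2ForCausalLM: olmo2_lm_type_to_module_mapping, ...
    }

    def resolve_module_mapping(model, registry=MODULE_REGISTRY):
        """Return the per-family mapping registered for this model's exact class."""
        try:
            return registry[type(model)]
        except KeyError:
            raise ValueError(f"{type(model).__name__} has no registered module mapping") from None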

pyvene/models/olmo2/__init__.py

Whitespace-only changes.

pyvene/models/olmo2/modelings_intervenable_olmo2.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+"""
+Each modeling file in this library is a mapping between
+abstract naming of intervention anchor points and actual
+model module defined in the huggingface library.
+
+We also want to let the intervention library know how to
+config the dimensions of intervention based on model config
+defined in the huggingface library.
+"""
+
+
+import torch
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from ..constants import *
+
+
+olmo2_type_to_module_mapping = {
+    "block_input": ("layers[%s]", CONST_INPUT_HOOK),
+    "block_output": ("layers[%s]", CONST_OUTPUT_HOOK),
+    "mlp_activation": ("layers[%s].mlp.act_fn", CONST_OUTPUT_HOOK),
+    "mlp_output": ("layers[%s].mlp", CONST_OUTPUT_HOOK),
+    "mlp_input": ("layers[%s].mlp", CONST_INPUT_HOOK),
+    "attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK),
+    "head_attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK, (split_head_and_permute, "n_head")),
+    "attention_output": ("layers[%s].self_attn", CONST_OUTPUT_HOOK),
+    "attention_input": ("layers[%s].self_attn", CONST_INPUT_HOOK),
+    "query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK),
+    "key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK),
+    "value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK),
+    "head_query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_head")),
+    "head_key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_kv_head")),
+    "head_value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_kv_head")),
+}
+
+
+olmo2_type_to_dimension_mapping = {
+    "n_head": ("num_attention_heads",),
+    "n_kv_head": ("num_key_value_heads",),
+    "block_input": ("hidden_size",),
+    "block_output": ("hidden_size",),
+    "mlp_activation": ("intermediate_size",),
+    "mlp_output": ("hidden_size",),
+    "mlp_input": ("hidden_size",),
+    "attention_value_output": ("hidden_size",),
+    "head_attention_value_output": ("hidden_size/num_attention_heads",),
+    "attention_output": ("hidden_size",),
+    "attention_input": ("hidden_size",),
+    "query_output": ("hidden_size",),
+    "key_output": ("hidden_size",),
+    "value_output": ("hidden_size",),
+    "head_query_output": ("hidden_size/num_attention_heads",),
+    "head_key_output": ("hidden_size/num_attention_heads",),
+    "head_value_output": ("hidden_size/num_attention_heads",),
+}
+
+
+"""olmo2 model with LM head"""
+olmo2_lm_type_to_module_mapping = {}
+for k, v in olmo2_type_to_module_mapping.items():
+    olmo2_lm_type_to_module_mapping[k] = (f"model.{v[0]}", ) + v[1:]
+
+
+olmo2_lm_type_to_dimension_mapping = olmo2_type_to_dimension_mapping
+
+
+"""olmo2 model with classifier head"""
+olmo2_classifier_type_to_module_mapping = {}
+for k, v in olmo2_type_to_module_mapping.items():
+    olmo2_classifier_type_to_module_mapping[k] = (f"model.{v[0]}", ) + v[1:]
+
+
+olmo2_classifier_type_to_dimension_mapping = olmo2_type_to_dimension_mapping
+
+
+def create_olmo2(
+    name="allenai/OLMo-2-1124-7B", cache_dir=None, dtype=torch.bfloat16, config=None,
+    revision='main'
+):
+    """Creates a OLMo2 Causal LM model, config, and tokenizer from the given name and revision"""
+    if config is None:
+        config = AutoConfig.from_pretrained(name, cache_dir=cache_dir)
+        olmo2 = AutoModelForCausalLM.from_pretrained(
+            name,
+            config=config,
+            cache_dir=cache_dir,
+            torch_dtype=dtype,
+            revision=revision
+        )
+        tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
+    else:
+        olmo2 = AutoModelForCausalLM(config, cache_dir=cache_dir, revision=revision)
+        tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
+    print("loaded model")
+    return config, tokenizer, olmo2
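
As a quick check of what this file provides, the loader and the LM-head module mapping can be exercised directly. A minimal sketch, assuming this commit is installed and the default allenai/OLMo-2-1124-7B checkpoint is reachable; the layer index 5 is arbitrary:

    # Minimal usage sketch; assumes this commit is installed and the default
    # checkpoint can be downloaded. The layer index is arbitrary and only shows
    # how the "%s" placeholder in a module path template gets filled in.
    from pyvene.models.olmo2.modelings_intervenable_olmo2 import (
        create_olmo2,
        olmo2_lm_type_to_module_mapping,
    )

    # Loads allenai/OLMo-2-1124-7B in bfloat16 via transformers.
    config, tokenizer, olmo2 = create_olmo2()

    # The LM-head variant prefixes "model.", so paths are relative to Olmo2ForCausalLM.
    template, hook = olmo2_lm_type_to_module_mapping["mlp_output"][:2]
    print(template % 5)  # -> "model.layers[5].mlp"

One caveat: the config-only branch of create_olmo2 instantiates AutoModelForCausalLM(config, ...) directly, which transformers rejects in favor of AutoModelForCausalLM.from_config(config), so the name-based path used above is the one that works out of the box.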
