@@ -145,8 +145,8 @@ def create_bedrock_judge_metrics():
145145 ),
146146 examples = medical_accuracy_examples ,
147147 version = "v1" ,
148- # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
149- model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
148+ model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
149+ # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
150150 parameters = {
151151 "anthropic_version" : "bedrock-2023-05-31" ,
152152 "temperature" : 0.0 ,
@@ -194,8 +194,8 @@ def create_bedrock_judge_metrics():
194194 ),
195195 examples = clinical_reasoning_examples ,
196196 version = "v1" ,
197- # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
198- model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
197+ model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
198+ # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
199199 parameters = {
200200 "anthropic_version" : "bedrock-2023-05-31" ,
201201 "temperature" : 0.0 ,
@@ -241,8 +241,8 @@ def create_bedrock_judge_metrics():
241241 ),
242242 examples = patient_safety_examples ,
243243 version = "v1" ,
244- # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
245- model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
244+ model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
245+ # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
246246 parameters = {
247247 "anthropic_version" : "bedrock-2023-05-31" ,
248248 "temperature" : 0.0 ,
@@ -481,7 +481,7 @@ def evaluate_model_qualitatively(model_config, dataset):
481481 mlflow .log_param ("qualitative_evaluation_endpoint" , endpoint_name )
482482 mlflow .log_param ("qualitative_evaluation_num_samples" , num_samples )
483483 mlflow .log_param ("qualitative_evaluation_timestamp" , datetime .now ().isoformat ())
484- mlflow .log_param ("llm_judge_model" , "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" )
484+ mlflow .log_param ("llm_judge_model" , "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" )
485485
486486 # Load the test dataset
487487 try :
0 commit comments