@@ -145,8 +145,8 @@ def create_bedrock_judge_metrics():
145145 ),
146146 examples = medical_accuracy_examples ,
147147 version = "v1" ,
148- model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
149- # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
148+ # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
149+ model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
150150 parameters = {
151151 "anthropic_version" : "bedrock-2023-05-31" ,
152152 "temperature" : 0.0 ,
@@ -194,8 +194,8 @@ def create_bedrock_judge_metrics():
194194 ),
195195 examples = clinical_reasoning_examples ,
196196 version = "v1" ,
197- model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
198- # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
197+ # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
198+ model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
199199 parameters = {
200200 "anthropic_version" : "bedrock-2023-05-31" ,
201201 "temperature" : 0.0 ,
@@ -241,8 +241,8 @@ def create_bedrock_judge_metrics():
241241 ),
242242 examples = patient_safety_examples ,
243243 version = "v1" ,
244- model = "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" ,
245- # model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0",
244+ # model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",
245+ model = "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" ,
246246 parameters = {
247247 "anthropic_version" : "bedrock-2023-05-31" ,
248248 "temperature" : 0.0 ,
@@ -481,7 +481,8 @@ def evaluate_model_qualitatively(model_config, dataset):
481481 mlflow .log_param ("qualitative_evaluation_endpoint" , endpoint_name )
482482 mlflow .log_param ("qualitative_evaluation_num_samples" , num_samples )
483483 mlflow .log_param ("qualitative_evaluation_timestamp" , datetime .now ().isoformat ())
484- mlflow .log_param ("llm_judge_model" , "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0" )
484+ mlflow .log_param ("llm_judge_model" , "bedrock:/anthropic.claude-3-haiku-20240307-v1:0" )
485+ # mlflow.log_param("llm_judge_model", "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0")
485486
486487 # Load the test dataset
487488 try :
0 commit comments