11"""This sample demonstrates how to use Azure AI Foundry SDK to run GitHub model catalog with evaluation.
22It is leveraging your endpoint and key. The call is synchronous.
33
4+ For those who have Azure credentials, you can run the risk and safety evaluators from Azure AI.
5+
46Azure Evaluation SDK: https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/evaluate-sdk
57"""
68
@@ -16,10 +18,13 @@


 token = os.environ['GITHUB_TOKEN']
-inferencing_model_name = "gpt-4o-mini"
-evaluation_model_name = "gpt-4o-mini"
-api_version = "2024-08-01-preview"
-endpoint = "https://models.inference.ai.azure.com"
+
+# Target model: the model to be evaluated.
+target_model_name = "Mistral-small"
+target_model_endpoint = "https://models.inference.ai.azure.com"
+# Judge model: the model that judges the target model's responses.
+judge_model_name = "gpt-4o-mini"
+judge_model_endpoint = "https://models.inference.ai.azure.com"

 evaluation_name = "GitHub models evaluation"
 eval_data_file = Path("./eval_data.jsonl")
@@ -39,7 +44,7 @@ def generate_eval_data():
     with eval_data_file.open("w") as f:
         for eval_data_query in eval_data_queries:
             client = ChatCompletionsClient(
-                endpoint=endpoint,
+                endpoint=target_model_endpoint,
                 credential=AzureKeyCredential(token),
             )

@@ -49,7 +54,7 @@ def generate_eval_data():
                     SystemMessage(content=context),
                     UserMessage(content=eval_data_query["query"]),
                 ],
-                model=inferencing_model_name,
+                model=target_model_name,
                 temperature=1.,
                 max_tokens=1000,
                 top_p=1.
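
For reference, each line generate_eval_data() writes to eval_data.jsonl is one JSON object per query. Judging from the fields the risk and safety evaluators read later (query, response, context), a record plausibly looks like the following (values are illustrative, not taken from the sample):

    {"query": "What is the capital of France?", "context": "<the system prompt used above>", "response": "Paris is the capital of France."}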
@@ -69,10 +74,9 @@ def generate_eval_data():

 def run_perf_and_quality_evaluators():
     model_config = {
-        "azure_endpoint": endpoint,
-        "azure_deployment": evaluation_model_name,
+        "azure_endpoint": judge_model_endpoint,
+        "azure_deployment": judge_model_name,
         "api_key": token,
-        "api_version": api_version,
     }

     evaluators = {
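
The model_config above is the judge-model configuration consumed by the quality evaluators. As a minimal sketch of how such a config drives one of the built-in quality evaluators (assuming the azure-ai-evaluation package; RelevanceEvaluator and the sample values are illustrative, not part of this diff):

    from azure.ai.evaluation import RelevanceEvaluator

    # Score a single record with the judge model; the exact result keys
    # depend on the SDK version.
    relevance = RelevanceEvaluator(model_config)
    result = relevance(
        query="What is the capital of France?",
        response="Paris is the capital of France.",
    )
    print(result)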
@@ -118,6 +122,7 @@ def run_risk_and_safety_evaluators_with_azure():
         "GroundednessProEvaluator": evaluation.GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential),
     }

+    risk_and_safety_result_dict = {}
     with eval_data_file.open("r") as f:
         for line in f:
             eval_data = json.loads(line)
@@ -127,6 +132,10 @@ def run_risk_and_safety_evaluators_with_azure():
             else:
                 score = evaluator(query=eval_data["query"], response=eval_data["response"], context=eval_data["context"])
             print(f"{name}: {score}")
+            risk_and_safety_result_dict[name] = score
+
+    with eval_result_file_risk_and_safety.open("w") as f:
+        f.write(json.dumps(risk_and_safety_result_dict, indent=4))


 if __name__ == "__main__":
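
Running the risk and safety path requires an Azure AI project and Azure credentials. A minimal sketch of the wiring run_risk_and_safety_evaluators_with_azure() relies on (placeholder values; the azure_ai_project dict shape is the one the azure-ai-evaluation safety evaluators expect):

    from azure.identity import DefaultAzureCredential

    # Fill in your own subscription, resource group, and project names.
    credential = DefaultAzureCredential()
    azure_ai_project = {
        "subscription_id": "<your-subscription-id>",
        "resource_group_name": "<your-resource-group>",
        "project_name": "<your-ai-project-name>",
    }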
@@ -136,5 +145,5 @@ def run_risk_and_safety_evaluators_with_azure():
     # Run performance and quality evaluators with the GitHub model catalog.
     run_perf_and_quality_evaluators()

-    # Uncomment the following code with Azure credentials, then we can run the risk and safety evaluators from Azure AI.
+    # # Uncomment the following code with Azure credentials to run the risk and safety evaluators from Azure AI.
     # run_risk_and_safety_evaluators_with_azure()