Commit fc35b75

Separate result JSON files as well as target/judge model parameters
1 parent 2548c07 commit fc35b75

File tree

1 file changed (+19, -10 lines)

samples/python/azure_ai_evaluation/evaluation.py

Lines changed: 19 additions & 10 deletions
@@ -1,6 +1,8 @@
 """This sample demonstrates how to use Azure AI Foundry SDK to run GitHub model catalog with evaluation.
 It is leveraging your endpoint and key. The call is synchronous.
 
+For those who have Azure credentials, you can run the risk and safety evaluators from Azure AI.
+
 Azure Evaluation SDK: https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/evaluate-sdk
 """

@@ -16,10 +18,13 @@
 
 
 token = os.environ['GITHUB_TOKEN']
-inferencing_model_name = "gpt-4o-mini"
-evaluation_model_name = "gpt-4o-mini"
-api_version = "2024-08-01-preview"
-endpoint = "https://models.inference.ai.azure.com"
+
+# Target model is the model to be evaluated.
+target_model_name = "Mistral-small"
+target_model_endpoint = "https://models.inference.ai.azure.com"
+# Judge model is the model to evaluate the target model.
+judge_model_name = "gpt-4o-mini"
+judge_model_endpoint = "https://models.inference.ai.azure.com"
 
 evaluation_name = "GitHub models evaluation"
 eval_data_file = Path("./eval_data.jsonl")
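
This hunk splits what was a single model/endpoint pair into two independent roles. A minimal sketch of how each parameter set is consumed further down in the file (names are the diff's own; judge_model_config is an illustrative local name, and both roles happen to share the GitHub models endpoint here):

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# The target parameters feed the inference client that generates the
# responses to be judged ...
target_client = ChatCompletionsClient(
    endpoint=target_model_endpoint,
    credential=AzureKeyCredential(token),
)

# ... while the judge parameters feed the model_config handed to the
# quality evaluators (see the run_perf_and_quality_evaluators hunk below).
judge_model_config = {
    "azure_endpoint": judge_model_endpoint,
    "azure_deployment": judge_model_name,
    "api_key": token,
}
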
@@ -39,7 +44,7 @@ def generate_eval_data():
     with eval_data_file.open("w") as f:
         for eval_data_query in eval_data_queries:
             client = ChatCompletionsClient(
-                endpoint=endpoint,
+                endpoint=target_model_endpoint,
                 credential=AzureKeyCredential(token),
             )
 
@@ -49,7 +54,7 @@ def generate_eval_data():
                     SystemMessage(content=context),
                     UserMessage(content=eval_data_query["query"]),
                 ],
-                model=inferencing_model_name,
+                model=target_model_name,
                 temperature=1.,
                 max_tokens=1000,
                 top_p=1.
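
A design note on the two hunks above: ChatCompletionsClient is still constructed inside the per-query loop even though neither endpoint nor credential varies between iterations. A hedged alternative, purely a sketch, is to build the client once before the loop and reuse it; the per-query complete() call is unchanged:

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Sketch: one client up front; target_model_endpoint and token are the
# module-level parameters introduced earlier in this diff.
client = ChatCompletionsClient(
    endpoint=target_model_endpoint,
    credential=AzureKeyCredential(token),
)
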
@@ -69,10 +74,9 @@ def generate_eval_data():
 
 def run_perf_and_quality_evaluators():
     model_config = {
-        "azure_endpoint": endpoint,
-        "azure_deployment": evaluation_model_name,
+        "azure_endpoint": judge_model_endpoint,
+        "azure_deployment": judge_model_name,
         "api_key": token,
-        "api_version": api_version,
     }
 
     evaluators = {
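
For orientation, here is a minimal sketch of how one quality evaluator consumes this judge model_config. RelevanceEvaluator is one of the evaluators the azure-ai-evaluation package provides; the query/response pair is made up:

from azure.ai.evaluation import RelevanceEvaluator

relevance = RelevanceEvaluator(model_config=model_config)
result = relevance(
    query="What is the capital of France?",       # illustrative input
    response="Paris is the capital of France.",
)
print(result)  # dict of scores keyed by metric name
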
@@ -118,6 +122,7 @@ def run_risk_and_safety_evaluators_with_azure():
         "GroundednessProEvaluator": evaluation.GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential),
     }
 
+    risk_and_safety_result_dict = {}
     with eval_data_file.open("r") as f:
         for line in f:
             eval_data = json.loads(line)
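
Unlike the quality evaluators, the risk and safety evaluators in this hunk are wired to an Azure AI project rather than to the judge model_config. A sketch of the wiring they assume, with placeholder project values that are not part of this commit:

from azure.ai.evaluation import ViolenceEvaluator
from azure.identity import DefaultAzureCredential

# azure_ai_project points at an Azure AI Foundry project; the values
# below are placeholders.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}
credential = DefaultAzureCredential()

violence = ViolenceEvaluator(azure_ai_project=azure_ai_project, credential=credential)
print(violence(query="example query", response="example response"))
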
@@ -127,6 +132,10 @@ def run_risk_and_safety_evaluators_with_azure():
             else:
                 score = evaluator(query=eval_data["query"], response=eval_data["response"], context=eval_data["context"])
             print(f"{name}: {score}")
+            risk_and_safety_result_dict[name] = score
+
+    with eval_result_file_risk_and_safety.open("w") as f:
+        f.write(json.dumps(risk_and_safety_result_dict, indent=4))
 
 
 if __name__ == "__main__":
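
One caveat worth noting: risk_and_safety_result_dict is keyed by evaluator name alone, so when eval_data.jsonl holds more than one row, each later row overwrites the earlier scores and the persisted JSON keeps only the last row's results. If per-row results are wanted, a sketch (helper name hypothetical, and the sample's per-evaluator signature branch elided) is to collect a list per evaluator:

import json
from collections import defaultdict
from pathlib import Path

def write_scores_per_row(rows, evaluators, out_file: Path):
    """Hypothetical helper: keep one score per (evaluator, row) so
    later rows do not overwrite earlier ones."""
    results = defaultdict(list)
    for row in rows:
        for name, evaluator in evaluators.items():
            results[name].append(evaluator(**row))
    with out_file.open("w") as f:
        f.write(json.dumps(results, indent=4))
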
@@ -136,5 +145,5 @@ def run_risk_and_safety_evaluators_with_azure():
     # Run performance and quality evaluators with GitHub model catalog.
     run_perf_and_quality_evaluators()
 
-    # Uncomment the following code with Azure credentials, then we can run the risk and safety evaluators from Azure AI.
+    # # Uncomment the following code with Azure credentials, then we can run the risk and safety evaluators from Azure AI.
     # run_risk_and_safety_evaluators_with_azure()
