Skip to content

Commit fc35b75

Browse files
committed
Separate result JSON files as well as target/judge model parameters
1 parent 2548c07 commit fc35b75

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

samples/python/azure_ai_evaluation/evaluation.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""This sample demonstrates how to use the Azure AI Foundry SDK to evaluate models from the GitHub model catalog.
22
It uses your endpoint and key. The call is synchronous.
33
4+
If you have Azure credentials, you can also run the risk and safety evaluators from Azure AI.
5+
46
Azure Evaluation SDK: https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/evaluate-sdk
57
"""
68

@@ -16,10 +18,13 @@
1618

1719

1820
token = os.environ['GITHUB_TOKEN']
19-
inferencing_model_name = "gpt-4o-mini"
20-
evaluation_model_name = "gpt-4o-mini"
21-
api_version = "2024-08-01-preview"
22-
endpoint = "https://models.inference.ai.azure.com"
21+
22+
# Target model is the model to be evaluated.
23+
target_model_name = "Mistral-small"
24+
target_model_endpoint = "https://models.inference.ai.azure.com"
25+
# Judge model is the model used to evaluate the target model.
26+
judge_model_name = "gpt-4o-mini"
27+
judge_model_endpoint = "https://models.inference.ai.azure.com"
2328

2429
evaluation_name = "GitHub models evaluation"
2530
eval_data_file = Path("./eval_data.jsonl")
@@ -39,7 +44,7 @@ def generate_eval_data():
3944
with eval_data_file.open("w") as f:
4045
for eval_data_query in eval_data_queries:
4146
client = ChatCompletionsClient(
42-
endpoint=endpoint,
47+
endpoint=target_model_endpoint,
4348
credential=AzureKeyCredential(token),
4449
)
4550

@@ -49,7 +54,7 @@ def generate_eval_data():
4954
SystemMessage(content=context),
5055
UserMessage(content=eval_data_query["query"]),
5156
],
52-
model=inferencing_model_name,
57+
model=target_model_name,
5358
temperature=1.,
5459
max_tokens=1000,
5560
top_p=1.
@@ -69,10 +74,9 @@ def generate_eval_data():
6974

7075
def run_perf_and_quality_evaluators():
7176
model_config = {
72-
"azure_endpoint": endpoint,
73-
"azure_deployment": evaluation_model_name,
77+
"azure_endpoint": judge_model_endpoint,
78+
"azure_deployment": judge_model_name,
7479
"api_key": token,
75-
"api_version": api_version,
7680
}
7781

7882
evaluators = {
@@ -118,6 +122,7 @@ def run_risk_and_safety_evaluators_with_azure():
118122
"GroundednessProEvaluator": evaluation.GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential),
119123
}
120124

125+
risk_and_safety_result_dict = {}
121126
with eval_data_file.open("r") as f:
122127
for line in f:
123128
eval_data = json.loads(line)
@@ -127,6 +132,10 @@ def run_risk_and_safety_evaluators_with_azure():
127132
else:
128133
score = evaluator(query=eval_data["query"], response=eval_data["response"], context=eval_data["context"])
129134
print(f"{name}: {score}")
135+
risk_and_safety_result_dict[name] = score
136+
137+
with eval_result_file_risk_and_safety.open("w") as f:
138+
f.write(json.dumps(risk_and_safety_result_dict, indent=4))
130139

131140

132141
if __name__ == "__main__":
@@ -136,5 +145,5 @@ def run_risk_and_safety_evaluators_with_azure():
136145
# Run performance and quality evaluators with GitHub model catalog.
137146
run_perf_and_quality_evaluators()
138147

139-
# Uncomment the following code with Azure credentials, then we can run the risk and safety evaluators from Azure AI.
148+
# # Uncomment the following code with Azure credentials, then we can run the risk and safety evaluators from Azure AI.
140149
# run_risk_and_safety_evaluators_with_azure()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy