Skip to content

Commit 9486728

Browse files
jayesh-tanna and Jayesh Tanna authored
Extract fine-tuning operations into separate methods for better modularity (#44060)
* Extract fine-tuning operations into separate methods for better modularity
* applying black
* resolving comments
* resolving review comment

---------

Co-authored-by: Jayesh Tanna <jatanna@microsoft.com>
1 parent 85a9b78 commit 9486728

File tree

2 files changed

+241
-125
lines changed

2 files changed

+241
-125
lines changed

sdk/ai/azure-ai-projects/samples/finetuning/sample_finetuning_supervised_job.py

Lines changed: 125 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -56,78 +56,35 @@
5656
resource_group = os.environ["AZURE_AI_PROJECTS_AZURE_RESOURCE_GROUP"]
5757
account_name = os.environ["AZURE_AI_PROJECTS_AZURE_AOAI_ACCOUNT"]
5858

59-
with (
60-
DefaultAzureCredential() as credential,
61-
AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
62-
project_client.get_openai_client() as openai_client,
63-
):
64-
# [START finetuning_supervised_job_sample]
65-
print("Uploading training file...")
66-
with open(training_file_path, "rb") as f:
67-
train_file = openai_client.files.create(file=f, purpose="fine-tune")
68-
print(f"Uploaded training file with ID: {train_file.id}")
69-
70-
print("Uploading validation file...")
71-
with open(validation_file_path, "rb") as f:
72-
validation_file = openai_client.files.create(file=f, purpose="fine-tune")
73-
print(f"Uploaded validation file with ID: {validation_file.id}")
74-
75-
print("Waits for the training and validation files to be processed...")
76-
openai_client.files.wait_for_processing(train_file.id)
77-
openai_client.files.wait_for_processing(validation_file.id)
78-
79-
print("Creating supervised fine-tuning job")
80-
fine_tuning_job = openai_client.fine_tuning.jobs.create(
81-
training_file=train_file.id,
82-
validation_file=validation_file.id,
83-
model=model_name,
84-
method={
85-
"type": "supervised",
86-
"supervised": {"hyperparameters": {"n_epochs": 3, "batch_size": 1, "learning_rate_multiplier": 1.0}},
87-
},
88-
extra_body={
89-
"trainingType": "Standard"
90-
}, # Recommended approach to set trainingType. Omitting this field may lead to unsupported behavior.
91-
)
92-
print(fine_tuning_job)
9359

94-
print(f"Getting fine-tuning job with ID: {fine_tuning_job.id}")
95-
retrieved_job = openai_client.fine_tuning.jobs.retrieve(fine_tuning_job.id)
96-
print(retrieved_job)
60+
def pause_job(openai_client, job_id):
    """Ask the service to pause a fine-tuning job and print the reply.

    The job has to be in the running state for it to be paused.
    """
    print(f"Pausing fine-tuning job with ID: {job_id}")
    jobs_api = openai_client.fine_tuning.jobs
    print(jobs_api.pause(job_id))
10168

102-
print("Listing only 10 fine-tuning jobs:")
103-
for job in openai_client.fine_tuning.jobs.list(limit=10):
104-
print(job)
10569

106-
print(f"Pausing fine-tuning job with ID: {fine_tuning_job.id}")
107-
paused_job = openai_client.fine_tuning.jobs.pause(fine_tuning_job.id)
108-
print(paused_job)
70+
def resume_job(openai_client, job_id):
    """Ask the service to resume a fine-tuning job and print the reply.

    The job has to be in the paused state for it to be resumed.
    """
    print(f"Resuming fine-tuning job with ID: {job_id}")
    jobs_api = openai_client.fine_tuning.jobs
    print(jobs_api.resume(job_id))
11378

114-
print(f"Listing events of fine-tuning job: {fine_tuning_job.id}")
115-
for event in openai_client.fine_tuning.jobs.list_events(fine_tuning_job.id):
116-
print(event)
117-
118-
# Note that to retrieve the checkpoints, job needs to be in terminal state.
119-
print(f"Listing checkpoints of fine-tuning job: {fine_tuning_job.id}")
120-
for checkpoint in openai_client.fine_tuning.jobs.checkpoints.list(fine_tuning_job.id):
121-
print(checkpoint)
12279

123-
print(f"Cancelling fine-tuning job with ID: {fine_tuning_job.id}")
124-
cancelled_job = openai_client.fine_tuning.jobs.cancel(fine_tuning_job.id)
125-
print(f"Successfully cancelled fine-tuning job: {cancelled_job.id}, Status: {cancelled_job.status}")
80+
def deploy_model(openai_client, credential, job_id):
81+
"""Deploy the fine-tuned model.
12682
127-
# Deploy model (using Azure Management SDK - azure-mgmt-cognitiveservices)
128-
# Note: Deployment can only be started after the fine-tuning job completes successfully.
129-
print(f"Getting fine-tuning job with ID: {fine_tuning_job.id}")
130-
fine_tuned_model_name = openai_client.fine_tuning.jobs.retrieve(fine_tuning_job.id).fine_tuned_model
83+
Deploy model using Azure Management SDK (azure-mgmt-cognitiveservices).
84+
Note: Deployment can only be started after the fine-tuning job completes successfully.
85+
"""
86+
print(f"Retrieving fine-tuning job with ID: {job_id}")
87+
fine_tuned_model_name = openai_client.fine_tuning.jobs.retrieve(job_id).fine_tuned_model
13188
deployment_name = "gpt-4-1-fine-tuned"
13289

13390
with CognitiveServicesManagementClient(credential=credential, subscription_id=subscription_id) as cogsvc_client:
@@ -140,6 +97,7 @@
14097

14198
deployment_config = Deployment(properties=deployment_properties, sku=deployment_sku)
14299

100+
print(f"Deploying fine-tuned model: {fine_tuned_model_name} with deployment name: {deployment_name}")
143101
deployment = cogsvc_client.deployments.begin_create_or_update(
144102
resource_group_name=resource_group,
145103
account_name=account_name,
@@ -149,12 +107,114 @@
149107

150108
while deployment.status() not in ["Succeeded", "Failed"]:
151109
time.sleep(30)
152-
print(f"Status: {deployment.status()}")
110+
print(f"Deployment status: {deployment.status()}")
153111

112+
print(f"Model deployment completed: {deployment_name}")
113+
return deployment_name
114+
115+
116+
def infer(openai_client, deployment_name):
    """Send one sample question to the deployed fine-tuned model and print its answer."""
    print(f"Testing fine-tuned model via deployment: {deployment_name}")

    # A single user turn is sent; the deployment name is passed as the model.
    question = {"role": "user", "content": "Who invented the telephone?"}
    reply = openai_client.responses.create(model=deployment_name, input=[question])
    print(f"Model response: {reply.output_text}")
160-
# [END finetuning_supervised_job_sample]
124+
125+
126+
def list_jobs(openai_client):
    """Print each fine-tuning job yielded by the jobs listing."""
    print("Listing all fine-tuning jobs:")
    all_jobs = openai_client.fine_tuning.jobs.list()
    for entry in all_jobs:
        print(entry)
131+
132+
133+
def list_events(openai_client, job_id):
    """Print each event recorded for the given fine-tuning job."""
    print(f"Listing events of fine-tuning job: {job_id}")
    job_events = openai_client.fine_tuning.jobs.list_events(job_id)
    for entry in job_events:
        print(entry)
138+
139+
140+
def list_checkpoints(openai_client, job_id):
    """Print each checkpoint of the given fine-tuning job.

    Checkpoints can only be retrieved once the job is in a terminal state.
    """
    print(f"Listing checkpoints of fine-tuning job: {job_id}")
    job_checkpoints = openai_client.fine_tuning.jobs.checkpoints.list(job_id)
    for entry in job_checkpoints:
        print(entry)
148+
149+
150+
def cancel_job(openai_client, job_id):
    """Cancel a fine-tuning job and print the ID and status reported back."""
    print(f"Cancelling fine-tuning job with ID: {job_id}")
    result = openai_client.fine_tuning.jobs.cancel(job_id)
    print(f"Successfully cancelled fine-tuning job: {result.id}, Status: {result.status}")
155+
156+
157+
def retrieve_job(openai_client, job_id):
    """Fetch a fine-tuning job by ID and print it."""
    print(f"Getting fine-tuning job with ID: {job_id}")
    print(openai_client.fine_tuning.jobs.retrieve(job_id))
162+
163+
164+
def main() -> None:
    """Upload the datasets, create a supervised fine-tuning job and print it.

    The follow-up operations (retrieve, list, pause, resume, events, checkpoints,
    cancel, deploy, infer) are left commented out at the end of the function so
    that each one can be enabled individually.
    """
    with (
        DefaultAzureCredential() as credential,
        AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
        project_client.get_openai_client() as openai_client,
    ):
        print("Uploading training file...")
        with open(training_file_path, "rb") as training_stream:
            train_file = openai_client.files.create(file=training_stream, purpose="fine-tune")
        print(f"Uploaded training file with ID: {train_file.id}")

        print("Uploading validation file...")
        with open(validation_file_path, "rb") as validation_stream:
            validation_file = openai_client.files.create(file=validation_stream, purpose="fine-tune")
        print(f"Uploaded validation file with ID: {validation_file.id}")

        print("Waits for the training and validation files to be processed...")
        for uploaded in (train_file, validation_file):
            openai_client.files.wait_for_processing(uploaded.id)

        print("Creating supervised fine-tuning job")
        hyperparameters = {"n_epochs": 3, "batch_size": 1, "learning_rate_multiplier": 1.0}
        supervised_method = {"type": "supervised", "supervised": {"hyperparameters": hyperparameters}}
        # Recommended approach to set trainingType. Omitting this field may lead to unsupported behavior.
        training_type_body = {"trainingType": "Standard"}
        fine_tuning_job = openai_client.fine_tuning.jobs.create(
            training_file=train_file.id,
            validation_file=validation_file.id,
            model=model_name,
            method=supervised_method,
            extra_body=training_type_body,
        )
        print(fine_tuning_job)

        # Uncomment any of the following calls to try a specific operation.

        # retrieve_job(openai_client, fine_tuning_job.id)

        # list_jobs(openai_client)

        # The job must be running before it can be paused.
        # pause_job(openai_client, fine_tuning_job.id)

        # The job must be paused before it can be resumed.
        # resume_job(openai_client, fine_tuning_job.id)

        # list_events(openai_client, fine_tuning_job.id)

        # Checkpoints are only available once the job is in a terminal state.
        # list_checkpoints(openai_client, fine_tuning_job.id)

        # cancel_job(openai_client, fine_tuning_job.id)

        # Deployment can only start after the job completes successfully.
        # deployment_name = deploy_model(openai_client, credential, fine_tuning_job.id)

        # infer(openai_client, deployment_name)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)