Skip to content

Commit 44d4969

Browse files
committed
update grader to o4-mini
1 parent 04da8bf commit 44d4969

8 files changed

+881
-1063
lines changed

labs/3-customization/31-basic-finetuning.ipynb

Lines changed: 48 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -372,43 +372,17 @@
372372
},
373373
{
374374
"cell_type": "code",
375-
"execution_count": null,
375+
"execution_count": 8,
376376
"id": "f024bb0f",
377377
"metadata": {},
378378
"outputs": [
379379
{
380380
"name": "stdout",
381381
"output_type": "stream",
382382
"text": [
383-
"{\n",
384-
" \"id\": \"ftjob-9a89ce8f64994a46befa93136433c43e\",\n",
385-
" \"created_at\": 1762435747,\n",
386-
" \"error\": null,\n",
387-
" \"fine_tuned_model\": null,\n",
388-
" \"finished_at\": null,\n",
389-
" \"hyperparameters\": {\n",
390-
" \"batch_size\": 1,\n",
391-
" \"learning_rate_multiplier\": 1.0,\n",
392-
" \"n_epochs\": 3\n",
393-
" },\n",
394-
" \"model\": \"gpt-4o-2024-08-06\",\n",
395-
" \"object\": \"fine_tuning.job\",\n",
396-
" \"organization_id\": null,\n",
397-
" \"result_files\": [\n",
398-
" \"file-04cffffb55664b39bc9193f650dd991e\"\n",
399-
" ],\n",
400-
" \"seed\": 105,\n",
401-
" \"status\": \"running\",\n",
402-
" \"trained_tokens\": null,\n",
403-
" \"training_file\": \"file-adcc886357074524b0158c9b28e75c23\",\n",
404-
" \"validation_file\": \"file-2c40fbf6d80647b2ac0181e911b9e41c\",\n",
405-
" \"estimated_finish\": 1762437316,\n",
406-
" \"integrations\": null,\n",
407-
" \"metadata\": null,\n",
408-
" \"method\": null\n",
409-
"}\n",
410-
"Elapsed time: 58 minutes 18 seconds\n",
411-
"Status: running\n"
383+
"Fine-tuning job ftjob-9a89ce8f64994a46befa93136433c43e finished with status: succeeded\n",
384+
"Checking other fine-tune jobs for this resource.\n",
385+
"Found 3 fine-tune jobs.\n"
412386
]
413387
}
414388
],
@@ -456,7 +430,7 @@
456430
},
457431
{
458432
"cell_type": "code",
459-
"execution_count": null,
433+
"execution_count": 9,
460434
"id": "ebe8fa88",
461435
"metadata": {},
462436
"outputs": [
@@ -467,44 +441,44 @@
467441
"{\n",
468442
" \"data\": [\n",
469443
" {\n",
470-
" \"id\": \"ftevent-978b59aa8f7b4d5699f24e4ebf63e3e9\",\n",
471-
" \"created_at\": 1762285152,\n",
444+
" \"id\": \"ftevent-6337f67454304675a4522961b1d4e650\",\n",
445+
" \"created_at\": 1762439853,\n",
472446
" \"level\": \"info\",\n",
473447
" \"message\": \"Training tokens billed: 8000\",\n",
474448
" \"object\": \"fine_tuning.job.event\",\n",
475449
" \"data\": null,\n",
476450
" \"type\": \"message\"\n",
477451
" },\n",
478452
" {\n",
479-
" \"id\": \"ftevent-16ff596841cf492a8e69de11591e6650\",\n",
480-
" \"created_at\": 1762285151,\n",
453+
" \"id\": \"ftevent-7e03223d12b14b409b3bf873ce2b1fe9\",\n",
454+
" \"created_at\": 1762439853,\n",
481455
" \"level\": \"info\",\n",
482456
" \"message\": \"Model Evaluation Passed.\",\n",
483457
" \"object\": \"fine_tuning.job.event\",\n",
484458
" \"data\": null,\n",
485459
" \"type\": \"message\"\n",
486460
" },\n",
487461
" {\n",
488-
" \"id\": \"ftevent-b6255ab17ed74b079f9c5023dd435d9e\",\n",
489-
" \"created_at\": 1762285151,\n",
462+
" \"id\": \"ftevent-9611c6358b3940be845726f3198a86f8\",\n",
463+
" \"created_at\": 1762439853,\n",
490464
" \"level\": \"info\",\n",
491-
" \"message\": \"Completed results file: file-7537f2e0e43c4982b345df363653865e\",\n",
465+
" \"message\": \"Completed results file: file-04cffffb55664b39bc9193f650dd991e\",\n",
492466
" \"object\": \"fine_tuning.job.event\",\n",
493467
" \"data\": null,\n",
494468
" \"type\": \"message\"\n",
495469
" },\n",
496470
" {\n",
497-
" \"id\": \"ftevent-bcf121c1b49d4e2e8386582ffaeed200\",\n",
498-
" \"created_at\": 1762285145,\n",
471+
" \"id\": \"ftevent-009808a1aef34f4baf91ce92bc7a4a65\",\n",
472+
" \"created_at\": 1762439824,\n",
499473
" \"level\": \"info\",\n",
500474
" \"message\": \"Job succeeded.\",\n",
501475
" \"object\": \"fine_tuning.job.event\",\n",
502476
" \"data\": null,\n",
503477
" \"type\": \"message\"\n",
504478
" },\n",
505479
" {\n",
506-
" \"id\": \"ftevent-808de1bd56fbf96808de1bd56fbf9680\",\n",
507-
" \"created_at\": 1762283265,\n",
480+
" \"id\": \"ftevent-808de1d3dc516b7808de1d3dc516b780\",\n",
481+
" \"created_at\": 1762438027,\n",
508482
" \"level\": \"info\",\n",
509483
" \"message\": \"Step 120: training loss=1.3205604553222656\",\n",
510484
" \"object\": \"fine_tuning.job.event\",\n",
@@ -520,8 +494,8 @@
520494
" \"type\": \"metrics\"\n",
521495
" },\n",
522496
" {\n",
523-
" \"id\": \"ftevent-808de1bd569c9b5808de1bd569c9b580\",\n",
524-
" \"created_at\": 1762283255,\n",
497+
" \"id\": \"ftevent-808de1d3dbf20d6808de1d3dbf20d680\",\n",
498+
" \"created_at\": 1762438017,\n",
525499
" \"level\": \"info\",\n",
526500
" \"message\": \"Step 110: training loss=1.7486391067504883\",\n",
527501
" \"object\": \"fine_tuning.job.event\",\n",
@@ -535,8 +509,8 @@
535509
" \"type\": \"metrics\"\n",
536510
" },\n",
537511
" {\n",
538-
" \"id\": \"ftevent-808de1bd563d3d4808de1bd563d3d480\",\n",
539-
" \"created_at\": 1762283245,\n",
512+
" \"id\": \"ftevent-808de1d3db92af5808de1d3db92af580\",\n",
513+
" \"created_at\": 1762438007,\n",
540514
" \"level\": \"info\",\n",
541515
" \"message\": \"Step 100: training loss=1.3873664140701294\",\n",
542516
" \"object\": \"fine_tuning.job.event\",\n",
@@ -550,8 +524,8 @@
550524
" \"type\": \"metrics\"\n",
551525
" },\n",
552526
" {\n",
553-
" \"id\": \"ftevent-808de1bd55dddf3808de1bd55dddf380\",\n",
554-
" \"created_at\": 1762283235,\n",
527+
" \"id\": \"ftevent-808de1d3db33514808de1d3db3351480\",\n",
528+
" \"created_at\": 1762437997,\n",
555529
" \"level\": \"info\",\n",
556530
" \"message\": \"Step 90: training loss=1.6716580390930176\",\n",
557531
" \"object\": \"fine_tuning.job.event\",\n",
@@ -565,8 +539,8 @@
565539
" \"type\": \"metrics\"\n",
566540
" },\n",
567541
" {\n",
568-
" \"id\": \"ftevent-808de1bd557e812808de1bd557e81280\",\n",
569-
" \"created_at\": 1762283225,\n",
542+
" \"id\": \"ftevent-808de1d3dad3f33808de1d3dad3f3380\",\n",
543+
" \"created_at\": 1762437987,\n",
570544
" \"level\": \"info\",\n",
571545
" \"message\": \"Step 80: training loss=1.714286208152771\",\n",
572546
" \"object\": \"fine_tuning.job.event\",\n",
@@ -582,8 +556,8 @@
582556
" \"type\": \"metrics\"\n",
583557
" },\n",
584558
" {\n",
585-
" \"id\": \"ftevent-808de1bd551f231808de1bd551f23180\",\n",
586-
" \"created_at\": 1762283215,\n",
559+
" \"id\": \"ftevent-808de1d3da74952808de1d3da7495280\",\n",
560+
" \"created_at\": 1762437977,\n",
587561
" \"level\": \"info\",\n",
588562
" \"message\": \"Step 70: training loss=1.7350307703018188\",\n",
589563
" \"object\": \"fine_tuning.job.event\",\n",
@@ -620,7 +594,7 @@
620594
},
621595
{
622596
"cell_type": "code",
623-
"execution_count": null,
597+
"execution_count": 10,
624598
"id": "7088ebaf",
625599
"metadata": {},
626600
"outputs": [
@@ -631,10 +605,10 @@
631605
"{\n",
632606
" \"data\": [\n",
633607
" {\n",
634-
" \"id\": \"ftchkpt-ba4b5bc790cc4aa5b2e371a15a9d55c7\",\n",
635-
" \"created_at\": 1762283826,\n",
636-
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-153a8a4806f84480acba096c9cfd24f4\",\n",
637-
" \"fine_tuning_job_id\": \"ftjob-153a8a4806f84480acba096c9cfd24f4\",\n",
608+
" \"id\": \"ftchkpt-657bc04e673a4859aa78f382e08952df\",\n",
609+
" \"created_at\": 1762438554,\n",
610+
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-9a89ce8f64994a46befa93136433c43e\",\n",
611+
" \"fine_tuning_job_id\": \"ftjob-9a89ce8f64994a46befa93136433c43e\",\n",
638612
" \"metrics\": {\n",
639613
" \"full_valid_loss\": 1.5513120925787725,\n",
640614
" \"full_valid_mean_token_accuracy\": 0.6022727272727273,\n",
@@ -648,10 +622,10 @@
648622
" \"step_number\": 120\n",
649623
" },\n",
650624
" {\n",
651-
" \"id\": \"ftchkpt-077392233af442378e8b21f4a993ddb3\",\n",
652-
" \"created_at\": 1762283632,\n",
653-
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-153a8a4806f84480acba096c9cfd24f4:ckpt-step-80\",\n",
654-
" \"fine_tuning_job_id\": \"ftjob-153a8a4806f84480acba096c9cfd24f4\",\n",
625+
" \"id\": \"ftchkpt-814049bb70924860b0ff629c7de9b878\",\n",
626+
" \"created_at\": 1762438363,\n",
627+
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-9a89ce8f64994a46befa93136433c43e:ckpt-step-80\",\n",
628+
" \"fine_tuning_job_id\": \"ftjob-9a89ce8f64994a46befa93136433c43e\",\n",
655629
" \"metrics\": {\n",
656630
" \"full_valid_loss\": 1.714390501831517,\n",
657631
" \"full_valid_mean_token_accuracy\": 0.5681818181818182,\n",
@@ -665,10 +639,10 @@
665639
" \"step_number\": 80\n",
666640
" },\n",
667641
" {\n",
668-
" \"id\": \"ftchkpt-93bb67e237cc4ac49ea87647b9608974\",\n",
669-
" \"created_at\": 1762283434,\n",
670-
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-153a8a4806f84480acba096c9cfd24f4:ckpt-step-40\",\n",
671-
" \"fine_tuning_job_id\": \"ftjob-153a8a4806f84480acba096c9cfd24f4\",\n",
642+
" \"id\": \"ftchkpt-5e96236e41eb41bd9befaf132ff6e22a\",\n",
643+
" \"created_at\": 1762438170,\n",
644+
" \"fine_tuned_model_checkpoint\": \"gpt-4o-2024-08-06.ft-9a89ce8f64994a46befa93136433c43e:ckpt-step-40\",\n",
645+
" \"fine_tuning_job_id\": \"ftjob-9a89ce8f64994a46befa93136433c43e\",\n",
672646
" \"metrics\": {\n",
673647
" \"full_valid_loss\": 2.319527850006566,\n",
674648
" \"full_valid_mean_token_accuracy\": 0.4356060606060606,\n",
@@ -711,7 +685,7 @@
711685
},
712686
{
713687
"cell_type": "code",
714-
"execution_count": null,
688+
"execution_count": 11,
715689
"id": "4a77c5b8",
716690
"metadata": {},
717691
"outputs": [
@@ -720,11 +694,11 @@
720694
"output_type": "stream",
721695
"text": [
722696
"{\n",
723-
" \"id\": \"ftjob-153a8a4806f84480acba096c9cfd24f4\",\n",
724-
" \"created_at\": 1762280586,\n",
697+
" \"id\": \"ftjob-9a89ce8f64994a46befa93136433c43e\",\n",
698+
" \"created_at\": 1762435747,\n",
725699
" \"error\": null,\n",
726-
" \"fine_tuned_model\": \"gpt-4o-2024-08-06.ft-153a8a4806f84480acba096c9cfd24f4\",\n",
727-
" \"finished_at\": 1762285152,\n",
700+
" \"fine_tuned_model\": \"gpt-4o-2024-08-06.ft-9a89ce8f64994a46befa93136433c43e\",\n",
701+
" \"finished_at\": 1762439854,\n",
728702
" \"hyperparameters\": {\n",
729703
" \"batch_size\": 1,\n",
730704
" \"learning_rate_multiplier\": 1.0,\n",
@@ -734,14 +708,14 @@
734708
" \"object\": \"fine_tuning.job\",\n",
735709
" \"organization_id\": null,\n",
736710
" \"result_files\": [\n",
737-
" \"file-7537f2e0e43c4982b345df363653865e\"\n",
711+
" \"file-04cffffb55664b39bc9193f650dd991e\"\n",
738712
" ],\n",
739713
" \"seed\": 105,\n",
740714
" \"status\": \"succeeded\",\n",
741715
" \"trained_tokens\": 10485,\n",
742-
" \"training_file\": \"file-2586012805e944559349b1d4f1c87d6d\",\n",
743-
" \"validation_file\": \"file-764e755d5a034ea8b8ecc7a5ce051527\",\n",
744-
" \"estimated_finish\": 1762282555,\n",
716+
" \"training_file\": \"file-adcc886357074524b0158c9b28e75c23\",\n",
717+
" \"validation_file\": \"file-2c40fbf6d80647b2ac0181e911b9e41c\",\n",
718+
" \"estimated_finish\": 1762437316,\n",
745719
" \"integrations\": null,\n",
746720
" \"metadata\": null,\n",
747721
" \"method\": null\n",

labs/3-customization/32-custom-grader.ipynb

Lines changed: 52 additions & 51 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)