@@ -53,22 +53,14 @@ jobs:
5353 # - uses: actions/checkout@v2
5454
5555 - name : Display CUDA Version
56- run : |
57- if command -v nvcc &> /dev/null; then
58- echo "CUDA Version:"
59- nvcc --version || true
60- else
61- echo "nvcc not found. Ensure CUDA is installed."
62- fi
56+ run : |
57+ echo "CUDA Version:"
58+ nvcc --version || true  # "|| true" keeps the step from failing when nvcc is missing or exits non-zero
6359
6460 - name : Display cuDNN Version
6561 run : |
66- if [ -f /usr/local/cuda/include/cudnn_version.h ]; then
67- echo "cuDNN Version:"
68- cat /usr/local/cuda/include/cudnn_version.h | grep CUDNN_MAJOR -A 2 || true
69- else
70- echo "cuDNN not found. Ensure cuDNN is installed."
71- fi
62+ echo "cuDNN Version:"
63+ cat /usr/local/cuda/include/cudnn_version.h | grep CUDNN_MAJOR -A 2 || true  # prints the CUDNN_MAJOR line plus the 2 lines after it; "|| true" keeps the step from failing when the header is absent or nothing matches
7264
7365 - name : Verify EC2 Instance
7466 run : |
@@ -153,7 +145,11 @@ jobs:
153145
154146 - name : Run DVC commands in container
155147 run : |
148+ mkdir -p model_storage  # -p: tolerate a directory left over from an earlier run
149+ touch best_model_checkpoint.txt  # must exist as a file before it is bind-mounted; -v needs absolute host paths, a bare name would create a named volume instead
156150 docker run --rm --gpus=all \
151+ -v "$PWD/model_storage:/workspace/model_storage" \
152+ -v "$PWD/best_model_checkpoint.txt:/workspace/best_model_checkpoint.txt" \
157153 -e AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \
158154 -e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \
159155 -e AWS_DEFAULT_REGION=${{ secrets.AWS_REGION }} \
@@ -173,6 +169,24 @@ jobs:
173169 # # Stop the container after retrieving logs
174170 # docker stop $CONTAINER_ID
175171
172+ - name : Read best checkpoint file name
173+ id : read_checkpoint
174+ run : |
175+ checkpoint_file=$(head -n 1 best_model_checkpoint.txt)  # only the first line of the file is used
176+ echo "CHECKPOINT_FILE=${checkpoint_file:?best_model_checkpoint.txt is empty}" >> "$GITHUB_ENV"  # :? aborts the step instead of exporting an empty path
177+
178+ - name : Get latest commit ID
179+ id : get_commit_id
180+ run : echo "COMMIT_ID=$(git rev-parse HEAD 2>/dev/null || echo "$GITHUB_SHA")" >> "$GITHUB_ENV"  # fall back to the workflow's triggering commit SHA when git cannot resolve HEAD
181+
182+ - name : Upload checkpoint to S3
183+ run : |
184+ checkpoint_path="$CHECKPOINT_FILE" # Checkpoint path, read from the environment instead of being interpolated into the script
185+ bucket_name="mybucket-emlo-mumbai" # Bucket name only (no key prefix, no trailing slash); change to your S3 bucket name
186+ s3_key="session-08-checkpoint/$COMMIT_ID/$(basename "$checkpoint_path")" # Object key: prefix / commit ID / checkpoint file name
187+ echo "Uploading $checkpoint_path to s3://$bucket_name/$s3_key"
188+ aws s3 cp "$checkpoint_path" "s3://$bucket_name/$s3_key"
189+
176190 - name : Clean previous images and containers
177191 run : |
178192 docker system prune -f
0 commit comments