@@ -4,11 +4,11 @@ parameters:
44 pytorch_stable_image :
55 type : string
66 # https://hub.docker.com/r/pytorch/pytorch/tags
7- default : " pytorch/pytorch:1.9 .0-cuda11.1 -cudnn8-runtime"
7+ default : " pytorch/pytorch:1.10 .0-cuda11.3 -cudnn8-runtime"
88 pytorch_stable_image_devel :
99 type : string
1010 # https://hub.docker.com/r/pytorch/pytorch/tags
11- default : " pytorch/pytorch:1.9 .0-cuda11.1 -cudnn8-devel"
11+ default : " pytorch/pytorch:1.10 .0-cuda11.3 -cudnn8-devel"
1212 workingdir :
1313 type : string
1414 default : " /tmp/ignite"
@@ -26,7 +26,7 @@ parameters:
2626one_gpu : &one_gpu
2727 machine :
2828 # https://circleci.com/docs/2.0/configuration-reference/#available-linux-gpu-images
29- image : ubuntu-1604 -cuda-11.1:202012 -01 # CUDA v11.1, Docker v19.03.13 , nvidia-container-toolkit v1.4.0 -1
29+ image : ubuntu-2004 -cuda-11.2:202103 -01 # CUDA v11.2. 1, Docker v20.10.5 , nvidia-container-toolkit v1.4.2 -1
3030 docker_layer_caching : true
3131 # https://circleci.com/product/features/resource-classes/#linux-vm
3232 resource_class : gpu.small
@@ -40,7 +40,7 @@ one_gpu_windows: &one_gpu_windows
4040two_gpus : &two_gpus
4141 machine :
4242 # https://circleci.com/docs/2.0/configuration-reference/#available-linux-gpu-images
43- image : ubuntu-1604 -cuda-11.1:202012 -01 # CUDA v11.1, Docker v19.03.13 , nvidia-container-toolkit v1.4.0 -1
43+ image : ubuntu-2004 -cuda-11.2:202103 -01 # CUDA v11.2. 1, Docker v20.10.5 , nvidia-container-toolkit v1.4.2 -1
4444 docker_layer_caching : true
4545 # https://circleci.com/product/features/resource-classes/#linux-vm
4646 resource_class : gpu.medium
@@ -54,12 +54,12 @@ install_latest_nvidia: &install_latest_nvidia
5454 name : Install latest NVidia-driver and CUDA
5555 command : |
5656 sudo apt-get purge nvidia* && sudo apt-get autoremove
57- sudo apt-get update && sudo apt-get install -y --no-install-recommends nvidia-455 cuda-drivers-455
57+ sudo apt-get update && sudo apt-get install -y --no-install-recommends nvidia-driver-470
5858 # Install nvidia-container-runtime
5959 sudo apt-get install -y nvidia-container-runtime
6060 # Reload driver : https://stackoverflow.com/a/45319156/6309199
6161 # lsof | grep nvidia -> kill Xvfb
62- sudo lsof | grep "/usr/bin/Xvfb" | head -1 | awk '{print $2}' | xargs -I {} sudo kill -9 {}
62+ sudo lsof | grep "/usr/bin/Xvfb" | head -1 | awk '{print $2}' | xargs -I {} sudo kill -9 {} || echo "Command 'sudo lsof ...' is failed"
6363 # lsmod | grep nvidia
6464 sudo rmmod nvidia_uvm && sudo rmmod nvidia_drm && sudo rmmod nvidia_modeset && sudo rmmod nvidia
6565 # reload driver
@@ -86,9 +86,6 @@ run_pytorch_container: &run_pytorch_container
8686 docker run --gpus=all --rm -itd --shm-size 16G -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image >>
8787 docker exec -it pthd nvidia-smi
8888 docker exec -it pthd ls
89- # temporarily manually install v1.9.1
90- # https://github.com/pytorch/ignite/pull/2211#issuecomment-927080841
91- export update_pth_cmd='conda install -y pytorch==1.9.1 -c pytorch -c nvidia'
# NOTE(review): this change removes the export of update_pth_cmd (the "91-" line),
# but the docker exec below still expands "$update_pth_cmd". Unless the variable
# is defined elsewhere, bash -c receives an empty string here -- confirm whether
# this line should be removed together with the export.
9289 docker exec -it pthd /bin/bash -c "$update_pth_cmd"
9390
9491run_pytorch_devel_container : &run_pytorch_devel_container
@@ -100,9 +97,6 @@ run_pytorch_devel_container: &run_pytorch_devel_container
10097 docker run --gpus=all --rm -itd --shm-size 16G -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image_devel >>
10198 docker exec -it pthd nvidia-smi
10299 docker exec -it pthd ls
103- # temporarily manually install v1.9.1
104- # https://github.com/pytorch/ignite/pull/2211#issuecomment-927080841
105- export update_pth_cmd='conda install -y pytorch==1.9.1 -c pytorch -c nvidia'
# NOTE(review): this change removes the export of update_pth_cmd (the "105-" line),
# but the docker exec below still expands "$update_pth_cmd". Unless the variable
# is defined elsewhere, bash -c receives an empty string here -- confirm whether
# this line should be removed together with the export.
106100 docker exec -it pthd /bin/bash -c "$update_pth_cmd"
107101
108102install_dependencies : &install_dependencies
@@ -208,23 +202,25 @@ jobs:
208202 command : |
209203 bash .circleci/trigger_if_modified.sh "^(ignite|tests|examples|\.circleci).*"
210204
211- - run :
212- name : Update CUDA Driver for Windows
213- command : |
214- curl -O https://raw.githubusercontent.com/pytorch/pytorch/master/.circleci/scripts/windows_cuda_install.sh
215- mkdir -p "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
216- JOB_EXECUTOR="windows-with-nvidia-gpu" CUDA_VERSION="11.3" VC_PRODUCT="BuildTools" VC_YEAR="2019" bash ./windows_cuda_install.sh
217- bash -c "'/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe'"
205+ # - run:
206+ # name: Update CUDA Driver for Windows
207+ # command: |
208+ # curl -O https://raw.githubusercontent.com/pytorch/pytorch/master/.circleci/scripts/windows_cuda_install.sh
209+ # mkdir -p "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
210+ # JOB_EXECUTOR="windows-with-nvidia-gpu" CUDA_VERSION="11.3" VC_PRODUCT="BuildTools" VC_YEAR="2019" bash ./windows_cuda_install.sh
211+ # bash -c "'/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe'"
218212
# Step: install PyTorch via conda, then the dev requirements and the package
# itself via pip, and finally print the installed torch/CUDA information.
219213 - run :
220214 name : Install dependencies
221215 command : |
222216 conda --version
223217 # We have to use cuda 10.2 on Windows:
224218 # https://github.com/pytorch/ignite/issues/1843
# The replacement line pins pytorch to 1.9.1; the removed line left it unpinned.
# NOTE(review): the Docker image parameters in this same change move to 1.10.0,
# so this step tests a different PyTorch version -- confirm this is intentional.
225- conda install -y pytorch torchvision cudatoolkit=10.2 -c pytorch
219+ conda install -y pytorch==1.9.1 torchvision cudatoolkit=10.2 -c pytorch
226220 pip install -r requirements-dev.txt
227221 pip install .
# Prints torch.__version__, torch.version.cuda and torch.cuda.is_available().
222+ python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
# NOTE(review): the result of is_available() is discarded here, so this line
# does not fail the step when CUDA is unavailable; presumably it is kept only
# to surface the CUDA initialization warning in the log -- confirm.
223+ python -c "import torch; torch.cuda.is_available()"
228224
229225 - run :
230226 # https://github.com/pytorch/ignite/issues/1737
@@ -330,6 +326,7 @@ jobs:
330326 name : Trigger job if modified
331327 command : |
332328 bash .circleci/trigger_if_modified.sh "^(ignite|tests|examples|\.circleci).*"
329+ - << : *install_latest_nvidia
333330 - << : *pull_pytorch_stable_devel_image
334331 - << : *run_pytorch_devel_container
335332 - << : *install_dependencies
@@ -461,7 +458,10 @@ workflows:
461458 unless : << pipeline.parameters.should_build_docker_images >>
462459 jobs :
463460 - one_gpu_tests
464- - one_gpu_windows_tests
461+ # Windows tests are disabled because the NVIDIA driver is too old:
462+ # > c:\tools\miniconda3\lib\site-packages\torch\cuda\__init__.py:52: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 10010). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ..\c10\cuda\CUDAFunctions.cpp:115.)
463+ # > return torch._C._cuda_getDeviceCount() > 0
464+ # - one_gpu_windows_tests
465465 - two_gpus_tests
466466 - two_gpus_check_dist_cifar10_example
467467 - two_gpus_hvd_tests
0 commit comments