@@ -39,7 +39,11 @@
39 | 39 | if common_utils.SEED is None: |
40 | 40 | common_utils.SEED = 1234 |
41 | 41 |
42 | | -_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else []) + (["xpu"] if torch.xpu.is_available() else []) |
| 42 | +_DEVICES = ( |
| 43 | + ["cpu"] |
| 44 | + + (["cuda"] if torch.cuda.is_available() else []) |
| 45 | + + (["xpu"] if torch.xpu.is_available() else []) |
| 46 | +) |
43 | 47 | _DEVICE = get_current_accelerator_device() |
44 | 48 |
45 | 49 |
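For context on the `_DEVICES`/`_DEVICE` setup above: a minimal sketch of what `get_current_accelerator_device` plausibly does, assuming the `torch.accelerator` API from recent PyTorch. The real helper lives in the repo's test utilities and may differ; this is an illustration, not the actual implementation.

```python
import torch

def get_current_accelerator_device():
    # torch.accelerator.current_accelerator() returns a torch.device
    # (e.g. device(type="cuda") or device(type="xpu")), or None when
    # no accelerator backend is present.
    acc = torch.accelerator.current_accelerator()
    return acc if acc is not None else torch.device("cpu")
```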
@@ -184,7 +188,9 @@ def test_int8_weight_only_training(self, compile, device): |
184 | 188 | ], |
185 | 189 | ) |
186 | 190 | @parametrize("module_swap", [False, True]) |
187 | | - @pytest.mark.skipif(not torch.accelerator.is_available(), reason="GPU not available") |
| 191 | + @pytest.mark.skipif( |
| 192 | + not torch.accelerator.is_available(), reason="GPU not available" |
| 193 | + ) |
188 | 194 | def test_int8_mixed_precision_training(self, compile, config, module_swap): |
189 | 195 | _reset() |
190 | 196 | bsize = 64 |
@@ -223,7 +229,9 @@ def snr(ref, actual): |
223 | 229 |
224 | 230 | @pytest.mark.skip("Flaky on CI") |
225 | 231 | @parametrize("compile", [False, True]) |
226 | | - @pytest.mark.skipif(not torch.accelerator.is_available(), reason="GPU not available") |
| 232 | + @pytest.mark.skipif( |
| 233 | + not torch.accelerator.is_available(), reason="GPU not available" |
| 234 | + ) |
227 | 235 | def test_bitnet_training(self, compile): |
228 | 236 | # reference implementation |
229 | 237 | # https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf |
@@ -298,7 +306,7 @@ def world_size(self) -> int: |
298 | 306 | return _FSDP_WORLD_SIZE |
299 | 307 |
300 | 308 | @skip_if_lt_x_gpu(_FSDP_WORLD_SIZE) |
301 | | - @pytest.mark.skipif(not torch.accelerator.is_available(), reason="GPU not available") |
| 309 | + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") |
302 | 310 | def test_fsdp2_correctness(self): |
303 | 311 | mp_policy = MixedPrecisionPolicy() |
304 | 312 |
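Unlike the surrounding hunks, which only rewrap lines, this one changes behavior: the guard for `test_fsdp2_correctness` is narrowed from any accelerator to CUDA specifically. A minimal illustration of the difference, assuming the `torch.accelerator` API:

```python
import torch

# torch.accelerator.is_available() is True for any supported backend
# (CUDA, XPU, MPS, ...); torch.cuda.is_available() checks CUDA only.
# On an XPU-only machine the old guard would have run this FSDP2 test,
# while the new guard skips it.
print(torch.accelerator.is_available())
print(torch.cuda.is_available())
```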
@@ -389,14 +397,18 @@ def _run_subtest(self, args): |
389 | 397 | ) |
390 | 398 |
391 | 399 | @skip_if_lt_x_gpu(_FSDP_WORLD_SIZE) |
392 | | - @pytest.mark.skipif(not torch.accelerator.is_available(), reason="GPU not available") |
| 400 | + @pytest.mark.skipif( |
| 401 | + not torch.accelerator.is_available(), reason="GPU not available" |
| 402 | + ) |
393 | 403 | def test_precompute_bitnet_scale(self): |
394 | 404 | from torchao.prototype.quantized_training.bitnet import ( |
395 | 405 | get_bitnet_scale, |
396 | 406 | precompute_bitnet_scale_for_fsdp, |
397 | 407 | ) |
398 | 408 |
399 | | - model = nn.Sequential(nn.Linear(32, 64), nn.GELU(), nn.Linear(64, 32)).to(_DEVICE) |
| 409 | + model = nn.Sequential(nn.Linear(32, 64), nn.GELU(), nn.Linear(64, 32)).to( |
| 410 | + _DEVICE |
| 411 | + ) |
400 | 412 | model_fsdp = copy.deepcopy(model) |
401 | 413 | quantize_(model_fsdp, bitnet_training()) |
402 | 414 | fully_shard(model_fsdp) |
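For readers unfamiliar with the helper under test: by analogy with float8's `precompute_float8_dynamic_scale_for_fsdp`, the scale precompute is presumably meant to run once per optimizer step across all FSDP parameters. A hypothetical training-loop sketch follows; `dataloader`, `loss_fn`, and `optim` are stand-ins, and the exact call pattern is an assumption not shown in this diff.

```python
from torchao.prototype.quantized_training.bitnet import (
    precompute_bitnet_scale_for_fsdp,
)

for inputs, labels in dataloader:  # hypothetical loop
    loss = loss_fn(model_fsdp(inputs), labels)
    loss.backward()
    optim.step()
    optim.zero_grad()
    # recompute BitNet weight scales for all FSDP params in one pass
    precompute_bitnet_scale_for_fsdp(model_fsdp)
```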