File tree Expand file tree Collapse file tree 3 files changed +14
-11
lines changed
Expand file tree Collapse file tree 3 files changed +14
-11
lines changed Original file line number Diff line number Diff line change 11# Changelog
22
3+ ## Unreleased
4+
5+ ### Fixed
6+ - Fix `edsnlp.utils.file_system.normalize_fs_path` file system detection not working correctly
7+
38## v0.11.1 (2024-04-02)
49
510### Added
Original file line number Diff line number Diff line change @@ -42,24 +42,21 @@ def normalize_fs_path(
4242 filesystem : Optional [FileSystem ],
4343 path : Union [str , Path ],
4444) -> Tuple [AbstractFileSystem , str ]:
45- path = str (path )
45+ has_protocol = isinstance (path , str ) and "://" in path
4646
47- if filesystem is None or (isinstance (path , str ) and "://" in path ):
48- path = (
49- os .path .abspath (path )
50- if isinstance (path , Path ) or "://" in path
51- else f"file://{ os .path .abspath (path )} "
52- )
53- inferred_fs , fs_path = pyarrow .fs .FileSystem .from_uri (path )
47+ # We need to detect the fs from the path
48+ if filesystem is None or has_protocol :
49+ uri : str = path if has_protocol else f"file://{ os .path .abspath (path )} "
50+ inferred_fs , fs_path = pyarrow .fs .FileSystem .from_uri (uri )
5451 filesystem = filesystem or inferred_fs
5552 assert inferred_fs .type_name == filesystem .type_name , (
5653 f"Protocol { inferred_fs .type_name } in path does not match "
5754 f"filesystem { filesystem .type_name } "
5855 )
59- path = fs_path
56+ path = fs_path # path without protocol
6057
6158 return (
6259 ArrowFSWrapper (filesystem )
6360 if isinstance (filesystem , pyarrow .fs .FileSystem )
6461 else filesystem
65- ), path
62+ ), str ( path )
Original file line number Diff line number Diff line change 1+ import os
12from pathlib import Path
23
34import pyarrow .dataset
@@ -242,7 +243,7 @@ def test_read_to_parquet(blank_nlp, tmpdir):
242243 fs = pyarrow .fs .LocalFileSystem ()
243244 doc = list (
244245 edsnlp .data .read_parquet (
245- input_dir ,
246+ input_dir . relative_to ( os . getcwd ()) ,
246247 converter = "omop" ,
247248 span_attributes = ["etat" , "assertion" ],
248249 doc_attributes = ["context_var" ],
You can’t perform that action at this time.
0 commit comments