From d9a4896ab0dfa1330246e80c21af6c76107138a2 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 7 Apr 2024 22:12:01 +0200 Subject: [PATCH 01/11] Revert changes to LocalFileSystem._strip_protocol but keep changes in tests This partially reverts 16535529c79403778738d3226819acd1aecb7bc0 --- fsspec/implementations/local.py | 122 +++++++++++++++----------------- 1 file changed, 57 insertions(+), 65 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 9d2b335cf..d179935ac 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -3,6 +3,7 @@ import logging import os import os.path as osp +import re import shutil import stat import tempfile @@ -15,12 +16,6 @@ logger = logging.getLogger("fsspec.local") -def _remove_prefix(text: str, prefix: str): - if text.startswith(prefix): - return text[len(prefix) :] - return text - - class LocalFileSystem(AbstractFileSystem): """Interface to files on local storage @@ -121,8 +116,8 @@ def lexists(self, path, **kwargs): return osp.lexists(path) def cp_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1, remove_trailing_slash=True) - path2 = self._strip_protocol(path2, remove_trailing_slash=True) + path1 = self._strip_protocol(path1).rstrip("/") + path2 = self._strip_protocol(path2).rstrip("/") if self.auto_mkdir: self.makedirs(self._parent(path2), exist_ok=True) if self.isfile(path1): @@ -151,8 +146,8 @@ def put_file(self, path1, path2, callback=None, **kwargs): return self.cp_file(path1, path2, **kwargs) def mv_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1, remove_trailing_slash=True) - path2 = self._strip_protocol(path2, remove_trailing_slash=True) + path1 = self._strip_protocol(path1).rstrip("/") + path2 = self._strip_protocol(path2).rstrip("/") shutil.move(path1, path2) def link(self, src, dst, **kwargs): @@ -176,7 +171,7 @@ def rm(self, path, recursive=False, maxdepth=None): path = [path] for p in path: - p = self._strip_protocol(p, remove_trailing_slash=True) + p = self._strip_protocol(p).rstrip("/") if self.isdir(p): if not recursive: raise ValueError("Cannot delete directory, set recursive=True") @@ -219,32 +214,24 @@ def modified(self, path): @classmethod def _parent(cls, path): - path = cls._strip_protocol(path, remove_trailing_slash=True) - if os.sep == "/": - # posix native - return path.rsplit("/", 1)[0] or "/" + path = cls._strip_protocol(path).rstrip("/") + if "/" in path: + return path.rsplit("/", 1)[0] else: - # NT - path_ = path.rsplit("/", 1)[0] - if len(path_) <= 3: - if path_[1:2] == ":": - # nt root (something like c:/) - return path_[0] + ":/" - # More cases may be required here - return path_ + return cls.root_marker @classmethod - def _strip_protocol(cls, path, remove_trailing_slash=False): + def _strip_protocol(cls, path): path = stringify_path(path) - if path.startswith("file:"): - path = _remove_prefix(_remove_prefix(path, "file://"), "file:") - if os.sep == "\\": - path = path.lstrip("/") + if path.startswith("file://"): + path = path[7:] + elif path.startswith("file:"): + path = path[5:] + elif path.startswith("local://"): + path = path[8:] elif path.startswith("local:"): - path = _remove_prefix(_remove_prefix(path, "local://"), "local:") - if os.sep == "\\": - path = path.lstrip("/") - return make_path_posix(path, remove_trailing_slash) + path = path[6:] + return make_path_posix(path).rstrip("/") or cls.root_marker def _isfilestore(self): # Inheriting from DaskFileSystem makes this False (S3, etc. were) @@ -257,42 +244,47 @@ def chmod(self, path, mode): return os.chmod(path, mode) -def make_path_posix(path, remove_trailing_slash=False): - """Make path generic for current OS""" - if not isinstance(path, str): - if isinstance(path, (list, set, tuple)): - return type(path)(make_path_posix(p, remove_trailing_slash) for p in path) - else: - path = str(stringify_path(path)) - if os.sep == "/": - # Native posix +def make_path_posix(path, sep=os.sep): + """Make path generic""" + if isinstance(path, (list, set, tuple)): + return type(path)(make_path_posix(p) for p in path) + if "~" in path: + path = osp.expanduser(path) + if sep == "/": + # most common fast case for posix if path.startswith("/"): - # most common fast case for posix - return path.rstrip("/") or "/" if remove_trailing_slash else path - elif path.startswith("~"): - return make_path_posix(osp.expanduser(path), remove_trailing_slash) - elif path.startswith("./"): + return path + if path.startswith("./"): path = path[2:] - path = f"{os.getcwd()}/{path}" - return path.rstrip("/") or "/" if remove_trailing_slash else path return f"{os.getcwd()}/{path}" - else: - # NT handling - if len(path) > 1: - if path[1] == ":": - # windows full path like "C:\\local\\path" - if len(path) <= 3: - # nt root (something like c:/) - return path[0] + ":/" - path = path.replace("\\", "/").replace("//", "/") - return path.rstrip("/") if remove_trailing_slash else path - elif path[0] == "~": - return make_path_posix(osp.expanduser(path), remove_trailing_slash) - elif path.startswith(("\\\\", "//")): - # windows UNC/DFS-style paths - path = "//" + path[2:].replace("\\", "/").replace("//", "/") - return path.rstrip("/") if remove_trailing_slash else path - return make_path_posix(osp.abspath(path), remove_trailing_slash) + if ( + (sep not in path and "/" not in path) + or (sep == "/" and not path.startswith("/")) + or (sep == "\\" and ":" not in path and not path.startswith("\\\\")) + ): + # relative path like "path" or "rel\\path" (win) or rel/path" + if os.sep == "\\": + # abspath made some more '\\' separators + return make_path_posix(osp.abspath(path)) + else: + return f"{os.getcwd()}/{path}" + if path.startswith("file://"): + path = path[7:] + if re.match("/[A-Za-z]:", path): + # for windows file URI like "file:///C:/folder/file" + # or "file:///C:\\dir\\file" + path = path[1:].replace("\\", "/").replace("//", "/") + if path.startswith("\\\\"): + # special case for windows UNC/DFS-style paths, do nothing, + # just flip the slashes around (case below does not work!) + return path.replace("\\", "/") + if re.match("[A-Za-z]:", path): + # windows full path like "C:\\local\\path" + return path.lstrip("\\").replace("\\", "/").replace("//", "/") + if path.startswith("\\"): + # windows network path like "\\server\\path" + return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/") + return path def trailing_sep(path): From c9eaf46e9f74fe6127f6da8426d7cf40ec3932ff Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 7 Apr 2024 22:16:07 +0200 Subject: [PATCH 02/11] Restore _strip_protocol and _parent behavior prior to revert --- fsspec/implementations/local.py | 21 ++++++++++----------- fsspec/implementations/tests/test_local.py | 3 +-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index d179935ac..ce5a1dd63 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -116,8 +116,8 @@ def lexists(self, path, **kwargs): return osp.lexists(path) def cp_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1).rstrip("/") - path2 = self._strip_protocol(path2).rstrip("/") + path1 = self._strip_protocol(path1) + path2 = self._strip_protocol(path2) if self.auto_mkdir: self.makedirs(self._parent(path2), exist_ok=True) if self.isfile(path1): @@ -146,8 +146,8 @@ def put_file(self, path1, path2, callback=None, **kwargs): return self.cp_file(path1, path2, **kwargs) def mv_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1).rstrip("/") - path2 = self._strip_protocol(path2).rstrip("/") + path1 = self._strip_protocol(path1) + path2 = self._strip_protocol(path2) shutil.move(path1, path2) def link(self, src, dst, **kwargs): @@ -171,7 +171,7 @@ def rm(self, path, recursive=False, maxdepth=None): path = [path] for p in path: - p = self._strip_protocol(p).rstrip("/") + p = self._strip_protocol(p) if self.isdir(p): if not recursive: raise ValueError("Cannot delete directory, set recursive=True") @@ -214,11 +214,8 @@ def modified(self, path): @classmethod def _parent(cls, path): - path = cls._strip_protocol(path).rstrip("/") - if "/" in path: - return path.rsplit("/", 1)[0] - else: - return cls.root_marker + path = cls._strip_protocol(path) + return osp.dirname(path) or cls.root_marker @classmethod def _strip_protocol(cls, path): @@ -231,7 +228,9 @@ def _strip_protocol(cls, path): path = path[8:] elif path.startswith("local:"): path = path[6:] - return make_path_posix(path).rstrip("/") or cls.root_marker + drive, path = osp.splitdrive(make_path_posix(path)) + path = path.rstrip("/") or cls.root_marker + return drive + path def _isfilestore(self): # Inheriting from DaskFileSystem makes this False (S3, etc. were) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index b594cbc41..271ad891f 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -474,7 +474,6 @@ def test_make_path_posix(): assert make_path_posix("/posix") == f"{drive}:/posix" # Windows drive requires trailing slash assert make_path_posix("C:\\") == "C:/" - assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/" else: assert make_path_posix("/a/posix/path") == "/a/posix/path" assert make_path_posix("/posix") == "/posix" @@ -656,7 +655,7 @@ def test_strip_protocol_expanduser(): assert "~" not in stripped assert "file://" not in stripped assert stripped.startswith(os.path.expanduser("~").replace("\\", "/")) - path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True) + path = LocalFileSystem._strip_protocol("./") assert not path.endswith("/") From 516b06266025e06a3c7cb8bc71217664087d47cc Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Mon, 15 Apr 2024 21:17:50 +0200 Subject: [PATCH 03/11] tests: add more tests for LocalFileSystem._strip_protocol --- fsspec/implementations/tests/test_local.py | 172 ++++++++++++++++++--- 1 file changed, 152 insertions(+), 20 deletions(-) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 271ad891f..503fbbd17 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -648,26 +648,158 @@ def test_pickle(tmpdir): assert f2.read() == f.read() -def test_strip_protocol_expanduser(): - path = "file://~\\foo\\bar" if WIN else "file://~/foo/bar" - stripped = LocalFileSystem._strip_protocol(path) - assert path != stripped - assert "~" not in stripped - assert "file://" not in stripped - assert stripped.startswith(os.path.expanduser("~").replace("\\", "/")) - path = LocalFileSystem._strip_protocol("./") - assert not path.endswith("/") - - -def test_strip_protocol_no_authority(): - path = "file:\\foo\\bar" if WIN else "file:/foo/bar" - stripped = LocalFileSystem._strip_protocol(path) - assert "file:" not in stripped - assert stripped.endswith("/foo/bar") - if WIN: - assert ( - LocalFileSystem._strip_protocol("file://C:\\path\\file") == "C:/path/file" - ) +@pytest.fixture() +def cwd(): + yield os.getcwd().replace("\\", "/") + + +@pytest.fixture() +def current_drive(cwd): + drive = os.path.splitdrive(cwd)[0] + assert not drive or (len(drive) == 2 and drive.endswith(":")) + yield drive + + +@pytest.fixture() +def user_home(): + pth = os.path.expanduser("~").replace("\\", "/") + assert not pth.endswith("/") + yield pth + + +def winonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(not WIN, reason="Windows only")) + + +def posixonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(WIN, reason="Posix only")) + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file://~/foo/bar", "{user_home}/foo/bar"), + ("~/foo/bar", "{user_home}/foo/bar"), + winonly("~\\foo\\bar", "{user_home}/foo/bar"), + winonly("file://~\\foo\\bar", "{user_home}/foo/bar"), + ], +) +def test_strip_protocol_expanduser(uri, expected, user_home): + expected = expected.format(user_home=user_home) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file://", "{cwd}"), + posixonly("file://.", "{cwd}/."), + winonly("file://.", "{cwd}"), + ("file://./", "{cwd}"), + ("./", "{cwd}"), + ("file:path", "{cwd}/path"), + ("file://path", "{cwd}/path"), + ("path", "{cwd}/path"), + ("./path", "{cwd}/path"), + winonly(".\\", "{cwd}"), + winonly("file://.\\path", "{cwd}/path"), + ], +) +def test_strip_protocol_relative_paths(uri, expected, cwd): + expected = expected.format(cwd=cwd) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + posixonly("file:/foo/bar", "/foo/bar"), + winonly("file:/foo/bar", "{current_drive}/foo/bar"), + winonly("file:\\foo\\bar", "{current_drive}/foo/bar"), + winonly("file://D:\\path\\file", "D:/path/file"), + ], +) +def test_strip_protocol_no_authority(uri, expected, cwd, current_drive): + expected = expected.format(cwd=cwd, current_drive=current_drive) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file:/path", "/path"), + ("file:///path", "/path"), + ("file:////path", "//path"), + ("local:/path", "/path"), + ("s3://bucket/key", "{cwd}/s3://bucket/key"), + ("/path", "/path"), + ("file:///", "/"), + ] + if not WIN + else [ + ("file:c:/path", "c:/path"), + ("file:/c:/path", "c:/path"), + ("file:/C:/path", "C:/path"), + ("file://c:/path", "c:/path"), + ("file:///c:/path", "c:/path"), + ("local:/path", "{current_drive}/path"), + ("s3://bucket/key", "s3://bucket/key"), + ("c:/path", "c:/path"), + ("c:\\path", "c:/path"), + ("file:///", "{current_drive}/"), + pytest.param( + "file://localhost/c:/path", + "c:/path", + marks=pytest.mark.xfail(reason="not supported"), + ), + ], +) +def test_strip_protocol_absolute_paths(uri, expected, current_drive, cwd): + expected = expected.format(current_drive=current_drive, cwd=cwd) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file:c|/path", "c:/path"), + ("file:/D|/path", "D:/path"), + ("file:///C|/path", "C:/path"), + ], +) +@pytest.mark.skipif(not WIN, reason="Windows only") +@pytest.mark.xfail(WIN, reason="not supported") +def test_strip_protocol_legacy_dos_uris(uri, expected): + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, stripped", + [ + ("file://remote/share/path", "{cwd}/remote/share/path"), + ("file:////remote/share/path", "//remote/share/path"), + ( + "file://///remote/share/path", + "///remote/share/path", + ), + ("//remote/share/path", "//remote/share/path"), + ("\\\\remote\\share\\path", "//remote/share/path"), + ], +) +@pytest.mark.skipif(not WIN, reason="Windows only") +def test_strip_protocol_windows_remote_shares(uri, stripped, cwd): + stripped = stripped.format(cwd=cwd) + + assert LocalFileSystem._strip_protocol(uri) == stripped def test_mkdir_twice_faile(tmpdir): From c5cc90fa0446133d658c0995538531e7e3e2f98e Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Mon, 29 Apr 2024 03:54:59 +0200 Subject: [PATCH 04/11] tests: update local tests --- fsspec/implementations/tests/test_local.py | 118 ++++++++++++++------- 1 file changed, 79 insertions(+), 39 deletions(-) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 503fbbd17..2b4f2f1d2 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -40,6 +40,35 @@ odir = os.getcwd() +@pytest.fixture() +def cwd(): + pth = os.getcwd().replace("\\", "/") + assert not pth.endswith("/") + yield pth + + +@pytest.fixture() +def current_drive(cwd): + drive = os.path.splitdrive(cwd)[0] + assert not drive or (len(drive) == 2 and drive.endswith(":")) + yield drive + + +@pytest.fixture() +def user_home(): + pth = os.path.expanduser("~").replace("\\", "/") + assert not pth.endswith("/") + yield pth + + +def winonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(not WIN, reason="Windows only")) + + +def posixonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(WIN, reason="Posix only")) + + @contextmanager def filetexts(d, open=open, mode="t"): """Dumps a number of textfiles to disk @@ -515,6 +544,49 @@ def test_parent(): assert LocalFileSystem._parent("/") == "/" +@pytest.mark.parametrize( + "path,parent", + [ + ("C:\\", "C:/"), + ("C:\\.", "C:/"), + ("C:\\.\\", "C:/"), + ("file:C:/", "C:/"), + ("file://C:/", "C:/"), + ("local:C:/", "C:/"), + ("local://C:/", "C:/"), + ("\\\\server\\share", "//server/share"), + ("\\\\server\\share\\", "//server/share"), + ("\\\\server\\share\\path", "//server/share"), + ("//server/share", "//server/share"), + ("//server/share/", "//server/share"), + ("//server/share/path", "//server/share"), + ("C:\\file or folder", "C:/"), + ("C:\\file or folder\\", "C:/"), + ("file:///", "{current_drive}/"), + ("file:///path", "{current_drive}/"), + ] + if WIN + else [ + ("/", "/"), + ("/.", "/"), + ("/./", "/"), + ("file:/", "/"), + ("file:///", "/"), + ("local:/", "/"), + ("local:///", "/"), + ("/file or folder", "/"), + ("/file or folder/", "/"), + ("file:///", "/"), + ("file:///path", "/"), + ("file://c/", "{cwd}"), + ], +) +def test_parent_edge_cases(path, parent, cwd, current_drive): + parent = parent.format(cwd=cwd, current_drive=current_drive) + + assert LocalFileSystem._parent(path) == parent + + def test_linked_files(tmpdir): tmpdir = str(tmpdir) fn0 = os.path.join(tmpdir, "target") @@ -648,33 +720,6 @@ def test_pickle(tmpdir): assert f2.read() == f.read() -@pytest.fixture() -def cwd(): - yield os.getcwd().replace("\\", "/") - - -@pytest.fixture() -def current_drive(cwd): - drive = os.path.splitdrive(cwd)[0] - assert not drive or (len(drive) == 2 and drive.endswith(":")) - yield drive - - -@pytest.fixture() -def user_home(): - pth = os.path.expanduser("~").replace("\\", "/") - assert not pth.endswith("/") - yield pth - - -def winonly(*args): - return pytest.param(*args, marks=pytest.mark.skipif(not WIN, reason="Windows only")) - - -def posixonly(*args): - return pytest.param(*args, marks=pytest.mark.skipif(WIN, reason="Posix only")) - - @pytest.mark.parametrize( "uri, expected", [ @@ -695,8 +740,7 @@ def test_strip_protocol_expanduser(uri, expected, user_home): "uri, expected", [ ("file://", "{cwd}"), - posixonly("file://.", "{cwd}/."), - winonly("file://.", "{cwd}"), + ("file://.", "{cwd}"), ("file://./", "{cwd}"), ("./", "{cwd}"), ("file:path", "{cwd}/path"), @@ -749,7 +793,7 @@ def test_strip_protocol_no_authority(uri, expected, cwd, current_drive): ("file://c:/path", "c:/path"), ("file:///c:/path", "c:/path"), ("local:/path", "{current_drive}/path"), - ("s3://bucket/key", "s3://bucket/key"), + ("s3://bucket/key", "{cwd}/s3://bucket/key"), ("c:/path", "c:/path"), ("c:\\path", "c:/path"), ("file:///", "{current_drive}/"), @@ -785,17 +829,13 @@ def test_strip_protocol_legacy_dos_uris(uri, expected): @pytest.mark.parametrize( "uri, stripped", [ - ("file://remote/share/path", "{cwd}/remote/share/path"), - ("file:////remote/share/path", "//remote/share/path"), - ( - "file://///remote/share/path", - "///remote/share/path", - ), - ("//remote/share/path", "//remote/share/path"), - ("\\\\remote\\share\\path", "//remote/share/path"), + ("file://remote/share/pth", "{cwd}/remote/share/pth"), + ("file:////remote/share/pth", "//remote/share/pth"), + ("file://///remote/share/pth", "///remote/share/pth"), + ("//remote/share/pth", "//remote/share/pth"), + winonly("\\\\remote\\share\\pth", "//remote/share/pth"), ], ) -@pytest.mark.skipif(not WIN, reason="Windows only") def test_strip_protocol_windows_remote_shares(uri, stripped, cwd): stripped = stripped.format(cwd=cwd) From 29a620d6a0e9bf39363dcd6dfd7a4af6c40ccf08 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Mon, 29 Apr 2024 03:57:35 +0200 Subject: [PATCH 05/11] local: _strip_protocol, _parent and make_posix_path win compatibility --- fsspec/implementations/local.py | 122 +++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 41 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index ce5a1dd63..3c5bf137c 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -3,7 +3,6 @@ import logging import os import os.path as osp -import re import shutil import stat import tempfile @@ -215,7 +214,18 @@ def modified(self, path): @classmethod def _parent(cls, path): path = cls._strip_protocol(path) - return osp.dirname(path) or cls.root_marker + if os.sep == "/": + # posix native + return path.rsplit("/", 1)[0] or "/" + else: + # NT + path_ = path.rsplit("/", 1)[0] + if len(path_) <= 3: + if path_[1:2] == ":": + # nt root (something like c:/) + return path_[0] + ":/" + # More cases may be required here + return path_ @classmethod def _strip_protocol(cls, path): @@ -228,9 +238,33 @@ def _strip_protocol(cls, path): path = path[8:] elif path.startswith("local:"): path = path[6:] - drive, path = osp.splitdrive(make_path_posix(path)) - path = path.rstrip("/") or cls.root_marker - return drive + path + + path = make_path_posix(path) + if os.sep != "/": + # This code-path is a stripped down version of + # > drive, path = ntpath.splitdrive(path) + if path[1:2] == ":": + # Absolute drive-letter path, e.g. X:\Windows + # Relative path with drive, e.g. X:Windows + drive, path = path[:2], path[2:] + elif path[:2] == "//": + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + if (index1 := path.find("/", 2)) == -1 or ( + index2 := path.find("/", index1 + 1) + ) == -1: + drive, path = path, "" + else: + drive, path = path[:index2], path[index2:] + else: + # Relative path, e.g. Windows + drive = "" + + path = path.rstrip("/") or cls.root_marker + return drive + path + + else: + return path.rstrip("/") or cls.root_marker def _isfilestore(self): # Inheriting from DaskFileSystem makes this False (S3, etc. were) @@ -243,47 +277,53 @@ def chmod(self, path, mode): return os.chmod(path, mode) -def make_path_posix(path, sep=os.sep): - """Make path generic""" - if isinstance(path, (list, set, tuple)): - return type(path)(make_path_posix(p) for p in path) - if "~" in path: - path = osp.expanduser(path) - if sep == "/": - # most common fast case for posix +def make_path_posix(path): + """Make path generic for current OS""" + if not isinstance(path, str): + if isinstance(path, (list, set, tuple)): + return type(path)(make_path_posix(p) for p in path) + else: + path = str(stringify_path(path)) + if os.sep == "/": + # Native posix if path.startswith("/"): + # most common fast case for posix return path - if path.startswith("./"): + elif path.startswith("~"): + return osp.expanduser(path) + elif path.startswith("./"): path = path[2:] + elif path == ".": + path = "" return f"{os.getcwd()}/{path}" - if ( - (sep not in path and "/" not in path) - or (sep == "/" and not path.startswith("/")) - or (sep == "\\" and ":" not in path and not path.startswith("\\\\")) - ): - # relative path like "path" or "rel\\path" (win) or rel/path" - if os.sep == "\\": - # abspath made some more '\\' separators - return make_path_posix(osp.abspath(path)) + else: + # NT handling + if path[0:1] == "/" and path[2:3] == ":": + # path is like "/c:/local/path" + path = path[1:] + if path[1:2] == ":": + # windows full path like "C:\\local\\path" + if len(path) <= 3: + # nt root (something like c:/) + return path[0] + ":/" + path = path.replace("\\", "/") + return path + elif path[0:1] == "~": + return make_path_posix(osp.expanduser(path)) + elif path.startswith(("\\\\", "//")): + # windows UNC/DFS-style paths + return "//" + path[2:].replace("\\", "/") + elif path.startswith(("\\", "/")): + # windows relative path with root + path = path.replace("\\", "/") + return f"{osp.splitdrive(os.getcwd())[0]}{path}" else: - return f"{os.getcwd()}/{path}" - if path.startswith("file://"): - path = path[7:] - if re.match("/[A-Za-z]:", path): - # for windows file URI like "file:///C:/folder/file" - # or "file:///C:\\dir\\file" - path = path[1:].replace("\\", "/").replace("//", "/") - if path.startswith("\\\\"): - # special case for windows UNC/DFS-style paths, do nothing, - # just flip the slashes around (case below does not work!) - return path.replace("\\", "/") - if re.match("[A-Za-z]:", path): - # windows full path like "C:\\local\\path" - return path.lstrip("\\").replace("\\", "/").replace("//", "/") - if path.startswith("\\"): - # windows network path like "\\server\\path" - return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/") - return path + path = path.replace("\\", "/") + if path.startswith("./"): + path = path[2:] + elif path == ".": + path = "" + return f"{make_path_posix(os.getcwd())}/{path}" def trailing_sep(path): From 53d7660e69919f42e38e62bdc425826034b87112 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Wed, 1 May 2024 15:34:12 +0200 Subject: [PATCH 06/11] tests: add more make_posix_path tests --- fsspec/implementations/tests/test_local.py | 65 +++++++++++++++++++++- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 2b4f2f1d2..86ae0efd9 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -535,6 +535,62 @@ def test_make_path_posix(): assert userpath.endswith("/path") +@pytest.mark.parametrize( + "path", + [ + "/abc/def", + "abc/def", + "", + ".", + "//server/share", + "C:\\", + "d:/abc/def", + "e:", + pytest.param( + "f:foo", + marks=[pytest.mark.xfail(WIN, reason="unsupported")], + id="relative-path-with-drive", + ), + ], +) +def test_make_path_posix_returns_absolute_paths(path): + posix_pth = make_path_posix(path) + assert os.path.isabs(posix_pth) + + +@pytest.mark.parametrize("container_cls", [list, set, tuple]) +def test_make_path_posix_set_list_tuple(container_cls): + paths = container_cls( + [ + "/foo/bar", + "bar/foo", + ] + ) + posix_paths = make_path_posix(paths) + assert isinstance(posix_paths, container_cls) + assert posix_paths == container_cls( + [ + make_path_posix("/foo/bar"), + make_path_posix("bar/foo"), + ] + ) + + +@pytest.mark.parametrize( + "obj", + [ + 1, + True, + None, + object(), + ], +) +@pytest.mark.xfail(reason="all inputs coerced to string") +def test_make_path_posix_wrong_type(obj): + with pytest.raises(AttributeError): # should probably be changed to TypeError + make_path_posix(obj) + + def test_parent(): if WIN: assert LocalFileSystem._parent("C:\\file or folder") == "C:/" @@ -576,7 +632,6 @@ def test_parent(): ("local:///", "/"), ("/file or folder", "/"), ("/file or folder/", "/"), - ("file:///", "/"), ("file:///path", "/"), ("file://c/", "{cwd}"), ], @@ -764,6 +819,8 @@ def test_strip_protocol_relative_paths(uri, expected, cwd): posixonly("file:/foo/bar", "/foo/bar"), winonly("file:/foo/bar", "{current_drive}/foo/bar"), winonly("file:\\foo\\bar", "{current_drive}/foo/bar"), + winonly("file:D:\\path\\file", "D:/path/file"), + winonly("file:/D:\\path\\file", "D:/path/file"), winonly("file://D:\\path\\file", "D:/path/file"), ], ) @@ -800,7 +857,9 @@ def test_strip_protocol_no_authority(uri, expected, cwd, current_drive): pytest.param( "file://localhost/c:/path", "c:/path", - marks=pytest.mark.xfail(reason="not supported"), + marks=pytest.mark.xfail( + reason="rfc8089 section3 'localhost uri' not supported" + ), ), ], ) @@ -820,7 +879,7 @@ def test_strip_protocol_absolute_paths(uri, expected, current_drive, cwd): ], ) @pytest.mark.skipif(not WIN, reason="Windows only") -@pytest.mark.xfail(WIN, reason="not supported") +@pytest.mark.xfail(WIN, reason="legacy dos uris not supported") def test_strip_protocol_legacy_dos_uris(uri, expected): stripped = LocalFileSystem._strip_protocol(uri) assert expected == stripped From 10bb4d09680d8ee3098fe5118768f2e7dd67e921 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Wed, 1 May 2024 15:34:48 +0200 Subject: [PATCH 07/11] update make_posix_path docstring --- fsspec/implementations/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 3e024e79e..8cf0bc27b 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -278,7 +278,7 @@ def chmod(self, path, mode): def make_path_posix(path): - """Make path generic for current OS""" + """Make path generic and absolute for current OS""" if not isinstance(path, str): if isinstance(path, (list, set, tuple)): return type(path)(make_path_posix(p) for p in path) From 8ef54b032077ad0814f95ce5160926cc8c4c9081 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Wed, 1 May 2024 17:29:49 +0200 Subject: [PATCH 08/11] tests: check for os.path.isabs issue in make_path_posix tests --- fsspec/implementations/tests/test_local.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 86ae0efd9..0ffcb9ac1 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -542,10 +542,20 @@ def test_make_path_posix(): "abc/def", "", ".", - "//server/share", + "//server/share/", + "\\\\server\\share\\", "C:\\", "d:/abc/def", "e:", + pytest.param( + "\\\\server\\share", + marks=[ + pytest.mark.xfail( + WIN and sys.version_info < (3, 11), + reason="requires py3.11+ see: python/cpython#96290", + ) + ], + ), pytest.param( "f:foo", marks=[pytest.mark.xfail(WIN, reason="unsupported")], From 6f000a25a15a8bf005d1dc921fe7b1c360bd2099 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Thu, 2 May 2024 15:40:39 +0200 Subject: [PATCH 09/11] ci: limit git version to <2.45.0 until setuptools_scm is fixed --- ci/environment-py38.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 25f68e5dd..9c514de98 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - pip - - git + - git <2.45.0 - py - pip: - hadoop-test-cluster From d315173b11da2563834c6cc2a65b24f25de1741c Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Fri, 3 May 2024 00:48:43 +0200 Subject: [PATCH 10/11] fsspec.implementations.local: make_path_posix raise TypeError if input can't be converted to str --- fsspec/implementations/local.py | 4 +++- fsspec/implementations/tests/test_local.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 8cf0bc27b..9881606f1 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -283,7 +283,9 @@ def make_path_posix(path): if isinstance(path, (list, set, tuple)): return type(path)(make_path_posix(p) for p in path) else: - path = str(stringify_path(path)) + path = stringify_path(path) + if not isinstance(path, str): + raise TypeError(f"could not convert {path!r} to string") if os.sep == "/": # Native posix if path.startswith("/"): diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 0ffcb9ac1..8156a0df9 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -595,9 +595,8 @@ def test_make_path_posix_set_list_tuple(container_cls): object(), ], ) -@pytest.mark.xfail(reason="all inputs coerced to string") def test_make_path_posix_wrong_type(obj): - with pytest.raises(AttributeError): # should probably be changed to TypeError + with pytest.raises(TypeError): make_path_posix(obj) From 58cb4f49f5f844cdf108a6c7e742ec0c30f56fd0 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Fri, 3 May 2024 01:19:11 +0200 Subject: [PATCH 11/11] add tests for fsspec.utils.stringify_path and remove redundant check --- fsspec/tests/test_utils.py | 38 +++++++++++++++++++++++++++++++++++++- fsspec/utils.py | 2 -- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fsspec/tests/test_utils.py b/fsspec/tests/test_utils.py index 0efd5d91d..10fa89a2d 100644 --- a/fsspec/tests/test_utils.py +++ b/fsspec/tests/test_utils.py @@ -1,6 +1,6 @@ import io import sys -from pathlib import Path +from pathlib import Path, PurePath from unittest.mock import Mock import pytest @@ -440,3 +440,39 @@ def test_size(): f = io.BytesIO(b"hello") assert fsspec.utils.file_size(f) == 5 assert f.tell() == 0 + + +class _HasFspath: + def __fspath__(self): + return "foo" + + +class _HasPathAttr: + def __init__(self): + self.path = "foo" + + +@pytest.mark.parametrize( + "path,expected", + [ + # coerce to string + ("foo", "foo"), + (Path("foo"), "foo"), + (PurePath("foo"), "foo"), + (_HasFspath(), "foo"), + (_HasPathAttr(), "foo"), + # passthrough + (b"bytes", b"bytes"), + (None, None), + (1, 1), + (True, True), + (o := object(), o), + ([], []), + ((), ()), + (set(), set()), + ], +) +def test_stringify_path(path, expected): + path = fsspec.utils.stringify_path(path) + + assert path == expected diff --git a/fsspec/utils.py b/fsspec/utils.py index ba3f80be3..6e130aa75 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -350,8 +350,6 @@ def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str: return filepath elif hasattr(filepath, "__fspath__"): return filepath.__fspath__() - elif isinstance(filepath, pathlib.Path): - return str(filepath) elif hasattr(filepath, "path"): return filepath.path else: