Skip to content

Add tls to ftp #1581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ Built-in Implementations
.. autoclass:: fsspec.implementations.ftp.FTPFileSystem
:members: __init__

.. autoclass:: fsspec.implementations.ftp.FTPTLSFileSystem
:members: __init__

.. autoclass:: fsspec.implementations.git.GitFileSystem
:members: __init__

Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ The following libraries use ``fsspec`` internally for path and file handling:
maintainable and modular data science code
#. `pyxet`_, a Python library for mounting and
accessing very large datasets from XetHub
#. `Huggingface🤗 Datasets`_, a popular library to
#. `Huggingface🤗 Datasets`_, a popular library to
load&manipulate data for Deep Learning models

``fsspec`` filesystems are also supported by:
Expand Down
18 changes: 14 additions & 4 deletions fsspec/implementations/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import uuid
import warnings
from ftplib import FTP, Error, error_perm
from ftplib import FTP, FTP_TLS, Error, error_perm
from typing import Any

from ..spec import AbstractBufferedFile, AbstractFileSystem
Expand All @@ -27,6 +27,8 @@ def __init__(
tempdir=None,
timeout=30,
encoding="utf-8",
ssl=False,
prot_p=False,
**kwargs,
):
"""
Expand Down Expand Up @@ -68,16 +70,24 @@ def __init__(
self.blocksize = block_size
else:
self.blocksize = 2**16
self.ssl = ssl
self.prot_p = prot_p
self._connect()
if self.prot_p:
self.ftp.prot_p()

def _connect(self):
if self.ssl:
ftp_cls = FTP_TLS
else:
ftp_cls = FTP
if sys.version_info >= (3, 9):
self.ftp = FTP(timeout=self.timeout, encoding=self.encoding)
self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
elif self.encoding:
warnings.warn("`encoding` not supported for python<3.9, ignoring")
self.ftp = FTP(timeout=self.timeout)
self.ftp = ftp_cls(timeout=self.timeout)
else:
self.ftp = FTP(timeout=self.timeout)
self.ftp = ftp_cls(timeout=self.timeout)
self.ftp.connect(self.host, self.port)
self.ftp.login(*self.cred)

Expand Down
8 changes: 5 additions & 3 deletions fsspec/implementations/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,9 +986,11 @@ def _process_gen(self, gens):
out = {}
for gen in gens:
dimension = {
k: v
if isinstance(v, list)
else range(v.get("start", 0), v["stop"], v.get("step", 1))
k: (
v
if isinstance(v, list)
else range(v.get("start", 0), v["stop"], v.get("step", 1))
)
for k, v in gen["dimensions"].items()
}
products = (
Expand Down
38 changes: 38 additions & 0 deletions fsspec/implementations/tests/ftp_tls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

from pyftpdlib.authorizers import DummyAuthorizer
from pyftpdlib.handlers import TLS_FTPHandler
from pyftpdlib.servers import FTPServer


def ftp(host="127.0.0.1", port=2121, directory=None):
    """Run a TLS-capable FTP server for the test-suite until it is stopped.

    Parameters
    ----------
    host: str
        Interface to bind. Defaults to loopback so that the fixed test
        credentials below are not offered to other machines on the network.
    port: int
        TCP port to listen on.
    directory: str or None
        Directory served as the home of both the ``user`` account and the
        anonymous account. Defaults to the directory containing this script.
    """
    # abspath() keeps this working when the script is started from its own
    # directory, where dirname(__file__) alone would be the empty string.
    here = os.path.dirname(os.path.abspath(__file__))
    if directory is None:
        directory = here

    # One named account plus anonymous access, both rooted at ``directory``
    authorizer = DummyAuthorizer()
    authorizer.add_user(
        "user",
        "pass",
        directory,
        "elradfmwMT",
    )
    authorizer.add_anonymous(directory)

    # The handler is configured through class attributes before being handed
    # to the server
    handler = TLS_FTPHandler
    handler.certfile = os.path.join(here, "keycert.pem")
    handler.authorizer = authorizer

    server = FTPServer((host, port), handler)
    server.serve_forever()


# Start the blocking server only when this file is executed as a script;
# importing the module just defines ``ftp``.
if __name__ == "__main__":
    ftp()
24 changes: 24 additions & 0 deletions fsspec/implementations/tests/keycert.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-----BEGIN EC PARAMETERS-----
BggqhkjOPQMBBw==
-----END EC PARAMETERS-----
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIBTg1e61mzYYPJ+MDkOWCSevnT1HUaaK9iopgTGyDoIuoAoGCCqGSM49
AwEHoUQDQgAEDy3E+4WgohcRUlaSZBndEZQBTyoRztCSoaDbhZkqsPFBbeaGJ5zA
E7qX+9LICDezAUsCiq2RYltOqDCsELteiQ==
-----END EC PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
MIICdzCCAh2gAwIBAgIUNN4kmTSxbLOoQXLFiYOs2XeK1jIwCgYIKoZIzj0EAwIw
gY8xCzAJBgNVBAYTAk5MMRUwEwYDVQQIDAxadWlkLUhvbGxhbmQxDjAMBgNVBAcM
BURlbGZ0MRAwDgYDVQQKDAdXaGlmZmxlMQ0wCwYDVQQLDARERVZBMRIwEAYDVQQD
DAlCYXJ0dmFuRXMxJDAiBgkqhkiG9w0BCQEWFWJhcnQudmFuZXNAd2hpZmZsZS5u
bDAgFw0yNDA0MTgxMDI0NDFaGA8yMjk4MDIwMTEwMjQ0MVowgY8xCzAJBgNVBAYT
Ak5MMRUwEwYDVQQIDAxadWlkLUhvbGxhbmQxDjAMBgNVBAcMBURlbGZ0MRAwDgYD
VQQKDAdXaGlmZmxlMQ0wCwYDVQQLDARERVZBMRIwEAYDVQQDDAlCYXJ0dmFuRXMx
JDAiBgkqhkiG9w0BCQEWFWJhcnQudmFuZXNAd2hpZmZsZS5ubDBZMBMGByqGSM49
AgEGCCqGSM49AwEHA0IABA8txPuFoKIXEVJWkmQZ3RGUAU8qEc7QkqGg24WZKrDx
QW3mhiecwBO6l/vSyAg3swFLAoqtkWJbTqgwrBC7XomjUzBRMB0GA1UdDgQWBBRb
1nPqritk/P2cbDzTw9SQ9vO7JDAfBgNVHSMEGDAWgBRb1nPqritk/P2cbDzTw9SQ
9vO7JDAPBgNVHRMBAf8EBTADAQH/MAoGCCqGSM49BAMCA0gAMEUCIBcvCFS4AD3p
Ix1v8pp3hcMvGFIQLeczh4kXkPfZWvBkAiEAiTCqsdKhZi8k814H6FFkaoQVIjTe
iUtUlW6RfyDNZ9E=
-----END CERTIFICATE-----
189 changes: 189 additions & 0 deletions fsspec/implementations/tests/test_ftp_tls.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an exact copy of the insecure tests? You can use pytest.mark.parametrize to run each of the existing tests, once with secure=False and once with True.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not possible since it will need a different fixture.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is totally possible to have a parametrised fixture, or two fixtures that you choose between. https://stackoverflow.com/a/29407931/3821154 shows an example.

Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import os
import subprocess
import sys
import time

import pytest

import fsspec
from fsspec import open_files
from fsspec.implementations.ftp import FTPFileSystem

# Skip the whole module when ``ftplib`` cannot be imported
ftplib = pytest.importorskip("ftplib")
# Absolute path of the directory containing this test module
here = os.path.dirname(os.path.abspath(__file__))


@pytest.fixture()
def ftp():
    """Launch ``ftp_tls.py`` in a subprocess and yield its connection details.

    Yields
    ------
    tuple
        ``(host, port, username, password)`` for the running server.

    The test is skipped when ``pyftpdlib`` is not installed, since the server
    script cannot start without it. The subprocess is always terminated on
    teardown.
    """
    import socket

    pytest.importorskip("pyftpdlib")
    host, port = "localhost", 2121
    script = os.path.join(os.path.dirname(__file__), "ftp_tls.py")
    # The server's output is never inspected; discard it rather than leaving
    # an unread pipe behind.
    server = subprocess.Popen(
        [sys.executable, script],
        stderr=subprocess.STDOUT,
        stdout=subprocess.DEVNULL,
    )
    try:
        # Wait until the port accepts connections instead of sleeping for a
        # fixed time, which is both slower and racy on a loaded machine.
        deadline = time.monotonic() + 10
        while True:
            if server.poll() is not None:
                raise RuntimeError(
                    f"FTP-TLS test server exited early with code {server.returncode}"
                )
            try:
                with socket.create_connection((host, port), timeout=1):
                    break
            except OSError:
                if time.monotonic() >= deadline:
                    raise
                time.sleep(0.1)
        yield host, port, "user", "pass"
    finally:
        server.terminate()
        server.wait()


def test_basic(ftp):
    """List the server root and read this file back with ``ssl=True``.

    No credentials are passed to ``FTPFileSystem``; only host and port from
    the fixture are used.
    """
    host, port, _, _ = ftp
    fs = FTPFileSystem(host, port, timeout=1, ssl=True)
    assert fs.ls("/", detail=False) == sorted(os.listdir(here))
    out = fs.cat(f"/{os.path.basename(__file__)}")
    # Read the reference bytes in a context manager so the handle is closed
    with open(__file__, "rb") as f:
        expected = f.read()
    assert out == expected


def test_basic_prot_p(ftp):
    """Same checks as the basic listing/read test, with ``prot_p=True`` as well.

    No credentials are passed to ``FTPFileSystem``; only host and port from
    the fixture are used.
    """
    host, port, _, _ = ftp
    fs = FTPFileSystem(host, port, ssl=True, prot_p=True)
    assert fs.ls("/", detail=False) == sorted(os.listdir(here))
    out = fs.cat(f"/{os.path.basename(__file__)}")
    # Read the reference bytes in a context manager so the handle is closed
    with open(__file__, "rb") as f:
        expected = f.read()
    assert out == expected


def test_not_cached(ftp):
    """Two filesystems built with identical arguments must be distinct objects."""
    host, port = ftp[0], ftp[1]
    first = FTPFileSystem(host, port, ssl=True)
    second = FTPFileSystem(host, port, ssl=True)
    assert first is not second


@pytest.mark.parametrize("cache_type", ["bytes", "mmap"])
def test_complex(ftp, cache_type):
    """Open a globbed remote file via ``open_files`` and read through the cache.

    NOTE(review): the test expects ``ou*`` to match exactly one file whose
    content starts with ``hellohello``; which file that is depends on what the
    server root contains - confirm against the fixture.
    """
    from fsspec.core import BytesCache

    host, port, user, pw = ftp
    storage_options = {
        "host": host,
        "port": port,
        "username": user,
        "password": pw,
        "block_size": 10000,
        "cache_type": cache_type,
        "ssl": True,
    }
    files = open_files("ftp:///ou*", **storage_options)
    assert len(files) == 1
    with files[0] as remote_file:
        assert remote_file.read(10) == b"hellohello"
        # The cache-size check only applies to the bytes cache
        if isinstance(remote_file.cache, BytesCache):
            assert len(remote_file.cache.cache) == 10010
        assert remote_file.read(2) == b"he"
        assert remote_file.tell() == 12


def test_write_small(ftp):
    """Write a two-byte file with ``ssl=True`` and read it back."""
    host, port, user, pw = ftp
    remote = FTPFileSystem(host, port, user, pw, ssl=True)
    target = "/out_tls2"
    with remote.open(target, "wb") as sink:
        sink.write(b"oi")
    assert remote.cat(target) == b"oi"


def test_with_url(ftp):
    """Round-trip a file addressed by a full ``ftp://`` URL with ``ssl=True``.

    ``ssl=True`` is forwarded to the filesystem as a storage option; without
    it the URL would be opened with the plain FTP client and this test would
    not exercise TLS at all.
    """
    host, port, user, pw = ftp
    url = f"ftp://{user}:{pw}@{host}:{port}/out_tls"
    fo = fsspec.open(url, "wb", ssl=True)
    with fo as f:
        f.write(b"hello")
    fo = fsspec.open(url, "rb", ssl=True)
    with fo as f:
        assert f.read() == b"hello"


@pytest.mark.parametrize("cache_type", ["bytes", "mmap"])
def test_write_big(ftp, cache_type):
    """Write 1700 bytes in three chunks with ``block_size=1000``.

    The assertions inside the ``with`` block pin when the remote file first
    becomes visible: not after the first 500 bytes, but after 1000 more.
    """
    host, port, user, pw = ftp
    remote = FTPFileSystem(
        host, port, user, pw, block_size=1000, cache_type=cache_type, ssl=True
    )
    target = f"/bigger_tls_{cache_type}"
    with remote.open(target, "wb") as sink:
        sink.write(b"o" * 500)
        # 500 bytes is below the block size: nothing is expected remotely yet
        assert not remote.exists(target)
        sink.write(b"o" * 1000)
        # Drop cached listings so exists() asks the server again
        remote.invalidate_cache()
        assert remote.exists(target)
        sink.write(b"o" * 200)
        sink.flush()

    assert remote.info(target)["size"] == 1700
    assert remote.cat(target) == b"o" * 1700
    remote.rm(target)


def test_transaction(ftp):
    """A file written inside a transaction appears only after the commit."""
    host, port, user, pw = ftp
    fs = FTPFileSystem(host, port, user, pw, ssl=True)
    # Idempotent, like the other tests in this module: a bare mkdir raises
    # once "tmp_tls" has been created by an earlier test or run.
    fs.mkdirs("tmp_tls", exist_ok=True)
    fn = "tr"
    with fs.transaction:
        with fs.open(fn, "wb") as f:
            f.write(b"not")
        # Still inside the transaction: the file must not be visible yet
        assert not fs.exists(fn)
    assert fs.exists(fn)
    assert fs.cat(fn) == b"not"

    fs.rm(fn)
    assert not fs.exists(fn)


def test_transaction_with_cache(ftp, tmpdir):
    """Removing a directory inside a transaction is reflected in later listings."""
    host, port, user, pw = ftp
    remote = FTPFileSystem(host, port, user, pw, ssl=True)
    parent = "tmp_tls"
    child = "tmp_tls/dir"

    remote.mkdirs(parent, exist_ok=True)
    remote.mkdir(child)
    assert "dir" in remote.ls(parent, detail=False)

    with remote.transaction:
        remote.rmdir(child)

    # Neither the listing nor exists() may still report the removed directory
    assert "dir" not in remote.ls(parent, detail=False)
    assert not remote.exists(child)


def test_cat_get(ftp, tmpdir):
    """Upload 2500 bytes with ``pipe`` and fetch them via ``cat_file`` and ``get_file``."""
    host, port, user, pw = ftp
    fs = FTPFileSystem(host, port, user, pw, block_size=500, ssl=True)
    fs.mkdirs("tmp_tls", exist_ok=True)
    data = b"hello" * 500
    fs.pipe("tmp_tls/myfile_tls", data)
    assert fs.cat_file("tmp_tls/myfile_tls") == data

    fn = os.path.join(tmpdir, "lfile")
    fs.get_file("tmp_tls/myfile_tls", fn)
    # Read the downloaded copy in a context manager so the handle is closed
    with open(fn, "rb") as f:
        assert f.read() == data


def test_mkdir(ftp):
    """Exercise ``mkdir``/``makedirs`` with and without parent creation.

    The tree created under ``tmp_tls/not`` is removed afterwards: the first
    assertion relies on that parent being absent, so leaving it behind would
    make the test fail the next time it runs against the same directory.
    """
    host, port, user, pw = ftp
    fs = FTPFileSystem(host, port, user, pw, ssl=True)
    try:
        # The parent "tmp_tls/not" is missing, so creation without parents fails
        with pytest.raises(ftplib.error_perm):
            fs.mkdir("tmp_tls/not/exist_tls", create_parents=False)
        fs.mkdir("tmp_tls/not/exist")
        assert fs.exists("tmp_tls/not/exist")
        fs.makedirs("tmp_tls/not/exist", exist_ok=True)
        with pytest.raises(FileExistsError):
            fs.makedirs("tmp_tls/not/exist", exist_ok=False)
        fs.makedirs("tmp_tls/not/exist/inner/inner")
        assert fs.isdir("tmp_tls/not/exist/inner/inner")
    finally:
        # Check the server rather than a cached listing before cleaning up
        fs.invalidate_cache()
        if fs.exists("tmp_tls/not"):
            fs.rm("tmp_tls/not", recursive=True)


def test_rm_get_recursive(ftp, tmpdir):
    """Recursively download a small tree, then remove it recursively."""
    local_target = str(tmpdir)
    host, port, user, pw = ftp
    remote = FTPFileSystem(host, port, user, pw, ssl=True)

    # Build topdir/{afile, underdir/afile} on the server
    for directory in ("tmp_tls/topdir", "tmp_tls/topdir/underdir"):
        remote.mkdir(directory)
    for empty_file in ("tmp_tls/topdir/afile", "tmp_tls/topdir/underdir/afile"):
        remote.touch(empty_file)

    remote.get("tmp_tls/topdir", local_target, recursive=True)

    # rmdir on the populated directory is expected to be refused
    with pytest.raises(ftplib.error_perm):
        remote.rmdir("tmp_tls/topdir")

    remote.rm("tmp_tls/topdir", recursive=True)
    assert not remote.exists("tmp_tls/topdir")