From f35ef7cdde0009a8744d7a9f3d2531f3b80fcae0 Mon Sep 17 00:00:00 2001 From: rchevrier Date: Fri, 19 Jun 2026 11:27:17 +0000 Subject: [PATCH] switch GSHHG to HTTP --- src/fcollections/sad/_gshhg.py | 31 ++++++++++++++------------ tests/sad/test_aux.py | 40 ++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/src/fcollections/sad/_gshhg.py b/src/fcollections/sad/_gshhg.py index 2a42843..a7b0169 100644 --- a/src/fcollections/sad/_gshhg.py +++ b/src/fcollections/sad/_gshhg.py @@ -5,7 +5,8 @@ import logging import tarfile import typing as tp -from ftplib import FTP + +import requests from ._interface import IAuxiliaryDataFetcher @@ -33,7 +34,7 @@ class GSHHG(IAuxiliaryDataFetcher): the user home (~/.config/sad) """ - FTP_URL = "ftp.soest.hawaii.edu" + HTTP_URL = "http://www.soest.hawaii.edu/pwessel" FILE = "gshhg/gshhg-gmt-2.3.7.tar.gz" @property @@ -43,25 +44,24 @@ def keys(self) -> set[str]: return {f"{s}_{r}" for s, r in itertools.product(subset, resolutions)} def _download(self, remote_file: str, target_folder: Path): - fetch_ftp_file(self.FTP_URL, self.FILE, target_folder) + fetch_http_file(self.HTTP_URL, self.FILE, target_folder) return target_folder / remote_file def _file_name(self, key: str): return f"binned_{key}.nc" -def fetch_ftp_file(url: str, filename: str, target_folder: Path): +def fetch_http_file(url: str, filename: str, target_folder: Path): - logger.debug("Connecting as anonymous to %s", url) - ftp = FTP(url) - ftp.login() + full_url = url + "/" + filename - # Download in-memory. This should be limited to a few MB - logger.info("Downloading %s...", filename) + logger.info("Downloading %s...", full_url) tar_data = io.BytesIO() - ftp.retrbinary(f"RETR {filename}", tar_data.write) - ftp.quit() - logger.info("Downloading %s... Done", filename) + response = requests.get(full_url, timeout=60) + try: + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Failed to download file from {full_url}") from e # Filter out non-netcdf and flatten the tar gz structure def tar_info_filter(tar_info: tarfile.TarInfo, _) -> tarfile.TarInfo | None: @@ -69,12 +69,15 @@ def tar_info_filter(tar_info: tarfile.TarInfo, _) -> tarfile.TarInfo | None: logger.debug("Not an netcdf, skipping extraction") return None - tar_info.name = tar_info.name.split("/")[-1] + logger.debug("Extracting %s", tar_info.name) + tar_info.path = tar_info.path.split("/")[-1] return tar_info # Extract in-memory buffer + tar_data.write(response.content) tar_data.seek(0) with tarfile.open(fileobj=tar_data, mode="r") as tar: for member in tar.getmembers(): - logger.debug("Extracting %s", member.name) tar.extract(member, path=target_folder, filter=tar_info_filter) + + logger.info("Downloading %s... Done", full_url) diff --git a/tests/sad/test_aux.py b/tests/sad/test_aux.py index 64b2372..b84a972 100644 --- a/tests/sad/test_aux.py +++ b/tests/sad/test_aux.py @@ -66,23 +66,41 @@ def gshhg_tar_gz() -> bytes: def test_gshhg(gshhg_tar_gz: bytes, tmp_path_factory: pytest.TempPathFactory): path = tmp_path_factory.mktemp("sad") - def retrbinary_side_effect(_, callback): - callback(gshhg_tar_gz) + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tar: + data = b"Hello World" + info = tarfile.TarInfo(name="binned_border_i.nc") + info.size = len(data) + tar.addfile(info, io.BytesIO(data)) - ftp_mock = Mock() - ftp_mock.retrbinary.side_effect = retrbinary_side_effect + mock_resp = Mock() + mock_resp.content = buf.getvalue() + mock_resp.raise_for_status.return_value = None aux = GSHHG() with ( - patch("fcollections.sad._gshhg.FTP", return_value=ftp_mock), + patch("requests.get", return_value=mock_resp) as get, patch("fcollections.sad.GSHHG.lookup_folders", return_value=[path]), ): fetched_file = aux["border_i"] - with open(fetched_file, "rb") as f: - assert f.read() == b"hello" - fetched_file = aux["GSHHS_h"] - with open(fetched_file, "rb") as f: - assert f.read() == b"world" + with open(fetched_file) as f: + assert f.read() == "Hello World" - ftp_mock.retrbinary.assert_called_once() + get.assert_called_once_with( + aux.HTTP_URL + "/gshhg/gshhg-gmt-2.3.7.tar.gz", timeout=60 + ) + + +def test_gshhg_http_error(tmp_path_factory: pytest.TempPathFactory): + path = tmp_path_factory.mktemp("sad") + mock_resp = Mock() + mock_resp.raise_for_status.side_effect = requests.HTTPError("404") + + aux = GSHHG() + with ( + patch("fcollections.sad._gshhg.requests.get", return_value=mock_resp), + patch("fcollections.sad.GSHHG.lookup_folders", return_value=[path]), + ): + with pytest.raises(RuntimeError): + aux["border_i"]