diff --git a/python/ray/_common/runtime_env_uri.py b/python/ray/_common/runtime_env_uri.py new file mode 100644 index 000000000000..b5d3e5b392ae --- /dev/null +++ b/python/ray/_common/runtime_env_uri.py @@ -0,0 +1,117 @@ +import enum +import pathlib +import urllib.parse +from typing import Tuple +from urllib.parse import urlparse + +_REMOTE_PROTOCOLS = ("http", "https", "s3", "gs", "azure", "abfss", "file") + + +class Protocol(enum.Enum): + # For packages dynamically uploaded and managed by the GCS. + GCS = "gcs" + # For conda environments installed locally on each node. + CONDA = "conda" + # For pip environments installed locally on each node. + PIP = "pip" + # For uv environments installed locally on each node. + UV = "uv" + # Remote http path, assumes everything packed in one zip file. + HTTP = "http" + # Remote https path, assumes everything packed in one zip file. + HTTPS = "https" + # Remote s3 path, assumes everything packed in one zip file. + S3 = "s3" + # Remote google storage path, assumes everything packed in one zip file. + GS = "gs" + # Remote azure blob storage path, assumes everything packed in one zip file. + AZURE = "azure" + # Remote Azure Blob File System Secure path, assumes everything packed in one zip file. + ABFSS = "abfss" + # File storage path, assumes everything packed in one zip file. + FILE = "file" + + @classmethod + def remote_protocols(cls): + # Returns a list of protocols that support remote storage. + # These protocols should only be used with paths that end in + # ".zip", ".whl", ".tar.gz", or ".tgz". 
+ return [cls[protocol.upper()] for protocol in _REMOTE_PROTOCOLS] + + +def _is_path(path_or_uri: str) -> bool: + """Returns True if path_or_uri is a path and False otherwise.""" + if not isinstance(path_or_uri, str): + raise TypeError(f" path_or_uri must be a string, got {type(path_or_uri)}.") + + parsed_path = pathlib.Path(path_or_uri) + parsed_uri = urllib.parse.urlparse(path_or_uri) + + if isinstance(parsed_path, pathlib.PurePosixPath): + return not parsed_uri.scheme + elif isinstance(parsed_path, pathlib.PureWindowsPath): + return parsed_uri.scheme == parsed_path.drive.strip(":").lower() + else: + # this should never happen. + raise TypeError(f"Unsupported path type: {type(parsed_path).__name__}") + + +def parse_uri(pkg_uri: str) -> Tuple[Protocol, str]: + """ + Parse package uri into protocol and package name based on its format. + Note that the output of this function is not for handling actual IO, it's + only for setting up local directory folders by using package name as path. + + >>> parse_uri("https://test.com/file.zip") + (<Protocol.HTTPS: 'https'>, 'https_test_com_file.zip') + + >>> parse_uri("https://test.com/file.whl") + (<Protocol.HTTPS: 'https'>, 'file.whl') + + """ + if _is_path(pkg_uri): + raise ValueError(f"Expected URI but received path {pkg_uri}") + + uri = urlparse(pkg_uri) + try: + protocol = Protocol(uri.scheme) + except ValueError as e: + raise ValueError( + f'Invalid protocol for runtime_env URI "{pkg_uri}". ' + f"Supported protocols: {Protocol._member_names_}. Original error: {e}" + ) + + if protocol in Protocol.remote_protocols(): + if uri.path.endswith(".whl"): + # Don't modify the .whl filename. See + # https://peps.python.org/pep-0427/#file-name-convention + # for more information. 
+ package_name = uri.path.split("/")[-1] + else: + package_name = f"{protocol.value}_{uri.netloc}{uri.path}" + + disallowed_chars = ["/", ":", "@", "+", " ", "(", ")"] + for disallowed_char in disallowed_chars: + package_name = package_name.replace(disallowed_char, "_") + + # Preserve compound extensions like .tar.gz before replacing dots. + compound_ext = None + if package_name.endswith(".tar.gz"): + compound_ext = ".tar.gz" + package_name = package_name[: -len(".tar.gz")] + elif package_name.endswith(".tar.bz2"): + compound_ext = ".tar.bz2" + package_name = package_name[: -len(".tar.bz2")] + + if compound_ext: + package_name = package_name.replace(".", "_") + package_name += compound_ext + else: + # Remove all periods except the last, which is part of the + # file extension. + package_name = package_name.replace( + ".", "_", package_name.count(".") - 1 + ) + else: + package_name = uri.netloc + return (protocol, package_name) diff --git a/python/ray/_common/tests/BUILD.bazel b/python/ray/_common/tests/BUILD.bazel index c5b3dcbded27..a08cefb92640 100644 --- a/python/ray/_common/tests/BUILD.bazel +++ b/python/ray/_common/tests/BUILD.bazel @@ -21,6 +21,7 @@ py_test_module_list( "test_network_utils.py", "test_ray_option_utils.py", "test_retry.py", + "test_runtime_env_uri.py", "test_signal_semaphore_utils.py", "test_signature.py", "test_tls_utils.py", diff --git a/python/ray/_common/tests/test_runtime_env_uri.py b/python/ray/_common/tests/test_runtime_env_uri.py new file mode 100644 index 000000000000..df39d5c71f2e --- /dev/null +++ b/python/ray/_common/tests/test_runtime_env_uri.py @@ -0,0 +1,182 @@ +import sys + +import pytest + +from ray._common.runtime_env_uri import Protocol, parse_uri + + +class TestParseUri: + @pytest.mark.parametrize( + "uri,protocol,package_name", + [ + ("gcs://file.zip", Protocol.GCS, "file.zip"), + ("s3://bucket/file.zip", Protocol.S3, "s3_bucket_file.zip"), + ("http://test.com/file.zip", Protocol.HTTP, "http_test_com_file.zip"), + 
("https://test.com/file.zip", Protocol.HTTPS, "https_test_com_file.zip"), + ("gs://bucket/file.zip", Protocol.GS, "gs_bucket_file.zip"), + ("azure://container/file.zip", Protocol.AZURE, "azure_container_file.zip"), + ( + "abfss://container@account.dfs.core.windows.net/file.zip", + Protocol.ABFSS, + "abfss_container_account_dfs_core_windows_net_file.zip", + ), + ( + "https://test.com/package-0.0.1-py2.py3-none-any.whl?param=value", + Protocol.HTTPS, + "package-0.0.1-py2.py3-none-any.whl", + ), + ( + "http://test.com/package-0.0.1-py2.py3-none-any.whl?param=value", + Protocol.HTTP, + "package-0.0.1-py2.py3-none-any.whl", + ), + ], + ) + def test_parsing_remote_basic(self, uri, protocol, package_name): + assert parse_uri(uri) == (protocol, package_name) + + @pytest.mark.parametrize( + "uri,package_name", + [ + ( + "https://username:PAT@github.com/repo/archive/commit_hash.zip", + "https_username_PAT_github_com_repo_archive_commit_hash.zip", + ), + ( + ( + "https://un:pwd@gitlab.com/user/repo/-/" + "archive/commit_hash/repo-commit_hash.zip" + ), + ( + "https_un_pwd_gitlab_com_user_repo_-_" + "archive_commit_hash_repo-commit_hash.zip" + ), + ), + ], + ) + def test_parse_private_git_https_uris(self, uri, package_name): + assert parse_uri(uri) == (Protocol.HTTPS, package_name) + + @pytest.mark.parametrize( + "uri,protocol,package_name", + [ + ( + "https://username:PAT@github.com/repo/archive:2/commit_hash.zip", + Protocol.HTTPS, + "https_username_PAT_github_com_repo_archive_2_commit_hash.zip", + ), + ( + "gs://fake/2022-10-21T13:11:35+00:00/package.zip", + Protocol.GS, + "gs_fake_2022-10-21T13_11_35_00_00_package.zip", + ), + ( + "s3://fake/2022-10-21T13:11:35+00:00/package.zip", + Protocol.S3, + "s3_fake_2022-10-21T13_11_35_00_00_package.zip", + ), + ( + "azure://fake/2022-10-21T13:11:35+00:00/package.zip", + Protocol.AZURE, + "azure_fake_2022-10-21T13_11_35_00_00_package.zip", + ), + ( + ( + "abfss://container@account.dfs.core.windows.net/" + 
"2022-10-21T13:11:35+00:00/package.zip" + ), + Protocol.ABFSS, + ( + "abfss_container_account_dfs_core_windows_net_" + "2022-10-21T13_11_35_00_00_package.zip" + ), + ), + ( + "file:///fake/2022-10-21T13:11:35+00:00/package.zip", + Protocol.FILE, + "file__fake_2022-10-21T13_11_35_00_00_package.zip", + ), + ( + "file:///fake/2022-10-21T13:11:35+00:00/(package).zip", + Protocol.FILE, + "file__fake_2022-10-21T13_11_35_00_00__package_.zip", + ), + ], + ) + def test_parse_uris_with_disallowed_chars(self, uri, protocol, package_name): + assert parse_uri(uri) == (protocol, package_name) + + @pytest.mark.parametrize( + "uri,protocol,package_name", + [ + ( + "https://username:PAT@github.com/repo/archive:2/commit_hash.whl", + Protocol.HTTPS, + "commit_hash.whl", + ), + ( + "gs://fake/2022-10-21T13:11:35+00:00/package.whl", + Protocol.GS, + "package.whl", + ), + ( + "s3://fake/2022-10-21T13:11:35+00:00/package.whl", + Protocol.S3, + "package.whl", + ), + ( + "azure://fake/2022-10-21T13:11:35+00:00/package.whl", + Protocol.AZURE, + "package.whl", + ), + ( + ( + "abfss://container@account.dfs.core.windows.net/" + "2022-10-21T13:11:35+00:00/package.whl" + ), + Protocol.ABFSS, + "package.whl", + ), + ( + "file:///fake/2022-10-21T13:11:35+00:00/package.whl", + Protocol.FILE, + "package.whl", + ), + ], + ) + def test_parse_remote_whl_uris(self, uri, protocol, package_name): + assert parse_uri(uri) == (protocol, package_name) + + @pytest.mark.parametrize( + "gcs_uri", + ["gcs://pip_install_test-0.5-py3-none-any.whl", "gcs://storing@here.zip"], + ) + def test_parse_gcs_uri(self, gcs_uri): + """GCS URIs should not be modified in this function.""" + protocol, package_name = parse_uri(gcs_uri) + assert protocol == Protocol.GCS + assert package_name == gcs_uri.split("/")[-1] + + +def test_parse_uri_tar_gz(): + protocol, package_name = parse_uri("s3://bucket/archive.tar.gz") + assert package_name.endswith(".tar.gz") + assert protocol == Protocol.S3 + + protocol, package_name = 
parse_uri("https://example.com/path/my.pkg.tar.gz") + assert package_name.endswith(".tar.gz") + assert "_" in package_name + + +def test_parse_uri_rejects_local_path(): + with pytest.raises(ValueError, match="Expected URI but received path"): + parse_uri("/tmp/file.zip") + + +def test_parse_uri_rejects_invalid_protocol(): + with pytest.raises(ValueError, match="Invalid protocol for runtime_env URI"): + parse_uri("unknown://file.zip") + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", __file__])) diff --git a/python/ray/_private/runtime_env/conda.py b/python/ray/_private/runtime_env/conda.py index 7ca678902433..e071dade0531 100644 --- a/python/ray/_private/runtime_env/conda.py +++ b/python/ray/_private/runtime_env/conda.py @@ -13,6 +13,7 @@ from filelock import FileLock import ray +from ray._common.runtime_env_uri import parse_uri from ray._common.utils import ( get_or_create_event_loop, try_to_create_directory, @@ -25,8 +26,8 @@ get_conda_info_json, ) from ray._private.runtime_env.context import RuntimeEnvContext -from ray._private.runtime_env.packaging import Protocol, parse_uri from ray._private.runtime_env.plugin import RuntimeEnvPlugin +from ray._private.runtime_env.protocol import Protocol from ray._private.runtime_env.validation import parse_and_validate_conda from ray._private.utils import ( get_directory_size_bytes, diff --git a/python/ray/_private/runtime_env/packaging.py b/python/ray/_private/runtime_env/packaging.py index 310189aca59c..298140e83479 100644 --- a/python/ray/_private/runtime_env/packaging.py +++ b/python/ray/_private/runtime_env/packaging.py @@ -9,12 +9,11 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Callable, List, Optional, Tuple -from urllib.parse import urlparse from zipfile import ZipFile from filelock import FileLock -from ray._private.path_utils import is_path +from ray._common.runtime_env_uri import parse_uri as _parse_uri from ray._private.ray_constants import ( 
GRPC_CPP_MAX_MESSAGE_SIZE, RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_DEFAULT, @@ -246,70 +245,9 @@ def parse_path(pkg_path: str) -> None: raise ValueError(f"{path} is not a valid path.") -def parse_uri(pkg_uri: str) -> Tuple[Protocol, str]: - """ - Parse package uri into protocol and package name based on its format. - Note that the output of this function is not for handling actual IO, it's - only for setting up local directory folders by using package name as path. - - >>> parse_uri("https://test.com/file.zip") - (<Protocol.HTTPS: 'https'>, 'https_test_com_file.zip') - - >>> parse_uri("https://test.com/file.whl") - (<Protocol.HTTPS: 'https'>, 'file.whl') - - """ - if is_path(pkg_uri): - raise ValueError(f"Expected URI but received path {pkg_uri}") - - uri = urlparse(pkg_uri) - try: - protocol = Protocol(uri.scheme) - except ValueError as e: - raise ValueError( - f'Invalid protocol for runtime_env URI "{pkg_uri}". ' - f"Supported protocols: {Protocol._member_names_}. Original error: {e}" - ) - - if protocol in Protocol.remote_protocols(): - if uri.path.endswith(".whl"): - # Don't modify the .whl filename. See - # https://peps.python.org/pep-0427/#file-name-convention - # for more information. 
- package_name = uri.path.split("/")[-1] - else: - package_name = f"{protocol.value}_{uri.netloc}{uri.path}" - - disallowed_chars = ["/", ":", "@", "+", " ", "(", ")"] - for disallowed_char in disallowed_chars: - package_name = package_name.replace(disallowed_char, "_") - - # Preserve compound extensions like .tar.gz before replacing dots - compound_ext = None - if package_name.endswith(".tar.gz"): - compound_ext = ".tar.gz" - package_name = package_name[: -len(".tar.gz")] - elif package_name.endswith(".tar.bz2"): - compound_ext = ".tar.bz2" - package_name = package_name[: -len(".tar.bz2")] - - if compound_ext: - package_name = package_name.replace(".", "_") - package_name += compound_ext - else: - # Remove all periods except the last, which is part of the - # file extension - package_name = package_name.replace( - ".", "_", package_name.count(".") - 1 - ) - else: - package_name = uri.netloc - return (protocol, package_name) - - def is_zip_uri(uri: str) -> bool: try: - protocol, path = parse_uri(uri) + protocol, path = _parse_uri(uri) except ValueError: return False @@ -318,7 +256,7 @@ def is_zip_uri(uri: str) -> bool: def is_whl_uri(uri: str) -> bool: try: - _, path = parse_uri(uri) + _, path = _parse_uri(uri) except ValueError: return False @@ -327,7 +265,7 @@ def is_whl_uri(uri: str) -> bool: def is_jar_uri(uri: str) -> bool: try: - _, path = parse_uri(uri) + _, path = _parse_uri(uri) except ValueError: return False @@ -336,7 +274,7 @@ def is_jar_uri(uri: str) -> bool: def is_tar_gz_uri(uri: str) -> bool: try: - _, path = parse_uri(uri) + _, path = _parse_uri(uri) except ValueError: return False @@ -503,7 +441,7 @@ def _store_package_in_gcs( def _get_local_path(base_directory: str, pkg_uri: str) -> str: - _, pkg_name = parse_uri(pkg_uri) + _, pkg_name = _parse_uri(pkg_uri) return os.path.join(base_directory, pkg_name) @@ -569,7 +507,7 @@ def package_exists(pkg_uri: str) -> bool: Return: True for package existing and False for not. 
""" - protocol, pkg_name = parse_uri(pkg_uri) + protocol, pkg_name = _parse_uri(pkg_uri) if protocol == Protocol.GCS: return _internal_kv_exists(pkg_uri) else: @@ -693,7 +631,7 @@ def upload_package_to_gcs(pkg_uri: str, pkg_bytes: bytes) -> None: NotImplementedError: If the protocol of the URI is not supported. """ - protocol, pkg_name = parse_uri(pkg_uri) + protocol, pkg_name = _parse_uri(pkg_uri) if protocol == Protocol.GCS: _store_package_in_gcs(pkg_uri, pkg_bytes) elif protocol in Protocol.remote_protocols(): @@ -867,7 +805,7 @@ async def download_and_unpack_package( shutil.rmtree(local_dir) if download_package: - protocol, _ = parse_uri(pkg_uri) + protocol, _ = _parse_uri(pkg_uri) logger.info( f"Downloading package from {pkg_uri} to {pkg_file} " f"with protocol {protocol}" diff --git a/python/ray/_private/runtime_env/pip.py b/python/ray/_private/runtime_env/pip.py index 903675496cd8..fd682f42769d 100644 --- a/python/ray/_private/runtime_env/pip.py +++ b/python/ray/_private/runtime_env/pip.py @@ -8,10 +8,11 @@ from asyncio import create_task, get_running_loop from typing import Dict, List, Optional +from ray._common.runtime_env_uri import parse_uri from ray._common.utils import try_to_create_directory from ray._private.runtime_env import dependency_utils, virtualenv_utils -from ray._private.runtime_env.packaging import Protocol, parse_uri from ray._private.runtime_env.plugin import RuntimeEnvPlugin +from ray._private.runtime_env.protocol import Protocol from ray._private.runtime_env.utils import check_output_cmd from ray._private.utils import get_directory_size_bytes diff --git a/python/ray/_private/runtime_env/protocol.py b/python/ray/_private/runtime_env/protocol.py index a9f4546a20d0..bbea9f0aeb59 100644 --- a/python/ray/_private/runtime_env/protocol.py +++ b/python/ray/_private/runtime_env/protocol.py @@ -1,7 +1,8 @@ -import enum import os from urllib.parse import urlparse +from ray._common.runtime_env_uri import Protocol + 
RAY_RUNTIME_ENV_HTTP_USER_AGENT_ENV_VAR = "RAY_RUNTIME_ENV_HTTP_USER_AGENT" RAY_RUNTIME_ENV_BEARER_TOKEN_ENV_VAR = "RAY_RUNTIME_ENV_BEARER_TOKEN" _DEFAULT_HTTP_USER_AGENT = "ray-runtime-env-curl/1.0" @@ -289,25 +290,6 @@ def open_file(uri, mode, *, transport_params=None): fout.write(fin.read()) -Protocol = enum.Enum( - "Protocol", - {protocol.upper(): protocol for protocol in ProtocolsProvider.get_protocols()}, -) - - -@classmethod -def _remote_protocols(cls): - # Returns a list of protocols that support remote storage - # These protocols should only be used with paths that end in - # ".zip", ".whl", ".tar.gz", or ".tgz" - return [ - cls[protocol.upper()] for protocol in ProtocolsProvider.get_remote_protocols() - ] - - -Protocol.remote_protocols = _remote_protocols - - def _download_remote_uri(self, source_uri, dest_file): return ProtocolsProvider.download_remote_uri(self.value, source_uri, dest_file) diff --git a/python/ray/_private/runtime_env/py_modules.py b/python/ray/_private/runtime_env/py_modules.py index 65aa23c72b92..f15048c3c44c 100644 --- a/python/ray/_private/runtime_env/py_modules.py +++ b/python/ray/_private/runtime_env/py_modules.py @@ -4,10 +4,10 @@ from types import ModuleType from typing import Any, Dict, List, Optional +from ray._common.runtime_env_uri import parse_uri from ray._common.utils import try_to_create_directory from ray._private.runtime_env.context import RuntimeEnvContext from ray._private.runtime_env.packaging import ( - Protocol, delete_package, download_and_unpack_package, get_local_dir_from_uri, @@ -17,11 +17,11 @@ install_wheel_package, is_whl_uri, package_exists, - parse_uri, upload_package_if_needed, upload_package_to_gcs, ) from ray._private.runtime_env.plugin import RuntimeEnvPlugin +from ray._private.runtime_env.protocol import Protocol from ray._private.runtime_env.working_dir import set_pythonpath_in_context from ray._private.utils import get_directory_size_bytes from ray._raylet import GcsClient diff --git 
a/python/ray/_private/runtime_env/uv.py b/python/ray/_private/runtime_env/uv.py index faebcc84ab26..4e61e211fe2a 100644 --- a/python/ray/_private/runtime_env/uv.py +++ b/python/ray/_private/runtime_env/uv.py @@ -10,10 +10,11 @@ from asyncio import create_task, get_running_loop from typing import Dict, List, Optional +from ray._common.runtime_env_uri import parse_uri from ray._common.utils import try_to_create_directory from ray._private.runtime_env import dependency_utils, virtualenv_utils -from ray._private.runtime_env.packaging import Protocol, parse_uri from ray._private.runtime_env.plugin import RuntimeEnvPlugin +from ray._private.runtime_env.protocol import Protocol from ray._private.runtime_env.utils import check_output_cmd from ray._private.utils import get_directory_size_bytes diff --git a/python/ray/_private/runtime_env/validation.py b/python/ray/_private/runtime_env/validation.py index 02662755c19b..f7bbb41f3995 100644 --- a/python/ray/_private/runtime_env/validation.py +++ b/python/ray/_private/runtime_env/validation.py @@ -19,7 +19,8 @@ def validate_path(path: str) -> None: def validate_uri(uri: str): try: - from ray._private.runtime_env.packaging import Protocol, parse_uri + from ray._common.runtime_env_uri import parse_uri + from ray._private.runtime_env.protocol import Protocol protocol, path = parse_uri(uri) except ValueError: diff --git a/python/ray/_private/runtime_env/working_dir.py b/python/ray/_private/runtime_env/working_dir.py index dffb4f18505c..d9181bcd2369 100644 --- a/python/ray/_private/runtime_env/working_dir.py +++ b/python/ray/_private/runtime_env/working_dir.py @@ -5,20 +5,20 @@ from typing import Any, Callable, Dict, List, Optional import ray._private.ray_constants as ray_constants +from ray._common.runtime_env_uri import parse_uri from ray._common.utils import try_to_create_directory from ray._private.runtime_env.context import RuntimeEnvContext from ray._private.runtime_env.packaging import ( - Protocol, delete_package, 
download_and_unpack_package, get_local_dir_from_uri, get_uri_for_directory, get_uri_for_package, - parse_uri, upload_package_if_needed, upload_package_to_gcs, ) from ray._private.runtime_env.plugin import RuntimeEnvPlugin +from ray._private.runtime_env.protocol import Protocol from ray._private.utils import get_directory_size_bytes from ray._raylet import GcsClient from ray.exceptions import RuntimeEnvSetupError diff --git a/python/ray/dashboard/modules/job/common.py b/python/ray/dashboard/modules/job/common.py index 8cba7ea89687..5729fa95d7d9 100644 --- a/python/ray/dashboard/modules/job/common.py +++ b/python/ray/dashboard/modules/job/common.py @@ -7,13 +7,13 @@ from pathlib import Path from typing import Any, Dict, Optional, Tuple, Union +from ray._common.runtime_env_uri import parse_uri from ray._private import ray_constants from ray._private.event.export_event_logger import ( EventLogType, check_export_api_enabled, get_export_event_logger, ) -from ray._private.runtime_env.packaging import parse_uri from ray._raylet import RAY_INTERNAL_NAMESPACE_PREFIX, GcsClient from ray.core.generated.export_event_pb2 import ExportEvent from ray.core.generated.export_submission_job_event_pb2 import ( diff --git a/python/ray/serve/schema.py b/python/ray/serve/schema.py index 5d2489d89cbb..7623846aeae2 100644 --- a/python/ray/serve/schema.py +++ b/python/ray/serve/schema.py @@ -18,7 +18,7 @@ ) from ray._common.logging_constants import LOGRECORD_STANDARD_ATTRS -from ray._private.runtime_env.packaging import parse_uri +from ray._common.runtime_env_uri import parse_uri from ray.serve._private.common import ( DeploymentStatus, DeploymentStatusTrigger, diff --git a/python/ray/tests/test_runtime_env_packaging.py b/python/ray/tests/test_runtime_env_packaging.py index 232b83e4407b..67fad34037ac 100644 --- a/python/ray/tests/test_runtime_env_packaging.py +++ b/python/ray/tests/test_runtime_env_packaging.py @@ -17,6 +17,7 @@ import pytest import ray +from ray._common.runtime_env_uri 
import parse_uri from ray._private.ray_constants import ( KV_NAMESPACE_PACKAGE, ) @@ -39,7 +40,6 @@ is_tar_gz_uri, is_whl_uri, is_zip_uri, - parse_uri, remove_dir_from_filepaths, untar_package, unzip_package,