From 583c69336cb05424b936b2ac80da5b0798415e32 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Thu, 6 Jun 2024 15:24:45 -0700 Subject: [PATCH 01/10] Added standalone script for everything. --- python/pyxet/pyxet/cli.py | 1 + python/pyxet/scripts/build_standalone_cli.sh | 19 ++++++------------- python/pyxet/scripts/run_tests.sh | 1 - python/pyxet/scripts/xet_standalone_entry.py | 11 +++++++++++ 4 files changed, 18 insertions(+), 14 deletions(-) create mode 100755 python/pyxet/scripts/xet_standalone_entry.py diff --git a/python/pyxet/pyxet/cli.py b/python/pyxet/pyxet/cli.py index a1d832e..a45f4d5 100644 --- a/python/pyxet/pyxet/cli.py +++ b/python/pyxet/pyxet/cli.py @@ -115,6 +115,7 @@ def version(): """ print(__version__) + @staticmethod @cli.command() def cp(source: Annotated[typing.List[str], typer.Argument(help="Source file or folder to copy")], target: Annotated[str, typer.Argument(help="Target location of the file or folder")], diff --git a/python/pyxet/scripts/build_standalone_cli.sh b/python/pyxet/scripts/build_standalone_cli.sh index 088fbc6..d4b197f 100755 --- a/python/pyxet/scripts/build_standalone_cli.sh +++ b/python/pyxet/scripts/build_standalone_cli.sh @@ -14,21 +14,14 @@ pip install target/wheels/pyxet-*.whl OS=$(uname -s) +xet_cli_path="./scripts/xet_standalone_entry.py" +echo "Path to xet entry script = '${xet_cli_path}'" + # Build binary if [[ "$OS" == "Darwin" ]]; then - xet_cli_path="$(which xet)" - echo "Path to xet = '${xet_cli_path}'" - pyinstaller --onefile "$xet_cli_path" --target-arch universal2 + pyinstaller --onefile "$xet_cli_path" --name xet --target-arch universal2 elif [[ "$OS" == "Linux" ]] ; then - xet_cli_path="$(which xet)" - echo "Path to xet = '${xet_cli_path}'" - pyinstaller --onefile "$xet_cli_path" + pyinstaller --onefile "$xet_cli_path" --name xet else - # Windows is weird. Have to go directly to the cli path - - # Find the cli file, which isn't always where you want it to be. - xet_cli_path="./.venv_build/Lib/site-packages/pyxet/cli.py" - echo "Path to xet = '${xet_cli_path}'" - pyinstaller --onefile "$xet_cli_path" - mv dist/cli.exe dist/xet.exe + pyinstaller --onefile "$xet_cli_path" --name xet fi diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index 685640c..200e120 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -32,4 +32,3 @@ pip install target/wheels/pyxet-*.whl # Error: tests in cli can't be run simultaneously actually, as there are conflicts. #pytest -n 12 --verbose tests/ pytest --verbose tests/ - diff --git a/python/pyxet/scripts/xet_standalone_entry.py b/python/pyxet/scripts/xet_standalone_entry.py new file mode 100755 index 0000000..1de735f --- /dev/null +++ b/python/pyxet/scripts/xet_standalone_entry.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +# Pull in everything from pyxet, without the relative imports. +from pyxet.cli import cli + +# Package in s3 dependencies, otherwise these +# will just give other repo errors. +import s3fs + +if __name__ == "__main__": + cli() \ No newline at end of file From c7367ffcaad1059f22ab59baf55767b5add82239 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Thu, 6 Jun 2024 16:07:38 -0700 Subject: [PATCH 02/10] Added standalone test script. --- python/pyxet/tests/cli_cp_new_url_test.py | 3 - python/pyxet/tests/cli_standalone_cp_tests.py | 309 ++++++++++++++ python/pyxet/tests/test_standalone_cli.py | 385 ++++++++++++++++++ 3 files changed, 694 insertions(+), 3 deletions(-) create mode 100644 python/pyxet/tests/cli_standalone_cp_tests.py create mode 100644 python/pyxet/tests/test_standalone_cli.py diff --git a/python/pyxet/tests/cli_cp_new_url_test.py b/python/pyxet/tests/cli_cp_new_url_test.py index a5b5c32..a2dfd9f 100644 --- a/python/pyxet/tests/cli_cp_new_url_test.py +++ b/python/pyxet/tests/cli_cp_new_url_test.py @@ -7,14 +7,11 @@ from pyxet.file_operations import perform_copy, build_cp_action_list - def delete_branch(repo, branch, *args): try: pyxet.BranchCLI.delete(repo, branch, *args) except Exception as e: print(f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}") - - def test_single_file_upload(): user, host = utils.test_account_login() diff --git a/python/pyxet/tests/cli_standalone_cp_tests.py b/python/pyxet/tests/cli_standalone_cp_tests.py new file mode 100644 index 0000000..f80db57 --- /dev/null +++ b/python/pyxet/tests/cli_standalone_cp_tests.py @@ -0,0 +1,309 @@ +import os +import pytest +import pyxet +import utils +import shutil +import tempfile +import subprocess +import sys + +from pyxet.file_operations import perform_copy, build_cp_action_list + +# Set these once. +user, host = utils.test_account_login() +repo = utils.test_repo() + +# TODO: right syntax for this. +xet_cli_path=os.environ.get("XET_STANDALONE_CLI", None) + +if xet_cli_path is None: + print("Warning: XET_STANDALONE_CLI not set, skipping all tests.") + +def xet_url(branch = None, path = None): + ret = f"xet://{host}:{user}/{repo}" + if branch is not None: + ret += f"/{branch}" + if path is not None: + assert branch is not None + ret += f"/{path}" + return ret + +def run_xet(*args, cwd = None): + arg_text = ' '.join(f'"{arg}"' if arg.contains(' ') else arg for arg in args) + print(f"Running command: xet {arg_text}") + subprocess.check_output([xet_cli_path] + args, cwd = cwd) + +def xet_perform_copy(cwd, src, dest, message, is_recursive): + run_xet(["cp"] + (["--recursive"] if is_recursive else []) + [src, dest], cwd=cwd) + +def delete_branch(branch): + try: + run_xet("branch", "delete", xet_url(branch)) + except Exception as e: + print(f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}") + + +@pytest.mark.skipif(xet_cli_path is None) +def test_single_file_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a random file in a temp dir + dir = tempfile.mkdtemp() + local_file = f"{dir}/data" + utils.random_binary_file(local_file, 1024) + + # test variations of path + source_list = [ + f"{dir}/data", + ] + + dest_list = [ + # (dest in cp command, expected path of remote file) + (f"xet://{host}:{user}/{repo}/{b1}", [f"xet://{host}:{user}/{repo}/{b1}/data"]), + (f"xet://{host}:{user}/{repo}/{b1}/",[f"xet://{host}:{user}/{repo}/{b1}/data"]), + (f"xet://{host}:{user}/{repo}/{b1}/zz", [f"xet://{host}:{user}/{repo}/{b1}/zz"]), + ] + + recursive_list = [ + False, + True, + ] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + xet_perform_copy(src, dest[0], "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", dest[1]) + pyxet.PyxetCLI.rm(dest[1]) + finally: + shutil.rmtree(dir) + finally: + delete_branch(b1) + +@pytest.mark.skipif(xet_cli_path is None) +def test_multiple_files_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate random files in a temp dir + dir = tempfile.mkdtemp() + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + True, + ] + + expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + xet_perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) + run_xet("rm", *expected_files) + finally: + shutil.rmtree(dir) + finally: + delete_branch(b1) + +@pytest.mark.skipif(xet_cli_path is None) +def test_glob_nonrecursive_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + ] + + expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + xet_perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) + utils.assert_remote_files_not_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"]) + run_xet("rm", *expected_files) + finally: + shutil.rmtree(dir) + finally: + delete_branch(b1) + +@pytest.mark.skipif(xet_cli_path is None) +def test_glob_recursive_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + True, + ] + + expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] + expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + xet_perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) + run_xet("rm", *expected_files_level1) + finally: + shutil.rmtree(dir) + finally: + delete_branch(b1) + +@pytest.mark.skipif(xet_cli_path is None) +def test_directory_nonrecursive_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a random file in a temp dir + dir = tempfile.mkdtemp() + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + local_file = f"{dir}/{sub_dir}/data" + utils.random_binary_file(local_file, 1024) + + # test variations of path + source_list = [ + (f"{dir}/{sub_dir}/data", True), + (f"{dir}/{sub_dir}", False), + (f"{dir}/{sub_dir}/", False) + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + ] + + try: + for src, should_succeed in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + + if should_succeed: + xet_perform_copy(src, dest, "add data", r) + + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/data"]) + + else: + + # ignores instead of raising error + xet_perform_copy(src, dest, "add data", r) + + finally: + shutil.rmtree(dir) + finally: + delete_branch(b1) + +@pytest.mark.skipif(xet_cli_path is None) +def test_directory_recursive_upload(): + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + True, + ] + + expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] + expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + xet_perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) + run_xet("rm", *expected_files_level1) + run_xet("rm", *expected_files_level2) + finally: + shutil.rmtree(dir) + + finally: + delete_branch(b1) \ No newline at end of file diff --git a/python/pyxet/tests/test_standalone_cli.py b/python/pyxet/tests/test_standalone_cli.py new file mode 100644 index 0000000..a5b5c32 --- /dev/null +++ b/python/pyxet/tests/test_standalone_cli.py @@ -0,0 +1,385 @@ +import os +import pytest +import pyxet +import utils +import shutil +import tempfile + +from pyxet.file_operations import perform_copy, build_cp_action_list + + +def delete_branch(repo, branch, *args): + try: + pyxet.BranchCLI.delete(repo, branch, *args) + except Exception as e: + print(f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}") + + + +def test_single_file_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a random file in a temp dir + dir = tempfile.mkdtemp() + local_file = f"{dir}/data" + utils.random_binary_file(local_file, 1024) + + # test variations of path + source_list = [ + f"{dir}/data", + ] + + dest_list = [ + # (dest in cp command, expected path of remote file) + (f"xet://{host}:{user}/{repo}/{b1}", [f"xet://{host}:{user}/{repo}/{b1}/data"]), + (f"xet://{host}:{user}/{repo}/{b1}/",[f"xet://{host}:{user}/{repo}/{b1}/data"]), + (f"xet://{host}:{user}/{repo}/{b1}/zz", [f"xet://{host}:{user}/{repo}/{b1}/zz"]), + ] + + recursive_list = [ + False, + True, + ] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest[0]} {r}") + perform_copy(src, dest[0], "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", dest[1]) + pyxet.PyxetCLI.rm(dest[1]) + finally: + shutil.rmtree(dir) + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_multiple_files_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate random files in a temp dir + dir = tempfile.mkdtemp() + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + True, + ] + + expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) + pyxet.PyxetCLI.rm(expected_files) + finally: + shutil.rmtree(dir) + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_glob_nonrecursive_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + ] + + expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) + utils.assert_remote_files_not_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"]) + pyxet.PyxetCLI.rm(expected_files) + finally: + shutil.rmtree(dir) + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_glob_recursive_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + True, + ] + + expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] + expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) + pyxet.PyxetCLI.rm(expected_files_level1) + finally: + shutil.rmtree(dir) + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_directory_nonrecursive_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a random file in a temp dir + dir = tempfile.mkdtemp() + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + local_file = f"{dir}/{sub_dir}/data" + utils.random_binary_file(local_file, 1024) + + # test variations of path + source_list = [ + (f"{dir}/{sub_dir}/data", True), + (f"{dir}/{sub_dir}", False), + (f"{dir}/{sub_dir}/", False) + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + False, + ] + + try: + for src, should_succeed in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + + if should_succeed: + perform_copy(src, dest, "add data", r) + + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/data"]) + + else: + + # ignores instead of raising error + perform_copy(src, dest, "add data", r) + + finally: + shutil.rmtree(dir) + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_directory_recursive_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}/*", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + True, + ] + + expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] + expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) + pyxet.PyxetCLI.rm(expected_files_level1) + pyxet.PyxetCLI.rm(expected_files_level2) + finally: + shutil.rmtree(dir) + + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +# According to https://filesystem-spec.readthedocs.io/en/latest/copying.html#single-source-to-single-target +# section 1e, if the trailing slash is omitted from "source/subdir" then the subdir is also copied, +# not just its contents. +# +# NOTE: only use this behavior for the fsspec copy method in python, not the xet cp command line. +def _test_directory_recursive_noslash_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + try: + # generate a mix of random files and directories in a temp dir + dir = tempfile.mkdtemp() + dir_name = os.path.basename(dir) + + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [1024, 1024]) + + sub_dir = "subdir" + os.mkdir(f"{dir}/{sub_dir}") + utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) + + # test variations of path + source_list = [ + f"{dir}", + ] + + dest_list = [ + f"xet://{host}:{user}/{repo}/{b1}", + f"xet://{host}:{user}/{repo}/{b1}/", + ] + + recursive_list = [ + True, + ] + + expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}"] + expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/data0", f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/data1", f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}"] + expected_files_level3 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}/data"] + + try: + for src in source_list: + for dest in dest_list: + for r in recursive_list: + print(f"xet cp {src} {dest} {r}") + perform_copy(src, dest, "add data", r) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/*", expected_files_level2) + utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}/*", expected_files_level3) + pyxet.PyxetCLI.rm(expected_files_level1) + finally: + shutil.rmtree(dir) + + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_large_batch_upload(): + user, host = utils.test_account_login() + repo = utils.test_repo() + b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") + + try: + # generate a large batch of random files in a temp dir + dir = tempfile.mkdtemp() + + n_files = 1000 + local_files = list(map(lambda i: f"{dir}/data{i}", range(n_files))) + utils.random_binary_files(local_files, [1024] * n_files) + + try: + pyxet.commit_transaction.TRANSACTION_FILE_LIMIT = 100 + perform_copy(f"{dir}/", f"xet://{host}:{user}/{repo}/{b1}", "add data", True) + finally: + shutil.rmtree(dir) + + finally: + delete_branch(f"xet://{host}:{user}/{repo}", b1, True) + +def test_size_hint(): + user, host = utils.test_account_login() + repo = utils.test_repo() + + try: + # generate a large batch of random files in a temp dir + dir = tempfile.mkdtemp() + local_files = [f"{dir}/data0", f"{dir}/data1"] + utils.random_binary_files(local_files, [262, 471]) + + cplist = build_cp_action_list(f"{dir}/*", f"xet://{host}:{user}/{repo}/main") + assert len(cplist) == 2 + assert cplist[0].size == 262 + assert cplist[1].size == 471 + + finally: + shutil.rmtree(dir) \ No newline at end of file From 1887c4bc5bec8027bf00fa6fef146deddc455c74 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Thu, 6 Jun 2024 16:47:15 -0700 Subject: [PATCH 03/10] Added build dependencies and tests so that tests are run on standalone cli. --- python/pyxet/scripts/build_standalone_cli.sh | 21 +++++++---- python/pyxet/scripts/build_wheel.sh | 37 ++++++++++++++------ python/pyxet/scripts/cli_requirements.txt | 3 ++ python/pyxet/scripts/run_tests.sh | 32 ++++++++--------- python/pyxet/scripts/setup_env.sh | 21 ++++++----- python/pyxet/tests/test_standalone_cli.py | 2 -- 6 files changed, 71 insertions(+), 45 deletions(-) create mode 100644 python/pyxet/scripts/cli_requirements.txt diff --git a/python/pyxet/scripts/build_standalone_cli.sh b/python/pyxet/scripts/build_standalone_cli.sh index d4b197f..49fc25f 100755 --- a/python/pyxet/scripts/build_standalone_cli.sh +++ b/python/pyxet/scripts/build_standalone_cli.sh @@ -4,24 +4,31 @@ # Will build wheel in release mode, then build standalone executable using the xet packaged with the CLI if [[ ! -e pyproject.toml ]] ; then - echo "Run this script in the pyxet directory using ./scripts/$0" + >&2 echo "Run this script in the pyxet directory using ./scripts/$0" exit 1 fi -source ./scripts/build_wheel.sh +>&2 wheel_location=$(./scripts/build_wheel.sh) -pip install target/wheels/pyxet-*.whl +>&2 pip install $wheel_location +>&2 pip install -r ./scripts/cli_requirements.sh OS=$(uname -s) xet_cli_path="./scripts/xet_standalone_entry.py" -echo "Path to xet entry script = '${xet_cli_path}'" +>&2 echo "Path to xet entry script = '${xet_cli_path}'" # Build binary if [[ "$OS" == "Darwin" ]]; then - pyinstaller --onefile "$xet_cli_path" --name xet --target-arch universal2 + >&2 pyinstaller --onefile "$xet_cli_path" --name xet --target-arch universal2 + cli_path="$PWD/dist/xet" elif [[ "$OS" == "Linux" ]] ; then - pyinstaller --onefile "$xet_cli_path" --name xet + >&2 pyinstaller --onefile "$xet_cli_path" --name xet + cli_path="$PWD/dist/xet" else - pyinstaller --onefile "$xet_cli_path" --name xet + >&2 pyinstaller --onefile "$xet_cli_path" --name xet + cli_path="$PWD/dist/xet.exe" fi + +>&2 echo "Standalone installer is located at ${cli_path}." +echo ${cli_path} diff --git a/python/pyxet/scripts/build_wheel.sh b/python/pyxet/scripts/build_wheel.sh index 7a4f951..6c6ee8c 100644 --- a/python/pyxet/scripts/build_wheel.sh +++ b/python/pyxet/scripts/build_wheel.sh @@ -4,7 +4,7 @@ # Will build wheel in release mode. if [[ ! -e pyproject.toml ]] ; then - echo "Run this script in the pyxet directory using ./scripts/$0" + >&2 echo "Run this script in the pyxet directory using ./scripts/$0" exit 1 fi @@ -15,20 +15,35 @@ unset CONDA_PREFIX # Use a new build environment that links against the system python on OSX # and always creates a new environment. -rm -rf .venv_build -source ./scripts/setup_env.sh -create_venv .venv_build release -source $(venv_activate_script .venv_build) + +# If we're already in a virtual env, then don't worry about this. +if [[ -z $_PYXET_BUILD_VIRTUAL_ENV]] ; then + >&2 rm -rf .venv_build + >&2 source ./scripts/setup_env.sh + >&2 create_venv .venv_build release + >&2 source $(venv_activate_script .venv_build) +else + >&2 source $(venv_activate_script ${_PYXET_BUILD_VIRTUAL_ENV}) +fi # Clear out any old wheels -mkdir -p target/old_wheels/ -mv target/wheels/* target/old_wheels/ || echo "" +>&2 mkdir -p target/old_wheels/ +>&2 mv target/wheels/* target/old_wheels/ || echo "" + +# Mode +if [[ $_PYXET_BUILD_MODE == "debug" ]] ; then + flags="--debug" +else + flags="--profile=cli-release" +fi + if [[ "$OS" == "Darwin" ]]; then - maturin build --profile=cli-release --target=universal2-apple-darwin -else - maturin build --profile=cli-release + >&2 maturin build $flags --target=universal2-apple-darwin +else + >&2 maturin build $flags fi -echo "Wheel is located at target/wheels/pyxet-*.whl" +>&2 echo "Wheel is located at target/wheels/pyxet-*.whl" +echo "${PWD}/target/wheels/pyxet-*.whl" diff --git a/python/pyxet/scripts/cli_requirements.txt b/python/pyxet/scripts/cli_requirements.txt new file mode 100644 index 0000000..96c0840 --- /dev/null +++ b/python/pyxet/scripts/cli_requirements.txt @@ -0,0 +1,3 @@ +# All of the requirements for installation that the fsspec implementation +# could be interested in. +s3fs \ No newline at end of file diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index 200e120..bf52f01 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -9,26 +9,26 @@ if [[ ! -e pyproject.toml ]] ; then fi source ./scripts/setup_env.sh -create_venv venv dev +create_venv venv dev # The dev part here installs the additional dev requirements source $(venv_activate_script venv) -if [[ -z "$VIRTUAL_ENV" ]] ; then - echo "Failed to activate virtual env." - exit 1 -fi +export _PYXET_BUILD_MODE=debug +export _PYXET_BUILD_VIRTUAL_ENV=venv +# Build the wheel. +wheel=$(./scripts/build_wheel.sh) -# Clear out any old wheels -mkdir -p target/old_wheels/ -mv target/wheels/* target/old_wheels/ || echo "" +# Build the standalone cli and wheel +cli=$(./scripts/build_standalone_cli.sh) -echo "$(which pip)" +# Install the wheel +pip install "$wheel" -# Install -maturin build -pip install target/wheels/pyxet-*.whl +if [[ -z "$VIRTUAL_ENV" ]] ; then + echo "Failed to activate virtual env." + exit 1 +fi -# TODO: This runs the tests in parallel using pytest-xdist -# Error: tests in cli can't be run simultaneously actually, as there are conflicts. -#pytest -n 12 --verbose tests/ -pytest --verbose tests/ +# Set this so we can execute the +export XET_STANDALONE_CLI=${cli} +pytest -n 8 --verbose tests/ diff --git a/python/pyxet/scripts/setup_env.sh b/python/pyxet/scripts/setup_env.sh index 21671b2..a4292b9 100755 --- a/python/pyxet/scripts/setup_env.sh +++ b/python/pyxet/scripts/setup_env.sh @@ -36,14 +36,17 @@ create_venv() { [[ -e "./$venv_name" ]] || exit 1 source $(venv_activate_script $venv_name) - - >&2 pip install --upgrade pip - if [[ $build_mode == "release" ]] ; then - # For building the wheel / standalone xet, use minimal installation - # environment; otherwise may pull in non-universal2 compatible package. - >&2 pip install -r scripts/build_requirements.txt - else - >&2 pip install -r scripts/dev_requirements.txt - fi + fi + + # Make sure it's up to par. + >&2 pip install --upgrade pip + if [[ $build_mode == "release" ]] ; then + # For building the wheel / standalone xet, use minimal installation + # environment; otherwise may pull in non-universal2 compatible package. + >&2 pip install --upgrade -r scripts/build_requirements.txt + else + # Install both. + >&2 pip install --upgrade -r scripts/build_requirements.txt + >&2 pip install --upgrade -r scripts/dev_requirements.txt fi } \ No newline at end of file diff --git a/python/pyxet/tests/test_standalone_cli.py b/python/pyxet/tests/test_standalone_cli.py index a5b5c32..bfa3abc 100644 --- a/python/pyxet/tests/test_standalone_cli.py +++ b/python/pyxet/tests/test_standalone_cli.py @@ -14,8 +14,6 @@ def delete_branch(repo, branch, *args): except Exception as e: print(f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}") - - def test_single_file_upload(): user, host = utils.test_account_login() repo = utils.test_repo() From cf00e1b3706f914afab2cef6df045bcaa839d616 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 09:44:13 -1000 Subject: [PATCH 04/10] Update to cli stuff. --- python/pyxet/scripts/build_standalone_cli.sh | 10 ++++++-- python/pyxet/scripts/build_wheel.sh | 24 +++++++++++--------- python/pyxet/scripts/run_tests.sh | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) mode change 100644 => 100755 python/pyxet/scripts/build_wheel.sh diff --git a/python/pyxet/scripts/build_standalone_cli.sh b/python/pyxet/scripts/build_standalone_cli.sh index 49fc25f..5677f13 100755 --- a/python/pyxet/scripts/build_standalone_cli.sh +++ b/python/pyxet/scripts/build_standalone_cli.sh @@ -11,7 +11,7 @@ fi >&2 wheel_location=$(./scripts/build_wheel.sh) >&2 pip install $wheel_location ->&2 pip install -r ./scripts/cli_requirements.sh +>&2 pip install -r ./scripts/cli_requirements.txt OS=$(uname -s) @@ -20,7 +20,13 @@ xet_cli_path="./scripts/xet_standalone_entry.py" # Build binary if [[ "$OS" == "Darwin" ]]; then - >&2 pyinstaller --onefile "$xet_cli_path" --name xet --target-arch universal2 + if [[ ${_PYXET_BUILD_MODE} == "debug" ]] ; then + target_flag= + else + target_flag="--target-arch=universal2" + fi + + >&2 pyinstaller --onefile "$xet_cli_path" --name xet $target_flag cli_path="$PWD/dist/xet" elif [[ "$OS" == "Linux" ]] ; then >&2 pyinstaller --onefile "$xet_cli_path" --name xet diff --git a/python/pyxet/scripts/build_wheel.sh b/python/pyxet/scripts/build_wheel.sh old mode 100644 new mode 100755 index 6c6ee8c..c7ed696 --- a/python/pyxet/scripts/build_wheel.sh +++ b/python/pyxet/scripts/build_wheel.sh @@ -13,13 +13,15 @@ OS=$(uname -s) export MACOSX_DEPLOYMENT_TARGET=10.9 unset CONDA_PREFIX +# Adds in the install instructions. +>&2 source ./scripts/setup_env.sh + # Use a new build environment that links against the system python on OSX # and always creates a new environment. # If we're already in a virtual env, then don't worry about this. -if [[ -z $_PYXET_BUILD_VIRTUAL_ENV]] ; then +if [[ -z $_PYXET_BUILD_VIRTUAL_ENV ]] ; then >&2 rm -rf .venv_build - >&2 source ./scripts/setup_env.sh >&2 create_venv .venv_build release >&2 source $(venv_activate_script .venv_build) else @@ -32,18 +34,18 @@ fi # Mode if [[ $_PYXET_BUILD_MODE == "debug" ]] ; then - flags="--debug" + flags= else flags="--profile=cli-release" -fi - -if [[ "$OS" == "Darwin" ]]; then - >&2 maturin build $flags --target=universal2-apple-darwin -else - >&2 maturin build $flags + if [[ "$OS" == "Darwin" ]]; then + flags="$flags --target=universal2-apple-darwin" + fi fi ->&2 echo "Wheel is located at target/wheels/pyxet-*.whl" -echo "${PWD}/target/wheels/pyxet-*.whl" +>&2 maturin build $flags + +wheel=$PWD/$(ls target/wheels/pyxet-*.whl | head -n 1) +>&2 echo "Wheel is located at $wheel" +echo $wheel diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index bf52f01..00e67cd 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -31,4 +31,4 @@ fi # Set this so we can execute the export XET_STANDALONE_CLI=${cli} -pytest -n 8 --verbose tests/ +pytest -n 4 --verbose tests/ From c5fed8b9c3dd9ac0354c81a80b00f14967a590a4 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 10:07:28 -1000 Subject: [PATCH 05/10] Updated script run location. --- python/pyxet/scripts/run_tests.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index 00e67cd..0a77510 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -29,6 +29,14 @@ if [[ -z "$VIRTUAL_ENV" ]] ; then exit 1 fi -# Set this so we can execute the -export XET_STANDALONE_CLI=${cli} -pytest -n 4 --verbose tests/ +# Set this so we can execute the scripts properly +tests_dir=${PWD}/tests/ + +# Make sure windows executable can run anywhere +mkdir -p ./testing_tmp/ +work_dir="$(mktemp -d -p "${PWD}/testing_tmp")" + +cp "${cli}" "${work_dir}" +export XET_STANDALONE_CLI=${work_dir}/$(basename "$cli") +cd "${work_dir}" +pytest -n 4 --verbose "$tests_dir" From e7c4c8ad333d81713cb0c8c3c31be5e180512737 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 11:34:35 -1000 Subject: [PATCH 06/10] pytest back to sequential. --- python/pyxet/scripts/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index 0a77510..e5eb460 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -39,4 +39,4 @@ work_dir="$(mktemp -d -p "${PWD}/testing_tmp")" cp "${cli}" "${work_dir}" export XET_STANDALONE_CLI=${work_dir}/$(basename "$cli") cd "${work_dir}" -pytest -n 4 --verbose "$tests_dir" +pytest --verbose "$tests_dir" From eb2fa55d6075b278b8f2494a7c1eedfe127066cd Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 11:43:35 -1000 Subject: [PATCH 07/10] Update to run some tests in parallel. --- python/pyxet/scripts/run_tests.sh | 2 +- python/pyxet/tests/arrow_test.py | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index e5eb460..c6574f6 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -39,4 +39,4 @@ work_dir="$(mktemp -d -p "${PWD}/testing_tmp")" cp "${cli}" "${work_dir}" export XET_STANDALONE_CLI=${work_dir}/$(basename "$cli") cd "${work_dir}" -pytest --verbose "$tests_dir" +pytest -n 12 --verbose "$tests_dir" diff --git a/python/pyxet/tests/arrow_test.py b/python/pyxet/tests/arrow_test.py index 29e728a..6404859 100644 --- a/python/pyxet/tests/arrow_test.py +++ b/python/pyxet/tests/arrow_test.py @@ -5,23 +5,18 @@ import pyxet from utils import skip_if_no, CONSTANTS - -@pytest.mark.skip("Not sure if pyxet will implement read_arrow - TODO") -def test_read_arrow(): - dataset = pyxet.read_arrow(CONSTANTS.TITANIC_PARQUET) - assert dataset.to_table().num_rows == 891 - +# This just ends up being one test, as this repo is currently mdbv1, and the mdb +# v1 clone process is not safe between multiple processes. @skip_if_no("pyarrow") -def test_pyarrow_dataset(): +def test_pyarrow(): import pyarrow.dataset as ds fs = pyxet.XetFS(repo_url=CONSTANTS.TITANIC_MAIN) dataset = ds.dataset(CONSTANTS.TITANIC_PARQUET, filesystem=fs) assert dataset.to_table().num_rows == 891 -@skip_if_no("pyarrow") -def test_pyarrow_parquet(): +# def test_pyarrow_parquet(): from pyarrow.parquet import ParquetFile import pyarrow as pa @@ -31,9 +26,7 @@ def test_pyarrow_parquet(): df = pa.Table.from_batches([first_ten_rows]).to_pandas() assert df.shape == (10, 12) - -@skip_if_no("pyarrow") -def test_pyarrow_stream(): +# def test_pyarrow_stream(): import pandas as pd from pyarrow.fs import PyFileSystem, FSSpecHandler @@ -44,8 +37,7 @@ def test_pyarrow_stream(): assert df.shape == (891, 12) -@pytest.mark.skip("cp not implemented") -def test_pyarrow_stream_cp(): +# def test_pyarrow_stream_cp(): with pytest.raises(NotImplementedError): # TODO pa_fs.copy_file(CONSTANTS.TITANIC_CSV, 'https://xethub.com/xdssio/titanic.git/main/titanic2.csv') From 53c6b2bfc956a9a451c1bb986e39d73eca610e76 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 11:57:44 -1000 Subject: [PATCH 08/10] Fixed absolute paths issue on windows. --- python/pyxet/scripts/build_standalone_cli.sh | 6 +++--- python/pyxet/scripts/build_wheel.sh | 2 +- python/pyxet/scripts/run_tests.sh | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/python/pyxet/scripts/build_standalone_cli.sh b/python/pyxet/scripts/build_standalone_cli.sh index 5677f13..1fd7e8c 100755 --- a/python/pyxet/scripts/build_standalone_cli.sh +++ b/python/pyxet/scripts/build_standalone_cli.sh @@ -27,13 +27,13 @@ if [[ "$OS" == "Darwin" ]]; then fi >&2 pyinstaller --onefile "$xet_cli_path" --name xet $target_flag - cli_path="$PWD/dist/xet" + cli_path="dist/xet" elif [[ "$OS" == "Linux" ]] ; then >&2 pyinstaller --onefile "$xet_cli_path" --name xet - cli_path="$PWD/dist/xet" + cli_path="dist/xet" else >&2 pyinstaller --onefile "$xet_cli_path" --name xet - cli_path="$PWD/dist/xet.exe" + cli_path="dist/xet.exe" fi >&2 echo "Standalone installer is located at ${cli_path}." diff --git a/python/pyxet/scripts/build_wheel.sh b/python/pyxet/scripts/build_wheel.sh index c7ed696..53e7abc 100755 --- a/python/pyxet/scripts/build_wheel.sh +++ b/python/pyxet/scripts/build_wheel.sh @@ -45,7 +45,7 @@ fi >&2 maturin build $flags -wheel=$PWD/$(ls target/wheels/pyxet-*.whl | head -n 1) +wheel=$(ls ./target/wheels/pyxet-*.whl | head -n 1) >&2 echo "Wheel is located at $wheel" echo $wheel diff --git a/python/pyxet/scripts/run_tests.sh b/python/pyxet/scripts/run_tests.sh index e5eb460..b6ad94a 100755 --- a/python/pyxet/scripts/run_tests.sh +++ b/python/pyxet/scripts/run_tests.sh @@ -29,14 +29,13 @@ if [[ -z "$VIRTUAL_ENV" ]] ; then exit 1 fi -# Set this so we can execute the scripts properly -tests_dir=${PWD}/tests/ - # Make sure windows executable can run anywhere -mkdir -p ./testing_tmp/ -work_dir="$(mktemp -d -p "${PWD}/testing_tmp")" +work_dir=./.testing_tmp +rm -rf $work_dir || echo "" +mkdir -p $work_dir + +cp "$cli" "$work_dir" +cd "$work_dir" -cp "${cli}" "${work_dir}" -export XET_STANDALONE_CLI=${work_dir}/$(basename "$cli") -cd "${work_dir}" -pytest --verbose "$tests_dir" +export XET_STANDALONE_CLI="./$(basename ${cli})" +pytest --verbose "../tests/" From 63fe77ad9d35aa9496434a81faa9abbfc87c5e30 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 12:40:25 -1000 Subject: [PATCH 09/10] Dropped unsupported test. --- python/pyxet/tests/arrow_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/pyxet/tests/arrow_test.py b/python/pyxet/tests/arrow_test.py index 6404859..440eaf4 100644 --- a/python/pyxet/tests/arrow_test.py +++ b/python/pyxet/tests/arrow_test.py @@ -37,12 +37,6 @@ def test_pyarrow(): assert df.shape == (891, 12) -# def test_pyarrow_stream_cp(): - with pytest.raises(NotImplementedError): # TODO - pa_fs.copy_file(CONSTANTS.TITANIC_CSV, - 'https://xethub.com/xdssio/titanic.git/main/titanic2.csv') - - @pytest.mark.skip("Not sure if we need this - TODO") def test_pyarrow_fsspec(): from pyarrow import fs From 4d9cf665638e3d47e4f4f5c2651efb617293dfd3 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Fri, 7 Jun 2024 12:41:17 -1000 Subject: [PATCH 10/10] Removed duplicate file. --- python/pyxet/tests/test_standalone_cli.py | 383 ---------------------- 1 file changed, 383 deletions(-) delete mode 100644 python/pyxet/tests/test_standalone_cli.py diff --git a/python/pyxet/tests/test_standalone_cli.py b/python/pyxet/tests/test_standalone_cli.py deleted file mode 100644 index bfa3abc..0000000 --- a/python/pyxet/tests/test_standalone_cli.py +++ /dev/null @@ -1,383 +0,0 @@ -import os -import pytest -import pyxet -import utils -import shutil -import tempfile - -from pyxet.file_operations import perform_copy, build_cp_action_list - - -def delete_branch(repo, branch, *args): - try: - pyxet.BranchCLI.delete(repo, branch, *args) - except Exception as e: - print(f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}") - -def test_single_file_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate a random file in a temp dir - dir = tempfile.mkdtemp() - local_file = f"{dir}/data" - utils.random_binary_file(local_file, 1024) - - # test variations of path - source_list = [ - f"{dir}/data", - ] - - dest_list = [ - # (dest in cp command, expected path of remote file) - (f"xet://{host}:{user}/{repo}/{b1}", [f"xet://{host}:{user}/{repo}/{b1}/data"]), - (f"xet://{host}:{user}/{repo}/{b1}/",[f"xet://{host}:{user}/{repo}/{b1}/data"]), - (f"xet://{host}:{user}/{repo}/{b1}/zz", [f"xet://{host}:{user}/{repo}/{b1}/zz"]), - ] - - recursive_list = [ - False, - True, - ] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest[0]} {r}") - perform_copy(src, dest[0], "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", dest[1]) - pyxet.PyxetCLI.rm(dest[1]) - finally: - shutil.rmtree(dir) - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_multiple_files_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate random files in a temp dir - dir = tempfile.mkdtemp() - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [1024, 1024]) - - # test variations of path - source_list = [ - f"{dir}/*", - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - False, - True, - ] - - expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - perform_copy(src, dest, "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) - pyxet.PyxetCLI.rm(expected_files) - finally: - shutil.rmtree(dir) - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_glob_nonrecursive_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate a mix of random files and directories in a temp dir - dir = tempfile.mkdtemp() - - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [1024, 1024]) - - sub_dir = "subdir" - os.mkdir(f"{dir}/{sub_dir}") - utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) - - # test variations of path - source_list = [ - f"{dir}/*", - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - False, - ] - - expected_files = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1"] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - perform_copy(src, dest, "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files) - utils.assert_remote_files_not_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"]) - pyxet.PyxetCLI.rm(expected_files) - finally: - shutil.rmtree(dir) - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_glob_recursive_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate a mix of random files and directories in a temp dir - dir = tempfile.mkdtemp() - - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [1024, 1024]) - - sub_dir = "subdir" - os.mkdir(f"{dir}/{sub_dir}") - utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) - - # test variations of path - source_list = [ - f"{dir}/*", - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - True, - ] - - expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] - expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - perform_copy(src, dest, "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) - pyxet.PyxetCLI.rm(expected_files_level1) - finally: - shutil.rmtree(dir) - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_directory_nonrecursive_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate a random file in a temp dir - dir = tempfile.mkdtemp() - sub_dir = "subdir" - os.mkdir(f"{dir}/{sub_dir}") - local_file = f"{dir}/{sub_dir}/data" - utils.random_binary_file(local_file, 1024) - - # test variations of path - source_list = [ - (f"{dir}/{sub_dir}/data", True), - (f"{dir}/{sub_dir}", False), - (f"{dir}/{sub_dir}/", False) - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - False, - ] - - try: - for src, should_succeed in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - - if should_succeed: - perform_copy(src, dest, "add data", r) - - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", [f"xet://{host}:{user}/{repo}/{b1}/data"]) - - else: - - # ignores instead of raising error - perform_copy(src, dest, "add data", r) - - finally: - shutil.rmtree(dir) - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_directory_recursive_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - try: - # generate a mix of random files and directories in a temp dir - dir = tempfile.mkdtemp() - - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [1024, 1024]) - - sub_dir = "subdir" - os.mkdir(f"{dir}/{sub_dir}") - utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) - - # test variations of path - source_list = [ - f"{dir}/*", - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - True, - ] - - expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/data0", f"xet://{host}:{user}/{repo}/{b1}/data1", f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}"] - expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/data"] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - perform_copy(src, dest, "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{sub_dir}/*", expected_files_level2) - pyxet.PyxetCLI.rm(expected_files_level1) - pyxet.PyxetCLI.rm(expected_files_level2) - finally: - shutil.rmtree(dir) - - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -# According to https://filesystem-spec.readthedocs.io/en/latest/copying.html#single-source-to-single-target -# section 1e, if the trailing slash is omitted from "source/subdir" then the subdir is also copied, -# not just its contents. -# -# NOTE: only use this behavior for the fsspec copy method in python, not the xet cp command line. -def _test_directory_recursive_noslash_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - try: - # generate a mix of random files and directories in a temp dir - dir = tempfile.mkdtemp() - dir_name = os.path.basename(dir) - - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [1024, 1024]) - - sub_dir = "subdir" - os.mkdir(f"{dir}/{sub_dir}") - utils.random_binary_file(f"{dir}/{sub_dir}/data", 1024) - - # test variations of path - source_list = [ - f"{dir}", - ] - - dest_list = [ - f"xet://{host}:{user}/{repo}/{b1}", - f"xet://{host}:{user}/{repo}/{b1}/", - ] - - recursive_list = [ - True, - ] - - expected_files_level1 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}"] - expected_files_level2 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/data0", f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/data1", f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}"] - expected_files_level3 = [f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}/data"] - - try: - for src in source_list: - for dest in dest_list: - for r in recursive_list: - print(f"xet cp {src} {dest} {r}") - perform_copy(src, dest, "add data", r) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/*", expected_files_level1) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/*", expected_files_level2) - utils.assert_remote_files_exist(f"xet://{host}:{user}/{repo}/{b1}/{dir_name}/{sub_dir}/*", expected_files_level3) - pyxet.PyxetCLI.rm(expected_files_level1) - finally: - shutil.rmtree(dir) - - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_large_batch_upload(): - user, host = utils.test_account_login() - repo = utils.test_repo() - b1 = utils.new_random_branch_from(f"xet://{host}:{user}/{repo}", "main") - - try: - # generate a large batch of random files in a temp dir - dir = tempfile.mkdtemp() - - n_files = 1000 - local_files = list(map(lambda i: f"{dir}/data{i}", range(n_files))) - utils.random_binary_files(local_files, [1024] * n_files) - - try: - pyxet.commit_transaction.TRANSACTION_FILE_LIMIT = 100 - perform_copy(f"{dir}/", f"xet://{host}:{user}/{repo}/{b1}", "add data", True) - finally: - shutil.rmtree(dir) - - finally: - delete_branch(f"xet://{host}:{user}/{repo}", b1, True) - -def test_size_hint(): - user, host = utils.test_account_login() - repo = utils.test_repo() - - try: - # generate a large batch of random files in a temp dir - dir = tempfile.mkdtemp() - local_files = [f"{dir}/data0", f"{dir}/data1"] - utils.random_binary_files(local_files, [262, 471]) - - cplist = build_cp_action_list(f"{dir}/*", f"xet://{host}:{user}/{repo}/main") - assert len(cplist) == 2 - assert cplist[0].size == 262 - assert cplist[1].size == 471 - - finally: - shutil.rmtree(dir) \ No newline at end of file