Skip to content
1 change: 1 addition & 0 deletions python/pyxet/pyxet/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def version():
"""
print(__version__)

@staticmethod
@cli.command()
def cp(source: Annotated[typing.List[str], typer.Argument(help="Source file or folder to copy")],
target: Annotated[str, typer.Argument(help="Target location of the file or folder")],
Expand Down
38 changes: 22 additions & 16 deletions python/pyxet/scripts/build_standalone_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,37 @@
# Will build wheel in release mode, then build standalone executable using the xet packaged with the CLI

if [[ ! -e pyproject.toml ]] ; then
echo "Run this script in the pyxet directory using ./scripts/$0"
>&2 echo "Run this script in the pyxet directory using ./scripts/$0"
exit 1
fi

source ./scripts/build_wheel.sh
>&2 wheel_location=$(./scripts/build_wheel.sh)

pip install target/wheels/pyxet-*.whl
>&2 pip install $wheel_location
>&2 pip install -r ./scripts/cli_requirements.txt

OS=$(uname -s)

xet_cli_path="./scripts/xet_standalone_entry.py"
>&2 echo "Path to xet entry script = '${xet_cli_path}'"

# Build binary
if [[ "$OS" == "Darwin" ]]; then
xet_cli_path="$(which xet)"
echo "Path to xet = '${xet_cli_path}'"
pyinstaller --onefile "$xet_cli_path" --target-arch universal2
if [[ ${_PYXET_BUILD_MODE} == "debug" ]] ; then
target_flag=
else
target_flag="--target-arch=universal2"
fi

>&2 pyinstaller --onefile "$xet_cli_path" --name xet $target_flag
cli_path="dist/xet"
elif [[ "$OS" == "Linux" ]] ; then
xet_cli_path="$(which xet)"
echo "Path to xet = '${xet_cli_path}'"
pyinstaller --onefile "$xet_cli_path"
>&2 pyinstaller --onefile "$xet_cli_path" --name xet
cli_path="dist/xet"
else
# Windows is weird. Have to go directly to the cli path

# Find the cli file, which isn't always where you want it to be.
xet_cli_path="./.venv_build/Lib/site-packages/pyxet/cli.py"
echo "Path to xet = '${xet_cli_path}'"
pyinstaller --onefile "$xet_cli_path"
mv dist/cli.exe dist/xet.exe
>&2 pyinstaller --onefile "$xet_cli_path" --name xet
cli_path="dist/xet.exe"
fi

>&2 echo "Standalone installer is located at ${cli_path}."
echo ${cli_path}
43 changes: 30 additions & 13 deletions python/pyxet/scripts/build_wheel.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Will build wheel in release mode.

if [[ ! -e pyproject.toml ]] ; then
echo "Run this script in the pyxet directory using ./scripts/$0"
>&2 echo "Run this script in the pyxet directory using ./scripts/$0"
exit 1
fi

Expand All @@ -13,22 +13,39 @@ OS=$(uname -s)
export MACOSX_DEPLOYMENT_TARGET=10.9
unset CONDA_PREFIX

# Load the virtualenv helper functions (e.g. create_venv) used below.
>&2 source ./scripts/setup_env.sh

# Use a new build environment that links against the system python on OSX
# and always creates a new environment.
rm -rf .venv_build
source ./scripts/setup_env.sh
create_venv .venv_build release
source $(venv_activate_script .venv_build)

# Clear out any old wheels
mkdir -p target/old_wheels/
mv target/wheels/* target/old_wheels/ || echo ""

if [[ "$OS" == "Darwin" ]]; then
maturin build --profile=cli-release --target=universal2-apple-darwin
# Create a fresh build venv unless the caller supplied one via
# _PYXET_BUILD_VIRTUAL_ENV, in which case that venv is activated instead.
if [[ -z $_PYXET_BUILD_VIRTUAL_ENV ]] ; then
>&2 rm -rf .venv_build
>&2 create_venv .venv_build release
>&2 source $(venv_activate_script .venv_build)
else
maturin build --profile=cli-release
>&2 source $(venv_activate_script ${_PYXET_BUILD_VIRTUAL_ENV})
fi

echo "Wheel is located at target/wheels/pyxet-*.whl"
# Clear out any old wheels
>&2 mkdir -p target/old_wheels/
>&2 mv target/wheels/* target/old_wheels/ || echo ""

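# Choose the maturin flags: none for debug builds; otherwise the cli-release
# profile, plus the universal2 target on macOS.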
if [[ $_PYXET_BUILD_MODE == "debug" ]] ; then
flags=
else
flags="--profile=cli-release"

if [[ "$OS" == "Darwin" ]]; then
flags="$flags --target=universal2-apple-darwin"
fi
fi

>&2 maturin build $flags

wheel=$(ls ./target/wheels/pyxet-*.whl | head -n 1)
>&2 echo "Wheel is located at $wheel"
echo $wheel

3 changes: 3 additions & 0 deletions python/pyxet/scripts/cli_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Extra packages installed when building the standalone CLI: filesystem
# backends that the fsspec implementation may need at runtime.
s3fs
36 changes: 21 additions & 15 deletions python/pyxet/scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,33 @@ if [[ ! -e pyproject.toml ]] ; then
fi

source ./scripts/setup_env.sh
create_venv venv dev
create_venv venv dev # The dev part here installs the additional dev requirements
source $(venv_activate_script venv)

export _PYXET_BUILD_MODE=debug
export _PYXET_BUILD_VIRTUAL_ENV=venv

# Build the wheel.
wheel=$(./scripts/build_wheel.sh)

# Build the standalone cli and wheel
cli=$(./scripts/build_standalone_cli.sh)

# Install the wheel
pip install "$wheel"

if [[ -z "$VIRTUAL_ENV" ]] ; then
echo "Failed to activate virtual env."
exit 1
fi

# Make sure windows executable can run anywhere
work_dir=./.testing_tmp
rm -rf $work_dir || echo ""
mkdir -p $work_dir

# Clear out any old wheels
mkdir -p target/old_wheels/
mv target/wheels/* target/old_wheels/ || echo ""

echo "$(which pip)"

# Install
maturin build
pip install target/wheels/pyxet-*.whl

# TODO: This runs the tests in parallel using pytest-xdist
# Error: tests in cli can't be run simultaneously actually, as there are conflicts.
#pytest -n 12 --verbose tests/
pytest --verbose tests/
cp "$cli" "$work_dir"
cd "$work_dir"

export XET_STANDALONE_CLI="./$(basename ${cli})"
pytest -n 12 --verbose "../tests/"
21 changes: 12 additions & 9 deletions python/pyxet/scripts/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,17 @@ create_venv() {
[[ -e "./$venv_name" ]] || exit 1

source $(venv_activate_script $venv_name)

>&2 pip install --upgrade pip
if [[ $build_mode == "release" ]] ; then
# For building the wheel / standalone xet, use minimal installation
# environment; otherwise may pull in non-universal2 compatible package.
>&2 pip install -r scripts/build_requirements.txt
else
>&2 pip install -r scripts/dev_requirements.txt
fi
fi

# Bring pip and the installed requirements up to date.
>&2 pip install --upgrade pip
if [[ $build_mode == "release" ]] ; then
# For building the wheel / standalone xet, use minimal installation
# environment; otherwise may pull in non-universal2 compatible package.
>&2 pip install --upgrade -r scripts/build_requirements.txt
else
# Install both the build requirements and the dev requirements.
>&2 pip install --upgrade -r scripts/build_requirements.txt
>&2 pip install --upgrade -r scripts/dev_requirements.txt
fi
}
11 changes: 11 additions & 0 deletions python/pyxet/scripts/xet_standalone_entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python

# Entry script for the standalone `xet` executable (this is the file handed to
# PyInstaller by scripts/build_standalone_cli.sh).
#
# Pull in the CLI from the installed pyxet package using an absolute import;
# relative imports would not work when this file is run as a top-level script.
from pyxet.cli import cli

# s3fs is never referenced in this file. It is imported explicitly so that the
# S3 dependencies get packaged into the standalone binary; per the original
# note, without it S3 access fails with unrelated-looking repo errors
# (NOTE(review): exact failure mode not visible here - confirm).
# Do not remove this import as "unused".
import s3fs  # noqa: F401

if __name__ == "__main__":
    cli()
24 changes: 5 additions & 19 deletions python/pyxet/tests/arrow_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,18 @@
import pyxet
from utils import skip_if_no, CONSTANTS


@pytest.mark.skip("Not sure if pyxet will implement read_arrow - TODO")
def test_read_arrow():
    """Check that ``pyxet.read_arrow`` yields all 891 rows of the Titanic parquet file.

    Currently skipped: it is undecided whether pyxet will provide ``read_arrow``.
    """
    table = pyxet.read_arrow(CONSTANTS.TITANIC_PARQUET).to_table()
    assert table.num_rows == 891

# The checks below are combined into a single test because this repo is
# currently MDB v1, and the MDB v1 clone process is not safe to run from
# multiple processes at once.

@skip_if_no("pyarrow")
def test_pyarrow_dataset():
def test_pyarrow():
import pyarrow.dataset as ds
fs = pyxet.XetFS(repo_url=CONSTANTS.TITANIC_MAIN)
dataset = ds.dataset(CONSTANTS.TITANIC_PARQUET, filesystem=fs)
assert dataset.to_table().num_rows == 891


@skip_if_no("pyarrow")
def test_pyarrow_parquet():
# def test_pyarrow_parquet():
from pyarrow.parquet import ParquetFile
import pyarrow as pa

Expand All @@ -31,9 +26,7 @@ def test_pyarrow_parquet():
df = pa.Table.from_batches([first_ten_rows]).to_pandas()
assert df.shape == (10, 12)


@skip_if_no("pyarrow")
def test_pyarrow_stream():
# def test_pyarrow_stream():
import pandas as pd
from pyarrow.fs import PyFileSystem, FSSpecHandler

Expand All @@ -44,13 +37,6 @@ def test_pyarrow_stream():
assert df.shape == (891, 12)


@pytest.mark.skip("cp not implemented")
def test_pyarrow_stream_cp():
with pytest.raises(NotImplementedError): # TODO
pa_fs.copy_file(CONSTANTS.TITANIC_CSV,
'https://xethub.com/xdssio/titanic.git/main/titanic2.csv')


@pytest.mark.skip("Not sure if we need this - TODO")
def test_pyarrow_fsspec():
from pyarrow import fs
Expand Down
3 changes: 0 additions & 3 deletions python/pyxet/tests/cli_cp_new_url_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@

from pyxet.file_operations import perform_copy, build_cp_action_list


def delete_branch(repo, branch, *args):
    """Best-effort removal of ``branch`` from ``repo``.

    Extra positional ``args`` are forwarded unchanged to
    ``pyxet.BranchCLI.delete``. Any exception is reported as a warning on
    stdout instead of being raised, so a failed cleanup never fails the
    calling test.
    """
    try:
        pyxet.BranchCLI.delete(repo, branch, *args)
    except Exception as e:
        # Deliberately broad: cleanup must not mask the real test outcome.
        warning = f"WARNING: Exception trying to delete branch {branch} on {repo}: {e}"
        print(warning)



def test_single_file_upload():
user, host = utils.test_account_login()
Expand Down
Loading