diff --git a/.github/workflows/crates-publish.yml b/.github/workflows/crates-publish.yml new file mode 100644 index 0000000..db0261a --- /dev/null +++ b/.github/workflows/crates-publish.yml @@ -0,0 +1,37 @@ +name: Publish crate + +on: + release: + types: [published] + +permissions: + contents: read + id-token: write + +jobs: + publish-to-crates: + if: startsWith(github.ref_name, 'rs-v') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + + - name: Verify release tag matches Cargo.toml version + shell: bash + run: | + cargo_version=$(grep -m1 '^version = ' rust/Cargo.toml | sed -E 's/^version = "(.*)"/\1/') + tag="${GITHUB_REF_NAME#rs-v}" + echo "Cargo.toml version: $cargo_version" + echo "Release tag: $tag" + if [ "$cargo_version" != "$tag" ]; then + echo "::error::Release tag ($tag) does not match rust/Cargo.toml version ($cargo_version). Bump Cargo.toml or fix the tag." + exit 1 + fi + + - name: Authenticate to crates.io + uses: rust-lang/crates-io-auth-action@v1 + id: auth + + - name: Publish to crates.io + run: cargo publish --manifest-path rust/Cargo.toml + env: + CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..261d08b --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,29 @@ +name: Documentation +on: + push: + branches: + - main +permissions: + contents: read + pages: write + id-token: write +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - uses: actions/configure-pages@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: pip install zensical + - run: zensical build --clean + working-directory: python + - uses: actions/upload-pages-artifact@v4 + with: + path: python/site + - uses: actions/deploy-pages@v4 + id: deployment diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new 
file mode 100644 index 0000000..a8dbb58 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,42 @@ +name: PR + +on: + pull_request: + +permissions: + contents: read + +jobs: + rust-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: cargo test + run: cargo test -p rusterize-rs + + python-tests: + runs-on: ubuntu-latest + defaults: + run: + working-directory: python + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install GDAL system libs + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends gdal-bin libgdal-dev + + - name: Build extension + install test deps + run: | + pip install maturin + maturin build --out dist + pip install numpy setuptools wheel + pip install --no-build-isolation gdal==$(gdal-config --version) + pip install "$(ls dist/*.whl)[all]" pytest + + - name: pytest + run: pytest test/ diff --git a/.github/workflows/CI.yml b/.github/workflows/pypi-publish.yml similarity index 92% rename from .github/workflows/CI.yml rename to .github/workflows/pypi-publish.yml index 893b95d..ad15cd6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/pypi-publish.yml @@ -9,14 +9,15 @@ permissions: jobs: check-version: + if: startsWith(github.ref_name, 'py-v') runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Verify release tag matches Cargo.toml version shell: bash run: | - cargo_version=$(grep -m1 '^version = ' Cargo.toml | sed -E 's/^version = "(.*)"/\1/') - tag="${GITHUB_REF_NAME#v}" + cargo_version=$(grep -m1 '^version = ' python/Cargo.toml | sed -E 's/^version = "(.*)"/\1/') + tag="${GITHUB_REF_NAME#py-v}" echo "Cargo.toml version: $cargo_version" echo "Release tag: $tag" if [ "$cargo_version" != "$tag" ]; then @@ -54,6 +55,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter 
sccache: "true" @@ -63,7 +65,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-linux-${{ matrix.platform.target }} - path: dist + path: python/dist musllinux: needs: check-version @@ -93,6 +95,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -102,7 +105,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-musllinux-${{ matrix.platform.target }} - path: dist + path: python/dist windows: needs: check-version @@ -129,6 +132,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -137,7 +141,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-windows-${{ matrix.platform.target }} - path: dist + path: python/dist macos: needs: check-version @@ -165,6 +169,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -173,7 +178,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-macos-${{ matrix.platform.target }} - path: dist + path: python/dist sdist: needs: check-version @@ -183,13 +188,14 @@ jobs: - name: Build sdist uses: PyO3/maturin-action@v1 with: + working-directory: python command: sdist args: --out dist - name: Upload sdist uses: actions/upload-artifact@v5 with: name: wheels-sdist - path: dist + path: python/dist publish-to-pypi: name: Publish to PyPI diff --git a/.github/workflows/test-ci.yml b/.github/workflows/testpypi-publish.yml similarity index 93% rename from .github/workflows/test-ci.yml rename to .github/workflows/testpypi-publish.yml index ed23131..d769d4d 100644 --- a/.github/workflows/test-ci.yml +++ b/.github/workflows/testpypi-publish.yml @@ 
-24,7 +24,7 @@ jobs: - name: Verify input version matches Cargo.toml version shell: bash run: | - cargo_version=$(grep -m1 '^version = ' Cargo.toml | sed -E 's/^version = "(.*)"/\1/') + cargo_version=$(grep -m1 '^version = ' python/Cargo.toml | sed -E 's/^version = "(.*)"/\1/') tag="${{ inputs.version }}" tag="${tag#v}" echo "Cargo.toml version: $cargo_version" @@ -64,6 +64,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -73,7 +74,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-linux-${{ matrix.platform.target }} - path: dist + path: python/dist musllinux: needs: check-version @@ -103,6 +104,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -112,7 +114,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-musllinux-${{ matrix.platform.target }} - path: dist + path: python/dist windows: needs: check-version @@ -139,6 +141,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -147,7 +150,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-windows-${{ matrix.platform.target }} - path: dist + path: python/dist macos: needs: check-version @@ -175,6 +178,7 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: + working-directory: python target: ${{ matrix.platform.target }} args: --profile dist-release --out dist --find-interpreter sccache: "true" @@ -183,7 +187,7 @@ jobs: uses: actions/upload-artifact@v5 with: name: wheels-macos-${{ matrix.platform.target }} - path: dist + path: python/dist sdist: needs: check-version @@ -193,13 +197,14 @@ jobs: - name: 
Build sdist uses: PyO3/maturin-action@v1 with: + working-directory: python command: sdist args: --out dist - name: Upload sdist uses: actions/upload-artifact@v5 with: name: wheels-sdist - path: dist + path: python/dist publish-to-testpypi: name: Publish to TestPyPI diff --git a/.gitignore b/.gitignore index 55c6e8a..1fb3150 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ # Generated by Cargo -# will have compiled files and executables debug/ target/ dist/ @@ -8,9 +7,6 @@ dist_base/ # These are backup files generated by rustfmt **/*.rs.bk -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - # environment .venv .env @@ -23,7 +19,7 @@ uv.lock # Pycache **/__pycache__ -.pytest_cache +**/.pytest_cache # Benchmarks .benchmarks @@ -31,14 +27,16 @@ uv.lock # Other stuff README_files .ruff_cache -benchmarks/data +python/benchmarks/data .git **/*.aux.xml +**/*.cache # Temporary maturin development *.pyd -# Jupyter Notebooks -.ipynb_checkpoints -*/.ipynb_checkpoints/* -dev_notebooks/ +# Zensical +python/site + +# Profiling +rust/examples diff --git a/Cargo.lock b/Cargo.lock index 1194a6a..bc7c519 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -38,12 +32,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anyhow" -version = "1.0.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" - [[package]] name = "approx" version = "0.5.1" @@ -62,6 +50,15 @@ dependencies = [ "object", ] +[[package]] +name = 
"arc-swap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c049c0be4daef0b145cb3555416b3b8ef5b7888a38aea1a3a155801fe7b0810b" +dependencies = [ + "rustversion", +] + [[package]] name = "argminmax" version = "0.6.3" @@ -86,9 +83,15 @@ checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] name = "arrayvec" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +checksum = "f02882884d3e1bc524fb12c79f107f6ad0e1cfd498c536ffb494301740995dfe" + +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] name = "async-channel" @@ -254,9 +257,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +checksum = "8ae3f5d315924270530207e2a68396c3cc547f6dca3fbdca317cfb1a51edb593" dependencies = [ "serde", ] @@ -272,9 +275,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.64" +version = "1.2.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" +checksum = "e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96" dependencies = [ "find-msvc-tools", "jobserver", @@ -296,9 +299,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chacha20" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +checksum = 
"d524456ba66e72eb8b115ff89e01e497f8e6d11d78b70b1aa13c0fbd97540a81" dependencies = [ "cfg-if", "cpufeatures 0.3.0", @@ -307,15 +310,14 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ - "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-link 0.1.3", + "windows-link 0.2.1", ] [[package]] @@ -329,15 +331,10 @@ dependencies = [ ] [[package]] -name = "comfy-table" -version = "7.2.2" +name = "chunked_transfer" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" -dependencies = [ - "crossterm", - "unicode-segmentation", - "unicode-width", -] +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" [[package]] name = "compact_str" @@ -455,29 +452,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crossterm" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" -dependencies = [ - "bitflags", - "crossterm_winapi", - "document-features", - "parking_lot", - "rustix", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" -dependencies = [ - "winapi", -] - [[package]] name = "crunchy" version = "0.2.4" @@ -510,6 +484,27 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "displaydoc" version = "0.2.6" @@ -521,15 +516,6 @@ dependencies = [ "syn", ] -[[package]] -name = "document-features" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" -dependencies = [ - "litrs", -] - [[package]] name = "dyn-clone" version = "1.0.20" @@ -537,12 +523,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] -name = "earcutr" -version = "0.4.3" +name = "earcut" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +checksum = "88459a2a8e3a514b6e6de38cf3aaa9250a894cb098f74a932db77fcc8341b6d0" dependencies = [ - "itertools 0.11.0", "num-traits", ] @@ -552,6 +537,12 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "equivalent" version = "1.0.2" @@ -607,6 +598,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" +[[package]] 
+name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -631,9 +628,9 @@ dependencies = [ [[package]] name = "float_next_after" -version = "1.0.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" +checksum = "37007738a80ea34f969af54a3390dd72cacdef654974cfd449c9f6f72dbaac10" [[package]] name = "fnv" @@ -772,19 +769,22 @@ dependencies = [ [[package]] name = "geo" -version = "0.30.0" +version = "0.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4416397671d8997e9a3e7ad99714f4f00a22e9eaa9b966a5985d2194fc9e02e1" +checksum = "30eb1fdc57c1e5cfd11826fe0caec4b9dc7901f3758263bb506228d88c8d9e9a" dependencies = [ - "earcutr", + "earcut", "float_next_after", "geo-types", "geographiclib-rs", "i_overlay", "log", "num-traits", + "rand 0.10.1", + "rand_pcg", "robust", "rstar", + "sif-itree", "spade", ] @@ -808,6 +808,7 @@ dependencies = [ "rayon", "rstar", "serde", + "spade", ] [[package]] @@ -848,16 +849,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +checksum = "300e883d756b2e4ec94e02791f39b04b522276138852cfc41d9fb7e904106099" dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", "rand_core 0.10.1", - "wasip2", - "wasip3", ] [[package]] @@ -939,6 +938,16 @@ version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "byteorder", + "num-traits", +] + [[package]] name = "heapless" version = "0.8.0" @@ -955,6 +964,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -970,6 +985,56 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "hotpath" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773a11e94e48f8a6418064c0665a8428f587981bb030b5684cf8ef1a0c3431e7" +dependencies = [ + "arc-swap", + "cfg-if", + "crossbeam-channel", + "futures-util", + "hdrhistogram", + "hotpath-macros", + "hotpath-meta", + "libc", + "pin-project-lite", + "prettytable-rs", + "quanta", + "regex", + "serde", + "serde_json", + "tiny_http", + "tokio", +] + +[[package]] +name = "hotpath-macros" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c222c9825343c64a3c7c5b6f2665a9076b760a352e5ca6b7fb4baf01c3824746" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "hotpath-macros-meta" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be74f4eb7a0eed70232e4f0863de4dfbdcba2c030c8bf6c8115a02771773578" + +[[package]] +name = "hotpath-meta" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19023f9af4e3864a31bf702c60e8622f420854966f559b74f58d7d9bf411d228" +dependencies = [ + "hotpath-macros-meta", +] + [[package]] name = "http" version = "1.4.2" @@ -1009,6 +1074,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humantime" version = "2.3.0" @@ -1077,24 +1148,27 @@ dependencies = [ [[package]] name = "i_float" -version = "1.7.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85df3a416829bb955fdc2416c7b73680c8dcea8d731f2c7aa23e1042fe1b8343" +checksum = "813145bb0ad5b60f55cbbf3c74cdceda1c0a9d253b35c4cc36ae0df7887cb78f" dependencies = [ - "serde", + "libm", ] [[package]] name = "i_key_sort" -version = "0.2.0" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd" +checksum = "d73d122b937fca067feb0ad74f62388920272b27c356d4df2d0cfdd59e044cf0" +dependencies = [ + "rayon", +] [[package]] name = "i_overlay" -version = "2.0.5" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0542dfef184afdd42174a03dcc0625b6147fb73e1b974b1a08a2a42ac35cee49" +checksum = "8dd314b4668e2b3a12508f2e125558c82a6c0a8636fa5107a900f79ce414e450" dependencies = [ "i_float", "i_key_sort", @@ -1105,19 +1179,18 @@ dependencies = [ [[package]] name = "i_shape" -version = "1.7.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a38f5a42678726718ff924f6d4a0e79b129776aeed298f71de4ceedbd091bce" +checksum = "bfa9eac533d7509a8ab87672b60ac610c17240f9ea4851d26227689fdfe349c8" dependencies = [ "i_float", - "serde", ] [[package]] name = "i_tree" -version = "0.8.3" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139" +checksum = 
"4804bdc1dc124eb7e1aa9e144ecc04096bcf787a10a15fa44af682b51f0f6cce" [[package]] name = "iana-time-zone" @@ -1131,7 +1204,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.62.2", ] [[package]] @@ -1225,12 +1298,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - [[package]] name = "idna" version = "1.1.0" @@ -1264,15 +1331,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "ipnet" version = "2.12.0" @@ -1280,12 +1338,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] -name = "itertools" -version = "0.11.0" +name = "is-terminal" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ - "either", + "hermit-abi", + "libc", + "windows-sys 0.61.2", ] [[package]] @@ -1315,9 +1375,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.102" +version = "0.3.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03d04c30968dffe80775bd4d7fb676131cd04a1fb46d2686dbffbaec2d9dfd31" +checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102" dependencies = [ "cfg-if", "futures-util", @@ -1325,10 +1385,10 @@ dependencies = [ ] [[package]] -name = "leb128fmt" -version = "0.1.0" +name = "lazy_static" +version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" @@ -1351,6 +1411,15 @@ dependencies = [ "cc", ] +[[package]] +name = "libredox" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02ab6bace2054fb888a3c16f990117b579d14a3088e472d63c6011fa185c9d3" +dependencies = [ + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -1363,12 +1432,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" -[[package]] -name = "litrs" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" - [[package]] name = "lock_api" version = "0.4.14" @@ -1380,9 +1443,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.32" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" +checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad" [[package]] name = "lru-slab" @@ -1427,22 +1490,13 @@ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" [[package]] name = "memmap2" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +checksum = "d1219ed1b7f229ee7104d281dd01d6802fe28bb6e95d292942c4daacdeb798c0" dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - 
"autocfg", -] - [[package]] name = "mimalloc" version = "0.1.52" @@ -1498,6 +1552,15 @@ dependencies = [ "chrono", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -1561,9 +1624,9 @@ dependencies = [ [[package]] name = "numpy" -version = "0.27.1" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aac2e6a6e4468ffa092ad43c39b81c79196c2bb773b8db4085f695efe3bba17" +checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" dependencies = [ "libc", "ndarray", @@ -1575,6 +1638,25 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.37.3" @@ -1602,7 +1684,7 @@ dependencies = [ "http-body-util", "humantime", "hyper", - "itertools 0.14.0", + "itertools", "parking_lot", "percent-encoding", "quick-xml", @@ -1710,9 +1792,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899852b723e563dc3cbdc7ea833b14ec44e61309f55df29ba86d45cfd6bc141a" +checksum = "82f1f122456ec136102033b13f71905b7c3f01e526642679c86aace9f9cdefde" dependencies = [ "getrandom 0.2.17", "getrandom 0.3.4", @@ -1725,7 +1807,6 @@ dependencies = [ "polars-io", "polars-lazy", 
"polars-ops", - "polars-parquet", "polars-sql", "polars-time", "polars-utils", @@ -1734,9 +1815,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f672743a042b72ace4f88b29f8205ab200b29c5ac976c0560899680c07d2d09" +checksum = "87d4892d5cc6461bb4a184d18e6fa03a5d316ee1d6de06a33dfa08d479fbc2db" dependencies = [ "atoi_simd", "bitflags", @@ -1777,23 +1858,44 @@ dependencies = [ "serde", ] +[[package]] +name = "polars-async" +version = "0.54.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e87f836190486f500b28347436985cc0af29b7a514e53f98840d396ce4d5f5" +dependencies = [ + "atomic-waker", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "parking_lot", + "pin-project-lite", + "polars-config", + "polars-error", + "polars-utils", + "rand 0.9.4", + "slotmap", + "tokio", +] + [[package]] name = "polars-buffer" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d7011424c3a79ca9c1272c7b4f5fe98695d3bed45595e37bb23c16a2978c80c" +checksum = "e481eeaf33c544ac0dd71a2e375553ca2fdae47b3472a96eaccb6eb43218783d" dependencies = [ "bytemuck", "either", + "polars-utils", "serde", "version_check", ] [[package]] name = "polars-compute" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a32eca8e08ac4cc5de2ac3996d2b38567bba72cdb19bbfd94c370193ed51dd" +checksum = "c55d41642a9ee887ac394c5a310af3256fa8340a86cde2cb624c515aa963461c" dependencies = [ "atoi_simd", "bytemuck", @@ -1816,18 +1918,27 @@ dependencies = [ "zmij", ] +[[package]] +name = "polars-config" +version = "0.54.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65af861341b00eac73bcb65423fb5cc3d2322526d6b7561a0ddf094947c38033" +dependencies = [ + "polars-error", + "serde", +] + [[package]] name = 
"polars-core" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "726296966d04268ee9679c2062af2d06c83c7a87379be471defe616b244c5029" +checksum = "3e5924fc46306054bae78f9d35ea5e404cf185baa7f170eb55a16ff95191069c" dependencies = [ "bitflags", "boxcar", "bytemuck", "chrono", "chrono-tz", - "comfy-table", "either", "getrandom 0.3.4", "hashbrown 0.16.1", @@ -1835,8 +1946,10 @@ dependencies = [ "itoa", "num-traits", "polars-arrow", + "polars-async", "polars-buffer", "polars-compute", + "polars-config", "polars-dtype", "polars-error", "polars-row", @@ -1849,6 +1962,7 @@ dependencies = [ "serde", "serde_json", "strum_macros", + "tokio", "uuid", "version_check", "xxhash-rust", @@ -1856,9 +1970,9 @@ dependencies = [ [[package]] name = "polars-dtype" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51976dc46d42cd1e7ca252a9e3bdc90c63b0bfa7030047ebaf5250c2b7838fa6" +checksum = "7b65a750bb99ea66be90c8a7e336f6f3a87427a0f7f89d2a40adae98314e9b27" dependencies = [ "boxcar", "hashbrown 0.16.1", @@ -1871,9 +1985,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c13126f8baebc13dadf26a80dcf69a607977fc8a67b18671ad2cefc713a7bdd" +checksum = "e49a75e3406b9b5b4e5ff177877fe0de766e9688fbdb263a7b25f293dc47d61a" dependencies = [ "object_store", "parking_lot", @@ -1886,9 +2000,9 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2151f54b0ae5d6b86c3c47df0898ff90edfe774807823f742f36e44973d51ea1" +checksum = "e21fdd37e8d9ef109f13d3454baffa0a57041cf60069123b8a2bd846c8ad0205" dependencies = [ "bitflags", "hashbrown 0.16.1", @@ -1912,9 +2026,9 @@ dependencies = [ [[package]] name = "polars-ffi" -version = "0.53.0" +version = 
"0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9526b18335cfddc556eb5c34cdecba3ecf49bba7734470a82728569d44e72a0" +checksum = "7fddc8eb96794c42758233f017cfe1cedb3ab24296583171a94d6f452f0ef1f6" dependencies = [ "polars-arrow", "polars-core", @@ -1922,9 +2036,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "059724d7762d7332cbc225e6504d996091b28fa1337716e06e5a81d9e54a34ad" +checksum = "6363a1c44a65fe8d73cce7fe4d77c9b6fea3a0da44007012e755e5b4e65aa078" dependencies = [ "async-trait", "atoi_simd", @@ -1932,6 +2046,7 @@ dependencies = [ "bytes", "chrono", "fast-float2", + "fastrand", "fs4", "futures", "glob", @@ -1942,16 +2057,19 @@ dependencies = [ "memmap2", "num-traits", "object_store", + "parking_lot", "percent-encoding", "polars-arrow", "polars-buffer", "polars-compute", + "polars-config", "polars-core", "polars-error", "polars-parquet", "polars-schema", "polars-time", "polars-utils", + "rand 0.9.4", "rayon", "regex", "reqwest", @@ -1964,9 +2082,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e1e24d4db8c349e9576564cfff47a3f08bb831dba9168f6599be178bc725e8" +checksum = "809d9590232a37d638337629c18279af97bdb0d17c3d8b2b6bb186e903e8bd5e" dependencies = [ "bitflags", "chrono", @@ -1975,6 +2093,7 @@ dependencies = [ "polars-arrow", "polars-buffer", "polars-compute", + "polars-config", "polars-core", "polars-expr", "polars-io", @@ -1990,9 +2109,9 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c394e4cd90186043d4051ce118e90794afbe81ac5eb9a51e358a56728e8ebde3" +checksum = "f55c6b7d162c506bc8eee82b065fa0399ebcd20b8f08675a534f3d360904ba38" dependencies = [ "memmap2", 
"polars-arrow", @@ -2008,11 +2127,29 @@ dependencies = [ "recursive", ] +[[package]] +name = "polars-ooc" +version = "0.54.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3eea0b386837b760a97ec9c92df99cbc10f94885cae060fd7100f9b794163" +dependencies = [ + "async-trait", + "boxcar", + "libc", + "polars-async", + "polars-config", + "polars-core", + "polars-io", + "polars-utils", + "thread_local", + "tokio", +] + [[package]] name = "polars-ops" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e47b2d9b3627662650da0a8c76ce5101ed1c61b104cb2b3663e0dc711571b12" +checksum = "cb146490a717ac5ae4ff3a22a5adf3ebae79361f187b1f550f9e24783d7ad765" dependencies = [ "argminmax", "base64", @@ -2044,9 +2181,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436bae3e89438cafe69400e7567057d7d9820d21ac9a4f69a33b413f2666f03d" +checksum = "fd6b79ba2103c00cbb9c5dd4459ffff1d8ce15286c7a6d376a04c711df20d8b7" dependencies = [ "async-stream", "base64", @@ -2058,6 +2195,7 @@ dependencies = [ "polars-arrow", "polars-buffer", "polars-compute", + "polars-config", "polars-error", "polars-parquet-format", "polars-utils", @@ -2079,9 +2217,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7930d5ae1d006179e65f01af57c859307b5875a4cc078dc75257250b9ae5162" +checksum = "2f5ccc230515adb10762a8c7b0df03fd88f3328deb5b60e9b1eeb2eceef4d344" dependencies = [ "bitflags", "blake3", @@ -2092,12 +2230,14 @@ dependencies = [ "either", "futures", "hashbrown 0.16.1", + "indexmap", "memmap2", "num-traits", "percent-encoding", "polars-arrow", "polars-buffer", "polars-compute", + "polars-config", "polars-core", "polars-error", "polars-io", @@ -2116,9 +2256,9 @@ dependencies = [ 
[[package]] name = "polars-row" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ea1a4554fe06442db1d6229235cd358e8eacba96aed8718f612caf3e3a646" +checksum = "3d4e3254450024078e10c919ecd3b467bdcfdd5cf386c2ca6eedec89bd4771d2" dependencies = [ "bitflags", "bytemuck", @@ -2132,9 +2272,9 @@ dependencies = [ [[package]] name = "polars-schema" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d688e73f9156f93cb29350be144c8f1e84c1bc705f00ee7f15eb9706a7971273" +checksum = "6f8a0de8951d02576fd0cdcecd9c605a6b6364d3105b7469b8d7874ea34eea2f" dependencies = [ "indexmap", "polars-error", @@ -2145,9 +2285,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100415f86069d7e9fbf54737148fc161a7c7316a6a7d375fb6cfc7fc64f570ae" +checksum = "b282a6164927eb12774b66b071b773a1573173ae53758e8d4df50389ff06efa2" dependencies = [ "bitflags", "hex", @@ -2165,53 +2305,51 @@ dependencies = [ [[package]] name = "polars-stream" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65a0c054bdf16efd16bbc587e8d5418ae28464d61afd735513579cd3c338fa70" +checksum = "cfa8ff4ee21799898579595a0ef2fb728d0a9cac3d061835fb7f7f6dd854734a" dependencies = [ "async-channel", "async-trait", - "atomic-waker", "bitflags", "bytes", "chrono-tz", "crossbeam-channel", - "crossbeam-deque", "crossbeam-queue", - "crossbeam-utils", "futures", "memchr", - "memmap2", "num-traits", "parking_lot", "percent-encoding", - "pin-project-lite", "polars-arrow", + "polars-async", "polars-buffer", "polars-compute", + "polars-config", "polars-core", "polars-error", "polars-expr", "polars-io", "polars-mem-engine", + "polars-ooc", "polars-ops", "polars-parquet", "polars-plan", "polars-time", "polars-utils", - "rand 0.9.4", 
"rayon", "recursive", "slotmap", "tokio", + "uuid", "version_check", ] [[package]] name = "polars-time" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e80404e1e418c997230e3b2972c3be331f45df8bdd3150fe3bef562c7a332f" +checksum = "e1063fe074c4212a54917be604377c6e6bfbc8b6c942a5c57be214e4ccaaafdf" dependencies = [ "atoi_simd", "bytemuck", @@ -2232,9 +2370,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.53.0" +version = "0.54.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c97cabf53eb8fbf6050cde3fef8f596c51cc25fd7d55fbde108d815ee6674abf" +checksum = "590b0a94aa8f97992d52f1198600ecc1c1f7cfa03c1b31cae057143455804ac0" dependencies = [ "argminmax", "bincode", @@ -2244,6 +2382,7 @@ dependencies = [ "either", "flate2", "foldhash 0.2.0", + "futures", "half", "hashbrown 0.16.1", "indexmap", @@ -2251,6 +2390,7 @@ dependencies = [ "memmap2", "num-derive", "num-traits", + "polars-config", "polars-error", "rand 0.9.4", "raw-cpuid", @@ -2262,6 +2402,8 @@ dependencies = [ "serde_stacker", "slotmap", "stacker", + "sysinfo", + "tokio", "uuid", "version_check", ] @@ -2300,13 +2442,16 @@ dependencies = [ ] [[package]] -name = "prettyplease" -version = "0.2.37" +name = "prettytable-rs" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" dependencies = [ - "proc-macro2", - "syn", + "encode_unicode", + "is-terminal", + "lazy_static", + "term", + "unicode-width", ] [[package]] @@ -2339,26 +2484,23 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.27.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d" +checksum = 
"91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.27.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "python3-dll-a", "target-lexicon", @@ -2366,9 +2508,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.27.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -2376,9 +2518,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.27.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2388,9 +2530,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.27.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -2401,19 +2543,20 @@ dependencies = [ [[package]] name = "pyo3-polars" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29248c4baefdfa23a7768341d1e431a5dee7348a757fa74315c810f3b8710d4" +checksum = 
"6ad2fcbcb8a0dc41549b73e6481c8fbe236a5461d8cf6d639e840b04d594990a" dependencies = [ "libc", "once_cell", "polars", "polars-arrow", + "polars-config", "polars-core", "polars-error", "polars-ffi", "pyo3", - "thiserror 1.0.69", + "thiserror 2.0.18", ] [[package]] @@ -2425,6 +2568,21 @@ dependencies = [ "cc", ] +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + [[package]] name = "quick-xml" version = "0.39.4" @@ -2437,9 +2595,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.9" +version = "0.11.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +checksum = "0c1a41e437b6bbd489372cd4971de128e85c855f56c57f283d20ff016cf7c0a8" dependencies = [ "bytes", "cfg_aliases", @@ -2457,9 +2615,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +checksum = "4fcb935c5bec503c2f0e306bdd3e58bb9029dcb14fa8d9ac76e3a5256ac0763e" dependencies = [ "bytes", "getrandom 0.3.4", @@ -2492,9 +2650,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.45" +version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368" dependencies = [ "proc-macro2", ] @@ -2528,7 +2686,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", - "getrandom 
0.4.2", + "getrandom 0.4.3", "rand_core 0.10.1", ] @@ -2567,6 +2725,15 @@ dependencies = [ "rand 0.9.4", ] +[[package]] +name = "rand_pcg" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caa0f4137e1c0a72f4c651489402276c8e8e1cf081f3b0ba156d2cbeef09e86a" +dependencies = [ + "rand_core 0.10.1", +] + [[package]] name = "raw-cpuid" version = "11.6.0" @@ -2631,6 +2798,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "regex" version = "1.12.4" @@ -2759,27 +2937,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] -name = "rusterize" -version = "0.8.1" +name = "rusterize-python" +version = "0.9.0" dependencies = [ - "bitflags", - "fixedbitset", "geo", "geo-traits", "geo-types", "mimalloc", - "ndarray", "num-traits", "numpy", "polars", "pyo3", "pyo3-polars", "rayon", + "rusterize-rs", "tikv-jemallocator", "wkb", "wkt", ] +[[package]] +name = "rusterize-rs" +version = "0.1.0" +dependencies = [ + "fixedbitset", + "geo", + "geo-types", + "hotpath", + "ndarray", + "num-traits", + "polars", + "rayon", + "thiserror 2.0.18", +] + [[package]] name = "rustix" version = "1.1.4" @@ -2795,9 +2986,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.40" +version = "0.23.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +checksum = "6b92b125634d9b795e7beca796cc790df15a7fb38323bf3196fda83292d06b1f" dependencies = [ "once_cell", "ring", @@ -2821,9 +3012,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.1" +version = 
"1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +checksum = "764899a24af3980067ee14bc143654f297b22eaebfe3c7b6b211920a5a59b046" dependencies = [ "web-time", "zeroize", @@ -2899,12 +3090,6 @@ dependencies = [ "libc", ] -[[package]] -name = "semver" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" - [[package]] name = "serde" version = "1.0.228" @@ -2988,6 +3173,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" +[[package]] +name = "sif-itree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7f45b8998ced5134fb1d75732c77842a3e888f19c1ff98481822e8fbfbf930b" + [[package]] name = "signal-hook" version = "0.4.4" @@ -3157,9 +3348,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.117" +version = "2.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" dependencies = [ "proc-macro2", "quote", @@ -3186,12 +3377,37 @@ dependencies = [ "syn", ] +[[package]] +name = "sysinfo" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + [[package]] name = "target-lexicon" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" 
+[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -3232,6 +3448,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tikv-jemalloc-sys" version = "0.7.1+5.3.1-0-g81034ce1f1373e37dc865038e1bc8eeecf559ce8" @@ -3252,6 +3477,18 @@ dependencies = [ "tikv-jemalloc-sys", ] +[[package]] +name = "tiny_http" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" +dependencies = [ + "ascii", + "chunked_transfer", + "httpdate", + "log", +] + [[package]] name = "tinystr" version = "0.8.3" @@ -3476,21 +3713,9 @@ checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" [[package]] name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - -[[package]] -name = "unindent" -version = "0.2.4" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "untrusted" @@ -3524,11 +3749,11 @@ checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.23.3" +version = "1.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" +checksum = "bf80a72845275afea99e7f2b434723d3bc7e38470fcd1c7ed39a599c73319a53" dependencies = [ - "getrandom 0.4.2", + "getrandom 0.4.3", "js-sys", "serde_core", "wasm-bindgen", @@ -3577,23 +3802,14 @@ version = "1.0.4+wasi-0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487" dependencies = [ - "wit-bindgen 0.57.1", -] - -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen 0.51.0", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.125" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ddb3f79143bced6de84270411622a2699cee572fc0875aeaf1e7867cf9fca1a" +checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4" dependencies = [ "cfg-if", "once_cell", @@ -3604,9 +3820,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.75" +version = "0.4.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503b14d284f2c8dac03b819967e155ea753f573586193b2b2c95990cb5d69280" +checksum = "c62df1340f32221cb9c54d6a27b030e3dba64361d4a95bed55f9aacb44da291d" dependencies = [ "js-sys", "wasm-bindgen", @@ -3614,9 +3830,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.125" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e21a184b13fb19e157296e2c46056aec9092264fab83e4ba59e68c61b323c3d" +checksum = 
"167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3624,9 +3840,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.125" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fecefd9c35bd935a20fc3fc344b5f29138961e4f47fb03297d88f2587afb5ebd" +checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e" dependencies = [ "bumpalo", "proc-macro2", @@ -3637,35 +3853,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.125" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23939e44bb9a5d7576fa2b563dc2e136628f1224e88a8deed09e04858b77871f" +checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24" dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - [[package]] name = "wasm-streams" version = "0.4.2" @@ -3679,23 +3873,11 @@ dependencies = [ "web-sys", ] -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - [[package]] name = "web-sys" -version = "0.3.102" +version = "0.3.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a6430a72df5eb332242960fe84b3002a241163998241eb596d4f739b9757061d" +checksum = "8622dcb61c0bcc9fffa6938bed81210af2da9a7e4a1a834b2e37a59b6dfb6141" dependencies = [ "js-sys", "wasm-bindgen", @@ -3742,6 +3924,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -3751,8 +3968,19 @@ dependencies = [ "windows-implement", "windows-interface", "windows-link 0.2.1", - "windows-result", - "windows-strings", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading", ] [[package]] @@ -3789,6 +4017,25 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -3798,6 +4045,15 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows-strings" version = "0.5.1" @@ -3876,6 +4132,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -3981,100 +4246,12 @@ dependencies = [ "memchr", ] -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] - [[package]] name = "wit-bindgen" version = "0.57.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - [[package]] name = "wkb" version = "0.9.2" diff --git a/Cargo.toml b/Cargo.toml index 0ddfa98..cc3e9d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,38 +1,22 @@ -[package] -name = "rusterize" -version = "0.8.1" -edition = "2024" +[workspace] +members = ["rust", "python"] resolver = "3" -[lib] -name = "rusterize" -crate-type = ["cdylib"] 
+[workspace.package] +description = "Extremely fast geometry rasterization" +edition = "2024" +authors = ["Tommaso Trotto "] +license = "MIT" +repository = "https://github.com/ttrotto/rusterize" +readme = "README.md" +categories = ["science::geo"] +keywords = ["fast", "raster", "geometry"] -[dependencies] -bitflags = "2.13.0" -fixedbitset = "0.5.7" -geo = "0.30.0" -geo-traits = "0.3.0" +[workspace.dependencies] +geo = "0.33.1" geo-types = "0.7.19" -ndarray = { version = "0.17.2", features = ["rayon"] } num-traits = "0.2.19" -numpy = "0.27.1" -polars = { version = "0.53.0", features = ["lazy", "simd", "performant", "nightly"] } -pyo3 = { version = "0.27.2", features = ["extension-module", "abi3-py311", "generate-import-lib"] } -pyo3-polars = "0.26.0" rayon = "1.12.0" -wkb = "0.9.2" -wkt = "0.14.0" - -# OS-specific allocators -[target.'cfg(not(target_family = "unix"))'.dependencies] -mimalloc = { version = "*", default-features = false } - -[target.'cfg(all(target_family = "unix", not(target_os = "macos")))'.dependencies] -tikv-jemallocator = { version = "*", features = ["disable_initial_exec_tls", "background_threads"] } - -[target.'cfg(all(target_family = "unix", target_os = "macos"))'.dependencies] -tikv-jemallocator = { version = "*", features = ["disable_initial_exec_tls"] } [profile.dist-release] inherits = "release" diff --git a/README.md b/README.md index 4282ebe..48ebbec 100644 --- a/README.md +++ b/README.md @@ -4,266 +4,17 @@
-High performance rasterization tool for Python built in Rust, inspired by the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package with lots of useful improvements (see [API](#API)). +**rusterize** is an extremely fast rasterization tool built in 🦀 Rust, with ports to 🐍 Python and (coming soon) R. It +works on all geometry types (polygons, lines, points, geometry collections, and more!) and it does not depend on GDAL. +Inspired by the [fasterize](https://github.com/ecohealthalliance/fasterize.git) package. -**rusterize** is designed to work on _all_ shapely geometries, even when they are nested inside complex geometry collections. Functionally, it supports four input types: +Check out the latest documentation: -- [geopandas](https://geopandas.org/en/stable/) GeoDataFrame and GeoSeries -- [polars-st](https://oreilles.github.io/polars-st/) GeoDataFrame -- Python list of geometries in shapely.Geometry, WKB, or WKT format -- Numpy array of geometries in shapely.Geometry, WKB, or WKT format - -It returns a [xarray](https://docs.xarray.dev/en/stable/), a [numpy](https://numpy.org/), or a sparse array in COOrdinate format. - -### Installation - -**rusterize** comes with numpy as the only required dependency and is distributed in different flavors. A `core` library that performs the rasterization and returns a bare `numpy` array, a `xarray` flavor that returns a georeferenced `xarray` (requires `xarray` and `rioxarray` and is the recommended flavor), or an `all` flavor with dependencies for all supported inputs. - -Install the current version with pip: - -```shell -# core library -pip install rusterize - -# xarray capabilities -pip install 'rusterize[xarray]' - -# support all input types -pip install 'rusterize[all]' -``` - -### Contributing - -Any contribution is welcome! You can install **rusterize** directly from this repo using [maturin](https://www.maturin.rs/) as an editable package.
For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) installed. To run the tests you need to have `gdal` installed as well as the `rusterize[all]` flavor. - -```shell -# clone repo -git clone https://github.com//rusterize.git -cd rusterize - -# install Rust nightly toolchain -rustup toolchain install nightly-2026-01-09 - -# install maturin -pip install maturin - -# install editable version with optmized code -maturin develop --profile dist-release - -# test the new contribution -pytest -``` - -### API - -**rusterize** has a simple API consisting of a single function `rusterize()`: - -```python -from rusterize import rusterize - -rusterize( - data, - like=None, - res=(30, 30), - out_shape=(10, 10), - extent=(0, 10, 10, 20), - field="field", - by="by", - burn=None, - fun="sum", - background=0, - encoding="xarray", - all_touched=False, - tap=False, - dtype="uint8" -) -``` - -- **data** : `geopandas.GeoDataFrame`, `geopandas.GeoSeries`, `polars.DataFrame`, `list`, `numpy.ndarray`
- Input data to rasterize. - - If `polars.DataFrame`, it must be have a "geometry" column with geometries stored in WKB or WKT format. - - If `list` or `numpy.ndarray`, geometries must be in WKT, WKB, or shapely formats (EPSG is not inferred and defaults to None). - -- **like** : `xarray.DataArray` or `xarray.Dataset` (default: None)
- Template array used as a spatial blueprint (resolution, shape, extent). Mutually exclusive with `res`, `out_shape`, and `extent`. Requires xarray and rioxarray. - -- **res** : `tuple` or `list` (default: None)
- Pixel resolution defined as (xres, yres). - -- **out_shape** : `tuple` or `list` (default: None)
- Output raster dimensions defined as (nrows, ncols). - -- **extent** : `tuple` or `list` (default: None)
- Spatial bounding box defined as (xmin, ymin, xmax, ymax). - -- **field** : `str` (default: None)
- Column name to use for pixel values. Mutually exclusive with `burn`. Not considered when input is list or numpy.ndarray. - -- **by** : `str` (default: None)
- Column used for grouping. Each group is rasterized into a distinct band in the output. Not considered when input is list or numpy.ndarray. - -- **burn** : `int`, `float`, or `numpy.ndarray` (default: None)
- A static value or a list of values to apply to each geometries. If a `numpy.ndarray`, it must match the length of the geometry data. Mutually exclusive with `field`. - If `burn` is a `numpy.ndarray`, its dtype should match the output `dtype`, otherwise it is internally casted. If `data` is a `geopandas.GeoSeries`, its index is used as `burn` value, unless otherwise specified. - -- **fun** : `str` (default: "last")
- Pixel function to use when burning geometries. Available options: `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. - -- **background** : `int` or `float` (default: numpy.nan)
- Value assigned to pixels not covered by any geometry. - -- **encoding** : `str` (default: "xarray")
- The format of the returned object: `"xarray"`, `"numpy"`, or `"sparse"`. - -- **all_touched** : `bool` (default: False)
- If True, every pixel touched by a geometry is burned. - -- **tap** : `bool` (default: False)
- Target Aligned Pixel: aligns the extent to the pixel resolution. - -- **dtype** : `str` (default: "float64")
- Output data type (e.g., `uint8`, `int32`, `float32`). - -Note that control over the desired extent is not as strict as for resolution and shape. That is, when resolution, output shape, and extent are specified, priority is given to resolution and shape. So, extent is not guaranteed, but resolution and shape are. If extent is not given, it is taken from the polygons and is not modified, unless you specify a resolution value. If you only specify an output shape, the extent is maintained. This mimics the logics of `gdal_rasterize`. - -### Encoding - -**rusterize** offers three encoding options for the rasterization output. You can return a `xarray/numpy` with the rasterized geometries, or a new `SparseArray` structure. This `SparseArray` structure stores the band/row/column triplets of where the geometries should be burned onto the final raster, as well as their corresponding values before applying any pixel function. This can be used as an intermediate output to avoid allocating memory before materializing the final raster, or as a final product. `SparseArray` has three convenience functions: `to_xarray()`, `to_numpy()`, and `to_frame()`. The first two return the final `xarray/numpy` with the appropriate pixel function, the last returns a `polars` dataframe with only the coordinates and values of the rasterized geometries. Note that `SparseArray` avoids allocating memory for the array during rasterization until it's actually needed (e.g. calling `to_xarray()`). See below for an example. 
- -### Usage - -```python -from rusterize import rusterize -import geopandas as gpd -from shapely import wkt -import matplotlib.pyplot as plt - -# construct geometries -geoms = [ - "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))", - "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))", - "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))", - "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))", - "GEOMETRYCOLLECTION (POINT (50 -40), POLYGON ((75 -40, 75 -30, 100 -30, 100 -40, 75 -40)), LINESTRING (60 -40, 80 0), GEOMETRYCOLLECTION (POLYGON ((100 20, 100 30, 110 30, 110 20, 100 20))))" -] - -# create a GeoDataFrame with shapely geometries from WKT -gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=wkt.loads(geoms), crs='EPSG:32619') - -# or pass values directly to rusterize -# rusterize to "xarray" -> returns a xarray with the burned geometries and spatial reference when available (default) -# will raise a ModuleNotFoundError if xarray and rioxarray are not found -output = rusterize( - geoms, - res=(1, 1), - fun="sum", - burn=np.arange(1, len(geoms) + 1) -).squeeze() - -output = rusterize( - gdf, - res=(1, 1), - field="value", - fun="sum", -).squeeze() - -# plot it -fig, ax = plt.subplots(figsize=(12, 6)) -output.plot.imshow(ax=ax) -plt.show() - -# rusterize to "sparse" -> custom structure storing the coordinates and values of the rasterized geometries -output = rusterize( - gdf, - res=(1, 1), - field="value", - fun="sum", - encoding="sparse" -) -output -# SparseArray: -# - Shape: (131, 361) -# - Extent: (-180.5, -70.5, 180.5, 60.5) -# - Resolution: (1.0, 1.0) -# - EPSG: 32619 -# - Estimated size: 378.33 KB - -# materialize into xarray or numpy -array = output.to_xarray() -array = output.to_numpy() - -# get only coordinates and values -output.to_frame() -# 
shape: (29_340, 3) -# ┌─────┬─────┬──────┐ -# │ row ┆ col ┆ data │ -# │ --- ┆ --- ┆ --- │ -# │ u32 ┆ u32 ┆ f64 │ -# ╞═════╪═════╪══════╡ -# │ 6 ┆ 40 ┆ 1.0 │ -# │ 6 ┆ 41 ┆ 1.0 │ -# │ 6 ┆ 42 ┆ 1.0 │ -# │ 7 ┆ 39 ┆ 1.0 │ -# │ 7 ┆ 40 ┆ 1.0 │ -# │ … ┆ … ┆ … │ -# │ 64 ┆ 258 ┆ 1.0 │ -# │ 63 ┆ 259 ┆ 1.0 │ -# │ 62 ┆ 259 ┆ 1.0 │ -# │ 61 ┆ 260 ┆ 1.0 │ -# │ 60 ┆ 260 ┆ 1.0 │ -# └─────┴─────┴──────┘ -``` - -![](img/plot.png) - -### Benchmarks - -**rusterize** is fast! Let’s try it on small and large datasets in comparison to GDAL ([benchmark_rusterize.py](benchmarks/benchmark_rusterize.py)). You can run this with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/): - -``` -pytest --benchmark-min-rounds=10 --benchmark-time-unit='s' - ---------------------------------------------- benchmark: 8 tests ------------------------------------------------- -Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ------------------------------------------------------------------------------------------------------------------- -test_water_small_f64_numpy 0.0038 0.0045 0.0040 0.0001 0.0040 0.0002 56;3 248.7981 181 1 -test_water_small_f64 0.0048 0.0057 0.0050 0.0001 0.0050 0.0001 21;9 198.8759 158 1 -test_water_small_gdal_f64 0.0053 0.0057 0.0054 0.0001 0.0054 0.0001 28;14 184.3595 160 1 -test_water_large_f64_numpy 1.2628 1.3610 1.3133 0.0314 1.3193 0.0498 5;0 0.7614 10 1 -test_water_large_f64 1.2762 1.4723 1.3342 0.0628 1.3149 0.0165 2;4 0.7495 10 1 -test_water_large_gdal_f64 1.4128 1.4229 1.4178 0.0029 1.4180 0.0040 3;0 0.7053 10 1 -test_roads_uint8 3.3184 3.5184 3.4021 0.0578 3.3849 0.0527 3;1 0.2939 10 1 -test_roads_gdal_uint8 9.0672 9.1040 9.0901 0.0109 9.0920 0.0125 2;0 0.1100 10 1 ------------------------------------------------------------------------------------------------------------------- -``` - -And fasterize ([benchmark_fasterize.r](benchmarks/benchmark_fasterize.r)). 
Note that it doesn't support custom `dtype` so the returning raster is `float64`. - -``` -Unit: seconds - expr min lq mean median uq max neval - fasterize_small_f64 0.05764281 0.06274373 0.1286875 0.06520358 0.1128432 0.6000182 10 - fasterize_large_f64 36.91321005 37.71877265 41.0140303 40.81343803 43.9201820 46.5596799 10 -``` - -### Comparison with other tools - -While **rusterize** is fast, there are other fast alternatives out there, including `rasterio` and `geocube`. However, **rusterize** allows for a seamless, Rust-native processing with similar or lower memory footprint that **does not** require you to install GDAL and returns the geoinformation you need for downstream processing with ample control over resolution, shape, extent, and data type. - -The following is a time comparison of 10 runs (median) on the same large water bodies dataset used earlier (dtype is `float64`) ([run_others.py](benchmarks/run_others.py)). - -``` -rusterize: 1.3 sec -rasterio: 14.5 sec -geocube: 124.9 sec -``` - -### Integrations - -**rusterize** is integrated into the following libraries: - -- [rasterix](https://github.com/xarray-contrib/rasterix) +- [Rust API](https://docs.rs/rusterize-rs) +- [Python API](https://ttrotto.github.io/rusterize)
- -Disclaimer: Logo originally generated with Nano Banana Pro + +[![docs.rs](https://img.shields.io/docsrs/rusterize-rs?label=docs.rs%20latest)](https://docs.rs/rusterize-rs) +[![crates.io](https://img.shields.io/crates/v/rusterize-rs)](https://crates.io/crates/rusterize-rs) +[![Latest version on PyPI](https://img.shields.io/pypi/v/rusterize)](https://pypi.org/project/rusterize) diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 0000000..3ef25f4 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "rusterize-python" +description = { workspace = true } +version = "0.9.0" +authors = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } +keywords = { workspace = true } +readme = "README.md" + +[lib] +name = "_rusterize" +crate-type = ["cdylib"] + +[dependencies] +rusterize = { path = "../rust", package = "rusterize-rs", features = ["polars"] } +geo = { workspace = true } +geo-traits = "0.3.0" +geo-types = { workspace = true } +num-traits = { workspace = true } +numpy = "0.28.0" +pyo3 = { version = "0.28.0", features = ["extension-module", "abi3-py311", "generate-import-lib"] } +pyo3-polars = "0.27.0" +polars = { version = "0.54.4", default-features = false, features = ["lazy", "strings", "temporal"] } +rayon = { workspace = true } +wkb = "0.9.2" +wkt = "0.14.0" + +# OS-specific allocators +[target.'cfg(not(target_family = "unix"))'.dependencies] +mimalloc = { version = "*", default-features = false } + +[target.'cfg(all(target_family = "unix", not(target_os = "macos")))'.dependencies] +tikv-jemallocator = { version = "*", features = ["disable_initial_exec_tls", "background_threads"] } + +[target.'cfg(all(target_family = "unix", target_os = "macos"))'.dependencies] +tikv-jemallocator = { version = "*", features = ["disable_initial_exec_tls"] } diff --git a/python/LICENSE b/python/LICENSE new file mode 120000 index 
0000000..ea5b606 --- /dev/null +++ b/python/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..87af78f --- /dev/null +++ b/python/README.md @@ -0,0 +1,14 @@ +## rusterize + +**rusterize** is an extremely fast rasterization engine built in 🦀 Rust and ported to 🐍 Python. + +It is designed to work on _all_ shapely geometries, even when they are nested inside complex geometry collections. Functionally, it supports four input types: + +- [geopandas](https://geopandas.org/en/stable/) GeoDataFrame and GeoSeries +- [polars-st](https://oreilles.github.io/polars-st/) GeoDataFrame +- Python list of geometries in shapely.Geometry, WKB, or WKT format +- Numpy array of geometries in shapely.Geometry, WKB, or WKT format + +It returns a [xarray](https://docs.xarray.dev/en/stable/), a [numpy](https://numpy.org/), or a custom sparse array in COOrdinate format. + +Visit the full documentation [here](https://ttrotto.github.io/rusterize). 
diff --git a/benchmarks/benchmark_fasterize.r b/python/benchmarks/benchmark_fasterize.r similarity index 100% rename from benchmarks/benchmark_fasterize.r rename to python/benchmarks/benchmark_fasterize.r diff --git a/benchmarks/benchmark_rusterize.py b/python/benchmarks/benchmark_rusterize.py similarity index 99% rename from benchmarks/benchmark_rusterize.py rename to python/benchmarks/benchmark_rusterize.py index e79ba07..5fe2b77 100644 --- a/benchmarks/benchmark_rusterize.py +++ b/python/benchmarks/benchmark_rusterize.py @@ -1,3 +1,4 @@ +import os import zipfile from io import BytesIO diff --git a/benchmarks/run_others.py b/python/benchmarks/run_others.py similarity index 100% rename from benchmarks/run_others.py rename to python/benchmarks/run_others.py diff --git a/python/docs/api.md b/python/docs/api.md new file mode 100644 index 0000000..88a6fed --- /dev/null +++ b/python/docs/api.md @@ -0,0 +1,11 @@ +# API reference + +::: rusterize.rusterize + +## SparseArray + +Returned when `encoding="sparse"`. A COO-format sparse array with three converters: + +- `to_xarray()` → `xarray.DataArray` +- `to_numpy()` → `numpy.ndarray` +- `to_frame()` → `polars.DataFrame` diff --git a/img/plot.png b/python/docs/img/plot.png similarity index 100% rename from img/plot.png rename to python/docs/img/plot.png diff --git a/python/docs/index.md b/python/docs/index.md new file mode 100644 index 0000000..7c9e4c1 --- /dev/null +++ b/python/docs/index.md @@ -0,0 +1,12 @@ +# rusterize + +**rusterize** is an extremely fast rasterization engine built in 🦀 Rust, with ports to 🐍 Python and (coming soon) R. It +works on all geometry types (polygon, lines, points, geometry collections, and more!) and it does not depend on GDAL. + +For a quickstart, check out the [Rust crate](https://docs.rs/rusterize-rs) and the [Python package](python.md). 
+ +### Integrations + +**rusterize** is integrated into the following libraries: + +- [rasterix](https://github.com/xarray-contrib/rasterix) diff --git a/python/docs/python.md b/python/docs/python.md new file mode 100644 index 0000000..66846c6 --- /dev/null +++ b/python/docs/python.md @@ -0,0 +1,207 @@ +# rusterize on Python + +**rusterize** is designed to work on _all_ shapely geometries, even when they are nested inside complex geometry collections. Functionally, it supports four input types: + +- [geopandas](https://geopandas.org/en/stable/) GeoDataFrame and GeoSeries +- [polars-st](https://oreilles.github.io/polars-st/) GeoDataFrame +- Python list of geometries in shapely.Geometry, WKB, or WKT format +- Numpy array of geometries in shapely.Geometry, WKB, or WKT format + +It returns a [xarray](https://docs.xarray.dev/en/stable/), a [numpy](https://numpy.org/), or a custom sparse array in COOrdinate format. + +## Installation + +**rusterize** comes with `numpy` as the only required dependency and is distributed in different flavors. A `core` library that performs the rasterization and returns +a bare `numpy` array, a `xarray` flavor that returns a georeferenced `xarray` (requires `xarray` and `rioxarray` and is the recommended flavor), or an `all` flavor with +dependencies for all supported inputs. + +Install the current version with pip: + +```bash +# core library +pip install rusterize + +# xarray capabilities +pip install 'rusterize[xarray]' + +# support all input types +pip install 'rusterize[all]' +``` + +## Usage + +Visit the full [API reference](api.md). 
+ +```python +from rusterize import rusterize +import geopandas as gpd +from shapely import wkt +import matplotlib.pyplot as plt + +# construct geometries +geoms = [ + "POLYGON ((-180 -20, -140 55, 10 0, -140 -60, -180 -20), (-150 -20, -100 -10, -110 20, -150 -20))", + "POLYGON ((-10 0, 140 60, 160 0, 140 -55, -10 0))", + "POLYGON ((-125 0, 0 60, 40 5, 15 -45, -125 0))", + "MULTILINESTRING ((-180 -70, -140 -50), (-140 -50, -100 -70), (-100 -70, -60 -50), (-60 -50, -20 -70), (-20 -70, 20 -50), (20 -50, 60 -70), (60 -70, 100 -50), (100 -50, 140 -70), (140 -70, 180 -50))", + "GEOMETRYCOLLECTION (POINT (50 -40), POLYGON ((75 -40, 75 -30, 100 -30, 100 -40, 75 -40)), LINESTRING (60 -40, 80 0), GEOMETRYCOLLECTION (POLYGON ((100 20, 100 30, 110 30, 110 20, 100 20))))" +] + +# create a GeoDataFrame with shapely geometries from WKT +gdf = gpd.GeoDataFrame({'value': range(1, len(geoms) + 1)}, geometry=wkt.loads(geoms), crs='EPSG:32619') + +output = rusterize( + gdf, + res=(1, 1), + field="value", + fun="sum", +).squeeze() + +# plot it +fig, ax = plt.subplots(figsize=(12, 6)) +output.plot.imshow(ax=ax) +plt.show() +``` + +![](img/plot.png) + +You could also create a multiband output by specifying the `by` parameter. + +```python +gdf["by"] = ["a", "a", "b", "b", "c"] + +output = rusterize( + gdf, + res=(1, 1), + field="value", + by="by", + fun="sum", +) +``` + +Alternatively, you can pass raw values to burn on the final raster, one per geometry. + +```python +import numpy as np + +output = rusterize( + geoms, + res=(1, 1), + fun="sum", + burn=np.arange(1, len(geoms) + 1) +).squeeze() +``` + +Finally, you can also create a [`SparseArray`](api.md#sparsearray), that is an object storing the band/row/col value triplets of all pixels that will be materialized in a final raster. 
+ +```python +output = rusterize( + gdf, + res=(1, 1), + field="value", + fun="sum", + encoding="sparse" +) +output +# SparseArray: +# - Shape: (131, 361) +# - Extent: (-180.5, -70.5, 180.5, 60.5) +# - Resolution: (1.0, 1.0) +# - EPSG: 32619 +# - Estimated size: 378.33 KB + +# materialize into xarray or numpy +array = output.to_xarray() +array = output.to_numpy() + +# get only coordinates and values +output.to_frame() +# shape: (29_363, 3) +# ┌─────┬─────┬────────┐ +# │ row ┆ col ┆ values │ +# │ --- ┆ --- ┆ --- │ +# │ u64 ┆ u64 ┆ f64 │ +# ╞═════╪═════╪════════╡ +# │ 6 ┆ 40 ┆ 1.0 │ +# │ 6 ┆ 41 ┆ 1.0 │ +# │ 6 ┆ 42 ┆ 1.0 │ +# │ 7 ┆ 39 ┆ 1.0 │ +# │ 7 ┆ 40 ┆ 1.0 │ +# │ … ┆ … ┆ … │ +# │ 39 ┆ 286 ┆ 5.0 │ +# │ 39 ┆ 287 ┆ 5.0 │ +# │ 39 ┆ 288 ┆ 5.0 │ +# │ 39 ┆ 289 ┆ 5.0 │ +# │ 39 ┆ 290 ┆ 5.0 │ +# └─────┴─────┴────────┘ +``` + +## Contributing + +Any contribution is welcome! You can install **rusterize** directly from this repo using [maturin](https://www.maturin.rs/) as an editable package. +For this to work, you’ll need to have [Rust](https://www.rust-lang.org/tools/install) and [cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) installed. +To run the tests you need to have `gdal` installed as well as the `rusterize[all]` flavor. + +```bash +# clone repo +git clone https://github.com/<username>/rusterize.git +cd rusterize + +# install Rust nightly toolchain +rustup toolchain install nightly-2026-04-01 + +# create a virtual environment (e.g. using `uv`) +# install maturin +uv pip install maturin + +# install editable version with optimized code +maturin develop --profile dist-release --uv + +# test the new contribution +pytest +``` + +## Benchmarks + +**rusterize** is fast! Let’s try it on small and large datasets in comparison to GDAL ([benchmark_rusterize.py](benchmarks/benchmark_rusterize.py)). 
+You can run this with [pytest](https://docs.pytest.org/en/stable/) and [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/): + +``` +pytest --benchmark-min-rounds=10 --benchmark-time-unit='s' + +--------------------------------------------- benchmark: 8 tests ------------------------------------------------- +Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +------------------------------------------------------------------------------------------------------------------ +test_water_small_f64_numpy 0.0038 0.0045 0.0040 0.0001 0.0040 0.0002 56;3 248.7981 181 1 +test_water_small_f64 0.0048 0.0057 0.0050 0.0001 0.0050 0.0001 21;9 198.8759 158 1 +test_water_small_gdal_f64 0.0053 0.0057 0.0054 0.0001 0.0054 0.0001 28;14 184.3595 160 1 +test_water_large_f64_numpy 1.2628 1.3610 1.3133 0.0314 1.3193 0.0498 5;0 0.7614 10 1 +test_water_large_f64 1.2762 1.4723 1.3342 0.0628 1.3149 0.0165 2;4 0.7495 10 1 +test_water_large_gdal_f64 1.4128 1.4229 1.4178 0.0029 1.4180 0.0040 3;0 0.7053 10 1 +test_roads_uint8 3.3184 3.5184 3.4021 0.0578 3.3849 0.0527 3;1 0.2939 10 1 +test_roads_gdal_uint8 9.0672 9.1040 9.0901 0.0109 9.0920 0.0125 2;0 0.1100 10 1 +------------------------------------------------------------------------------------------------------------------ +``` + +And fasterize ([benchmark_fasterize.r](benchmarks/benchmark_fasterize.r)). Note that it doesn't support custom `dtype` so the returning raster is `float64`. + +``` +Unit: seconds + expr min lq mean median uq max neval + fasterize_small_f64 0.05764281 0.06274373 0.1286875 0.06520358 0.1128432 0.6000182 10 + fasterize_large_f64 36.91321005 37.71877265 41.0140303 40.81343803 43.9201820 46.5596799 10 +``` + +### Comparison with other tools + +While **rusterize** is fast, there are other fast alternatives out there, including `rasterio` and `geocube`. 
However, **rusterize** allows for a seamless, +Rust-native processing with similar or lower memory footprint that **does not** require you to install GDAL and returns the geoinformation you need for downstream +processing with ample control over resolution, shape, extent, and data type. + +The following is a time comparison of 10 runs (median) on the same large water bodies dataset used earlier (dtype is `float64`) ([run_others.py](benchmarks/run_others.py)). + +``` +rusterize: 1.3 sec +rasterio: 14.5 sec +geocube: 124.9 sec +``` diff --git a/pyproject.toml b/python/pyproject.toml similarity index 97% rename from pyproject.toml rename to python/pyproject.toml index 4b597b4..a07cd14 100644 --- a/pyproject.toml +++ b/python/pyproject.toml @@ -29,7 +29,7 @@ repository = "https://github.com/ttrotto/rusterize" [tool.maturin] python-source = "python" -module-name = "rusterize" +module-name = "rusterize._rusterize" include = [{ path = "rust-toolchain.toml", format = "sdist" }] [tool.ruff] diff --git a/python/rusterize/__init__.py b/python/python/rusterize/__init__.py similarity index 73% rename from python/rusterize/__init__.py rename to python/python/rusterize/__init__.py index 53581f6..2a167e7 100644 --- a/python/rusterize/__init__.py +++ b/python/python/rusterize/__init__.py @@ -15,10 +15,10 @@ from ._dependencies import geopandas as gpd from ._dependencies import polars as pl from ._dependencies import xarray as xr -from .rusterize import _rusterize +from ._rusterize import _rusterize if TYPE_CHECKING: - from .rusterize import SparseArray + from ._rusterize import SparseArray __version__ = importlib.metadata.version("rusterize") @@ -97,60 +97,59 @@ def rusterize( dtype: str = "float64", ) -> xr.DataArray | np.ndarray | SparseArray: """ - Fast geometry rasterization in Rust. - Parameters ---------- data : geopandas.GeoDataFrame, geopandas.GeoSeries, polars.DataFrame, list, numpy.ndarray - Input data to rasterize. 
- - If polars.DataFrame, it must be have a "geometry" column with geometries stored in WKB or WKT format. - - If list or numpy.ndarray, geometries must be in WKT, WKB, or shapely formats (EPSG is not inferred and defaults to None). + Input data to rasterize. + + - If polars.DataFrame, it must be have a "geometry" column with geometries stored in WKB or WKT format. + - If list or numpy.ndarray, geometries must be in WKT, WKB, or shapely formats (EPSG is not inferred and defaults to None). like : xarray.DataArray or xarray.Dataset (default: None) - Template array used as a spatial blueprint (resolution, shape, extent). Mutually exclusive with `res`, `out_shape`, and `extent`. Requires xarray and rioxarray. + Template array used as a spatial blueprint (resolution, shape, extent). Mutually exclusive with `res`, `out_shape`, and `extent`. Requires xarray and rioxarray. res : tuple or list (default: None) - Pixel resolution defined as (xres, yres). + Pixel resolution defined as (xres, yres). out_shape : tuple or list (default: None) - Output raster dimensions defined as (nrows, ncols). + Output raster dimensions defined as (nrows, ncols). extent : `tuple` or `list` (default: None) - Spatial bounding box defined as `(xmin, ymin, xmax, ymax)`. + Spatial bounding box defined as `(xmin, ymin, xmax, ymax)`. field : `str` (default: None) - Column name to use for pixel values. Mutually exclusive with `burn`. Not considered when input is list or numpy.ndarray. + Column name to use for pixel values. Mutually exclusive with `burn`. Not considered when input is list or numpy.ndarray. by : `str` (default: None) - Column used for grouping. Each group is rasterized into a distinct band in the output. Not considered when input is list or numpy.ndarray. + Column used for grouping. Each group is rasterized into a distinct band in the output. Not considered when input is list or numpy.ndarray. 
burn : `int`, `float`, or `numpy.ndarray` (default: None) - A static value or a list of values to apply to each geometries. If a `numpy.ndarray`, it must match the length of the geometry data. Mutually exclusive with `field`. - If `burn` is a `numpy.ndarray`, its dtype should match the output `dtype`, otherwise it is internally casted. If `data` is a `geopandas.GeoSeries`, its index is used as `burn` value, - unless otherwise specified. + A static value or a list of values to apply to each geometries. If a `numpy.ndarray`, it must match the length of the geometry data. Mutually exclusive with `field`. + If `burn` is a `numpy.ndarray`, its dtype should match the output `dtype`, otherwise it is internally casted. If `data` is a `geopandas.GeoSeries`, its index is used as `burn` value, + unless otherwise specified. fun : `str` (default: "last") - Pixel function to use when burning geometries. Available options: `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. + Pixel function to use when burning geometries. Available options: `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. background : `int` or `float` (default: numpy.nan) - Value assigned to pixels not covered by any geometry. + Value assigned to pixels not covered by any geometry. encoding : `str` (default: "xarray") - The format of the returned object: `"xarray"`, `"numpy"`, or `"sparse"`. + The format of the returned object: `"xarray"`, `"numpy"`, or `"sparse"`. all_touched : `bool` (default: False) - If True, every pixel touched by a geometry is burned. + If True, every pixel touched by a geometry is burned. tap : `bool` (default: False) - Target Aligned Pixels: aligns the extent to the pixel resolution. + Target Aligned Pixels: aligns the extent to the pixel resolution. dtype : `str` (default: "float64") - Output data type (e.g., `uint8`, `int32`, `float32`). + Output data type (e.g., `uint8`, `int32`, `float32`). Returns ------- xarray.DataArray, numpy.ndarray, or a sparse array in COO format. 
Notes - ----- - If `encoding` is "numpy" or input is list or numpy.ndarray, the return array is without any spatial reference. + ------ + If `encoding` is "numpy" or input is list or numpy.ndarray, the return array is without any spatial reference. - When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable. - If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` DataArray or Dataset. - Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border. - The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL. + When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable. + If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` DataArray or Dataset. + Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border. + The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL. - If `field` is not in `data`, then a default `burn` value of 1 is rasterized. + If `field` is not in `data`, then a default `burn` value of 1 is rasterized. - A `None` value for `dtype` corresponds to the default of that dtype. An illegal value for a dtype will be replaced with the default of that dtype. - For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`. + A `None` value for `dtype` corresponds to the default of that dtype. An illegal value for a dtype will be replaced with the default of that dtype. + For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`. 
""" if isinstance(data, (list, np.ndarray)): @@ -224,10 +223,10 @@ def rusterize( if isinstance(burn, np.ndarray) and burn.size != len(data): raise ValueError("If `burn` is a `numpy.ndarray`, it must have the same length as `data`.") - _with_user_extent = False - _bounds = (np.inf, np.inf, np.inf, np.inf) - _res = (0, 0) - _shape = (0, 0) + _with_custom_bounds = False + _bounds = None + _res = None + _shape = None if like is not None: if not (_xarray_available() and isinstance(like, (xr.DataArray, xr.Dataset))): @@ -243,7 +242,8 @@ def rusterize( affine = like.rio.transform() _res = (affine.a, abs(affine.e)) _shape = like.squeeze().shape - _bounds, _with_user_extent = like.rio.bounds(), True + _bounds = like.rio.bounds() + _with_custom_bounds = True except Exception as e: raise AttributeError("No spatial dimension found for like object") from e else: @@ -257,7 +257,7 @@ def rusterize( if len(extent) != 4 or all(e == 0 for e in extent): raise ValueError("`extent` must be a tuple or list of (xmin, ymin, xmax, ymax).") _bounds = extent - _with_user_extent = True + _with_custom_bounds = True if res: if len(res) != 2 or any(r <= 0 for r in res) or any(not isinstance(r, (int, float)) for r in res): @@ -277,9 +277,6 @@ def rusterize( # data-specific feature extraction match data_type: case "geopandas": - if not _with_user_extent: - _bounds = data.total_bounds - epsg = data.crs.to_epsg() if data.crs else None if cols: @@ -302,14 +299,12 @@ def rusterize( geometries = data.geometry case "polars": - if not _with_user_extent: - try: - _bounds = data.select(pl.col("geometry").st.total_bounds()).item().to_numpy() - except pl.exceptions.ColumnNotFoundError as e: - raise ValueError("If `polars.DataFrame`, a 'geometry' column is expected.") from e - # check if geometry has SRID. 
If 0, then None, else assume first SRID is equal for all geometries - srid = data.select(pl.col("geometry").first().st.srid()).item() + try: + srid = data.select(pl.col("geometry").first().st.srid()).item() + except pl.exceptions.ColumnNotFoundError as e: + raise ValueError("If `polars.DataFrame`, a 'geometry' column is expected.") from e + epsg = None if srid == 0 else srid if cols: @@ -322,9 +317,6 @@ def rusterize( geometries = data.select(pl.col("geometry")).to_series() case "geoseries": - if not _with_user_extent: - _bounds = data.total_bounds - geometries = data.geometry burn = burn if burn is not None else data.index.to_numpy() @@ -343,17 +335,12 @@ def rusterize( # RawRasterInfo raw_raster_info = { - "nrows": _shape[0], - "ncols": _shape[1], - "xmin": _bounds[0], - "ymin": _bounds[1], - "xmax": _bounds[2], - "ymax": _bounds[3], - "xres": _res[0], - "yres": _res[1], - "with_user_extent": _with_user_extent, + "shape": _shape, + "extent": _bounds, + "resolution": _res, "tap": tap, "epsg": epsg, + "with_custom_bounds": _with_custom_bounds, } return _rusterize( diff --git a/python/rusterize/_dependencies.py b/python/python/rusterize/_dependencies.py similarity index 100% rename from python/rusterize/_dependencies.py rename to python/python/rusterize/_dependencies.py diff --git a/python/python/rusterize/_rusterize.pyi b/python/python/rusterize/_rusterize.pyi new file mode 100644 index 0000000..4000338 --- /dev/null +++ b/python/python/rusterize/_rusterize.pyi @@ -0,0 +1,25 @@ +from typing import Any + +import numpy as np + +from ._dependencies import polars as pl +from ._dependencies import xarray as xr + +def _rusterize( + geometry: Any, + raw_raster_info: dict[str, Any], + pypixel_fn: str, + pydf: Any | None = None, + pyfield: str | None = None, + pyby: str | None = None, + pyburn: Any | None = None, + pybackground: Any | None = None, + pytouched: bool = False, + pyencoding: str = "xarray", + pydtype: str = "float64", +) -> xr.DataArray | np.ndarray | 
SparseArray: ... + +class SparseArray: + def to_xarray(self) -> xr.DataArray: ... + def to_numpy(self) -> np.ndarray: ... + def to_frame(self) -> pl.DataFrame: ... diff --git a/python/rusterize/py.typed b/python/python/rusterize/py.typed similarity index 100% rename from python/rusterize/py.typed rename to python/python/rusterize/py.typed diff --git a/python/rusterize/rusterize.pyi b/python/rusterize/rusterize.pyi deleted file mode 100644 index bb7661a..0000000 --- a/python/rusterize/rusterize.pyi +++ /dev/null @@ -1,80 +0,0 @@ -import numpy as np - -from ._dependencies import geopandas as gpd -from ._dependencies import polars as pl -from ._dependencies import xarray as xr - -def rusterize( - data: gpd.GeoDataFrame | pl.DataFrame | list | np.ndarray, - like: xr.DataArray | xr.Dataset | None = None, - res: tuple | list | None = None, - out_shape: tuple | list | None = None, - extent: tuple | list | None = None, - field: str | None = None, - by: str | None = None, - burn: int | float | None = None, - fun: str = "last", - background: int | float | None = np.nan, - encoding: str = "xarray", - all_touched: bool = False, - tap: bool = False, - dtype: str = "float64", -) -> xr.DataArray | np.ndarray | SparseArray: - """ - Fast geometry rasterization in Rust. - - Parameters - ---------- - data : geopandas.GeoDataFrame, polars.DataFrame, list, numpy.ndarray - Input data to rasterize. - - If polars.DataFrame, it must be have a "geometry" column with geometries stored in WKB or WKT format. - - If list or numpy.ndarray, geometries must be in WKT, WKB, or shapely formats (EPSG is not inferred and defaults to None). - like : xarray.DataArray or xarray.Dataset (default: None) - Template array used as a spatial blueprint (resolution, shape, extent). Mutually exclusive with `res`, `out_shape`, and `extent`. Requires xarray and rioxarray. - res : tuple or list (default: None) - Pixel resolution defined as (xres, yres). 
- out_shape : tuple or list (default: None) - Output raster dimensions defined as (nrows, ncols). - extent : `tuple` or `list` (default: None) - Spatial bounding box defined as `(xmin, ymin, xmax, ymax)`. - field : `str` (default: None) - Column name to use for pixel values. Mutually exclusive with `burn`. Not considered when input is list or numpy.ndarray. - by : `str` (default: None) - Column used for grouping. Each group is rasterized into a distinct band in the output. Not considered when input is list or numpy.ndarray. - burn : `int` or `float` (default: None) - A static value to apply to all geometries. Mutually exclusive with `field`. - fun : `str` (default: "last") - Pixel function to use when burning geometries. Available options: `sum`, `first`, `last`, `min`, `max`, `count`, or `any`. - background : `int` or `float` (default: numpy.nan) - Value assigned to pixels not covered by any geometry. - encoding : `str` (default: "xarray") - The format of the returned object: `"xarray"`, `"numpy"`, or `"sparse"`. - all_touched : `bool` (default: False) - If True, every pixel touched by a geometry is burned. - tap : `bool` (default: False) - Target Aligned Pixels: aligns the extent to the pixel resolution. - dtype : `str` (default: "float64") - Output data type (e.g., `uint8`, `int32`, `float32`). - - Returns - ------- - xarray.DataArray, numpy.ndarray, or a sparse array in COO format. - - Notes - ----- - If `encoding` is "numpy" or input is list or numpy.ndarray, the return array is without any spatial reference. - - When any of `res`, `out_shape`, or `extent` is not provided, it is inferred from the other arguments when applicable. - If `like` is specified, `res`, `out_shape`, and `extent` are inferred from the `like` DataArray or Dataset. - Unless `extent` is specified, a half-pixel buffer is applied to avoid missing points on the border. - The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL. 
- - If `field` is not in `data`, then a default `burn` value of 1 is rasterized. - - A `None` value for `dtype` corresponds to the default of that dtype. An illegal value for a dtype will be replaced with the default of that dtype. For example, a `background=np.nan` for `dtype="uint8"` will become `background=0`, where `0` is the default for `uint8`. - """ - -class SparseArray: - def to_xarray(self) -> xr.DataArray: ... - def to_numpy(self) -> np.ndarray: ... - def to_frame(self) -> pl.DataFrame: ... diff --git a/src/allocator.rs b/python/src/allocator.rs similarity index 100% rename from src/allocator.rs rename to python/src/allocator.rs diff --git a/python/src/encoding/pyarray.rs b/python/src/encoding/pyarray.rs new file mode 100644 index 0000000..d331f69 --- /dev/null +++ b/python/src/encoding/pyarray.rs @@ -0,0 +1,140 @@ +use pyo3::prelude::*; +use pyo3_polars::PyDataFrame; +use std::sync::Arc; + +use super::xarray::build_xarray; +use crate::prelude::OptionalFlags; +use num_traits::Num; +use numpy::{Element, IntoPyArray}; +use rusterize::prelude::*; + +#[derive(IntoPyObject)] +pub enum PyOutput<'py> { + Dense(Bound<'py, PyAny>), + Sparse(PySparseArray), +} + +/// Convert a [`rusterize::prelude::DenseArray`] or a [`rusterize::prelude::SparseArray`] into python object +pub trait Pythonize { + fn pythonize(self, py: Python, opt_flags: OptionalFlags) -> PyResult; +} + +impl Pythonize for DenseArray +where + N: Num + Element, +{ + fn pythonize(self, py: Python, opt_flags: OptionalFlags) -> PyResult { + let (array, band_names, raster_info) = self.into_parts(); + let data = array.into_pyarray(py); + + if opt_flags.xarray { + let xarray = build_xarray(py, &raster_info, data, &band_names)?; + Ok(PyOutput::Dense(xarray)) + } else { + Ok(PyOutput::Dense(data.into_any())) + } + } +} + +impl Pythonize for SparseArray +where + N: RasterDtype + Element + 'static, +{ + fn pythonize(self, _py: Python, _opt_flags: OptionalFlags) -> PyResult { + 
Ok(PyOutput::Sparse(PySparseArray(Arc::new(self)))) + } +} + +/// Trait to convert a [`rusterize::prelude::SparseArray`] into a python object that mask the output data type. +pub trait PySparseArrayTraits: Send + Sync { + fn shape(&self) -> (usize, usize); + fn extent(&self) -> (f64, f64, f64, f64); + fn resolution(&self) -> (f64, f64); + fn epsg(&self) -> Option; + fn size_hint(&self) -> String; + fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult>; + fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult>; + fn to_frame(&self) -> PyDataFrame; +} + +impl PySparseArrayTraits for SparseArray +where + T: RasterDtype + Element, +{ + fn shape(&self) -> (usize, usize) { + SparseArray::shape(self) + } + fn extent(&self) -> (f64, f64, f64, f64) { + SparseArray::extent(self) + } + fn resolution(&self) -> (f64, f64) { + SparseArray::resolution(self) + } + fn epsg(&self) -> Option { + SparseArray::epsg(self) + } + + /// Estimated size of the materialized [`rusterize::prelude::DenseArray`] + fn size_hint(&self) -> String { + let (nrows, ncols) = SparseArray::shape(self); + let bytes = std::mem::size_of::() * nrows * ncols; + if bytes < 1000 { + format!("{} bytes", bytes) + } else if bytes < 1_000_000 { + format!("{:.2} KB", bytes as f32 / 1000.0) + } else if bytes < 1_000_000_000 { + format!("{:.2} MB", bytes as f32 / 1_000_000.0) + } else { + format!("{:.2} GB", bytes as f32 / 1_000_000_000.0) + } + } + + fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult> { + let raster = self.build_array(); + let data = raster.into_pyarray(py); + build_xarray(py, self.raster_info(), data, self.band_names()) + } + + fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult> { + let raster = self.build_array(); + Ok(raster.into_pyarray(py).into_any()) + } + + fn to_frame(&self) -> PyDataFrame { + PyDataFrame(SparseArray::to_frame(self)) + } +} + +#[pyclass(name = "SparseArray")] +pub struct PySparseArray(pub Arc); + +#[pymethods] +impl PySparseArray { + fn __repr__(&self) -> String { + let 
epsg = match self.0.epsg() { + Some(e) => e.to_string(), + None => String::from("None"), + }; + + format!( + "SparseArray:\n- Shape: {:?}\n- Extent: {:?}\n- Resolution: {:?}\n- EPSG: {}\n- Estimated size: {}", + self.0.shape(), + self.0.extent(), + self.0.resolution(), + epsg, + self.0.size_hint() + ) + } + + fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult> { + self.0.to_xarray(py) + } + + fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult> { + self.0.to_numpy(py) + } + + fn to_frame(&self) -> PyDataFrame { + self.0.to_frame() + } +} diff --git a/src/encoding/build_xarray.rs b/python/src/encoding/xarray.rs similarity index 81% rename from src/encoding/build_xarray.rs rename to python/src/encoding/xarray.rs index 5900e53..6ea904a 100644 --- a/src/encoding/build_xarray.rs +++ b/python/src/encoding/xarray.rs @@ -1,23 +1,17 @@ -/* -Build xarray object from a dictionary. - -The xarray module is passed as a function argument to avoid importing -it twice for DenseSparse and SparseArray -*/ - -use crate::geo::raster::RasterInfo; +use crate::geo::raster::make_coordinates; use num_traits::Num; use numpy::{Element, PyArray3}; use pyo3::{ prelude::*, types::{PyDict, PyList}, }; +use rusterize::prelude::RasterInfo; pub fn build_xarray<'py, T>( py: Python<'py>, - raster_info: RasterInfo, + raster_info: &RasterInfo, data: Bound<'py, PyArray3>, - band_names: Vec, + band_names: &[String], ) -> PyResult> where T: Num + Element, @@ -25,7 +19,7 @@ where let xarray_module = py.import("xarray")?; py.import("rioxarray")?; - let (y, x) = raster_info.make_coordinates(py); + let (y, x) = make_coordinates(py, raster_info); let bands = PyList::new(py, band_names)?; let dims = PyList::new(py, vec!["bands", "y", "x"])?; diff --git a/src/geo/parse_geometry.rs b/python/src/geo/parse_geometry.rs similarity index 79% rename from src/geo/parse_geometry.rs rename to python/src/geo/parse_geometry.rs index c2368c7..26eb4af 100644 --- a/src/geo/parse_geometry.rs +++ 
b/python/src/geo/parse_geometry.rs @@ -5,7 +5,7 @@ This is faster than parsing geometries directly via __geo_interface__ use geo_traits::to_geo::ToGeoGeometry; use geo_types::Geometry; -use polars::{datatypes::DataType, error::PolarsError, prelude::*}; +use polars::{datatypes::DataType, prelude::*}; use pyo3::{ Bound, exceptions::{PyTypeError, PyValueError}, @@ -16,35 +16,24 @@ use pyo3::{ }; use pyo3_polars::PySeries; use rayon::iter::ParallelIterator; -use std::ops::Deref; +use rusterize::prelude::{RusterizeError, RusterizeResult}; use wkb::reader::read_wkb; use wkt::TryFromWkt; -pub struct ParsedGeometry(Vec>); - -impl ParsedGeometry { - pub fn len(&self) -> usize { - self.0.len() - } - - pub fn get(&self, index: usize) -> Option<&Geometry> { - self.0.get(index) - } -} - -impl<'a> IntoIterator for &'a ParsedGeometry { - type Item = &'a Geometry; - type IntoIter = std::slice::Iter<'a, Geometry>; - - fn into_iter(self) -> Self::IntoIter { - self.0.iter() - } +macro_rules! bail_if_empty_geoms { + ($identity:ident) => { + if $identity.is_empty() { + return Err(PyValueError::new_err( + "Could not parse geometry. Only WKT or WKB formats are supported.", + )); + } + }; } -impl Deref for ParsedGeometry { - type Target = [Geometry]; +pub struct ParsedGeometry(Vec>); - fn deref(&self) -> &Self::Target { +impl AsRef<[Geometry]> for ParsedGeometry { + fn as_ref(&self) -> &[Geometry] { self.0.as_slice() } } @@ -99,7 +88,7 @@ fn try_parse_wkb_to_geometry(wkb: &[u8]) -> Option> { } fn try_parse_wkt_to_geometry(wkt: &str) -> Option> { - Some(Geometry::try_from_wkt_str(wkt).unwrap()) + Some(Geometry::try_from_wkt_str(wkt).expect("Cannot parse geometry. Check that the WKT is valid.")) } fn to_wkb<'a>(input: &Bound<'a, PyAny>) -> PyResult> { @@ -130,12 +119,7 @@ fn parse_sequence_wkb(input: &Bound) -> PyResult { } } - if geoms.is_empty() { - return Err(PyValueError::new_err( - "Could not parse geometry. 
Only WKT or WKB formats are supported.", - )); - } - + bail_if_empty_geoms!(geoms); Ok(ParsedGeometry(geoms)) } @@ -148,16 +132,11 @@ fn parse_sequence_wkt(input: &Bound<'_, PyAny>) -> PyResult { } } - if geoms.is_empty() { - return Err(PyValueError::new_err( - "Could not parse geometry. Only WKT or WKB formats are supported.", - )); - } - + bail_if_empty_geoms!(geoms); Ok(ParsedGeometry(geoms)) } -fn parse_polars_series(input: Series) -> Result { +fn parse_polars_series(input: Series) -> RusterizeResult { let wkb_output = match input.dtype() { DataType::Binary => input .binary()? @@ -169,7 +148,7 @@ fn parse_polars_series(input: Series) -> Result { .par_iter() .filter_map(|item| item.and_then(try_parse_wkt_to_geometry)) .collect(), - _ => unimplemented!("Unsupported dtype for geometry column"), + _ => return Err(RusterizeError::ValueError("Unsupported dtype for geometry column")), }; Ok(ParsedGeometry(wkb_output)) } diff --git a/python/src/geo/raster.rs b/python/src/geo/raster.rs new file mode 100644 index 0000000..7feb7fd --- /dev/null +++ b/python/src/geo/raster.rs @@ -0,0 +1,65 @@ +use geo::Geometry; +use numpy::{IntoPyArray, PyArray1, ndarray::Array}; +use pyo3::prelude::*; +use rusterize::prelude::{RasterInfo, RasterInfoBuilder, RusterizeResult}; + +#[derive(FromPyObject)] +#[pyo3(from_item_all)] +pub struct RawRasterInfo { + shape: Option<[usize; 2]>, + extent: Option<[f64; 4]>, + resolution: Option<[f64; 2]>, + tap: bool, + epsg: Option, + with_custom_bounds: bool, +} + +impl RawRasterInfo { + pub(crate) fn build(self, geoms: &[Geometry]) -> RusterizeResult { + let mut builder = RasterInfoBuilder::new(); + + if let Some(shape) = self.shape { + builder = builder.shape(shape[0], shape[1]); + } + + if let Some(resolution) = self.resolution { + builder = builder.resolution(resolution[0], resolution[1]); + } + + if let Some(epsg) = self.epsg { + builder = builder.epsg(epsg); + } + + if self.tap { + builder = builder.with_target_align_pixel(); + } + + if let 
Some(extent) = self.extent + && self.with_custom_bounds + { + builder.extent(extent[0], extent[1], extent[2], extent[3]).build() + } else { + builder.build_with(geoms) + } + } +} + +/// Construct coordinates for xarray (start from pixel's center) +pub(crate) fn make_coordinates<'py>( + py: Python<'py>, + info: &RasterInfo, +) -> (Bound<'py, PyArray1>, Bound<'py, PyArray1>) { + let y_coords = Array::range( + info.ymax - info.yres / 2.0, + info.ymax - info.nrows as f64 * info.yres, + -info.yres, + ) + .into_pyarray(py); + let x_coords = Array::range( + info.xmin + info.xres / 2.0, + info.xmin + info.ncols as f64 * info.xres, + info.xres, + ) + .into_pyarray(py); + (y_coords, x_coords) +} diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 0000000..2034375 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,11 @@ +mod allocator; +mod geo { + pub(crate) mod parse_geometry; + pub(crate) mod raster; +} +mod encoding { + pub(crate) mod pyarray; + mod xarray; +} +mod prelude; +mod rusterize; diff --git a/python/src/prelude.rs b/python/src/prelude.rs new file mode 100644 index 0000000..e047218 --- /dev/null +++ b/python/src/prelude.rs @@ -0,0 +1,17 @@ +/// Optional flags at Python runtime +#[derive(Copy, Clone)] +pub struct OptionalFlags { + /// Burn all pixels that are touched by the geometry + pub all_touched: bool, + /// Output return type is Xarray + pub xarray: bool, +} + +impl OptionalFlags { + pub fn new(all_touched: bool, encoding: &str) -> Self { + Self { + all_touched, + xarray: encoding == "xarray", + } + } +} diff --git a/python/src/rusterize.rs b/python/src/rusterize.rs new file mode 100644 index 0000000..befff3d --- /dev/null +++ b/python/src/rusterize.rs @@ -0,0 +1,191 @@ +use crate::{ + encoding::pyarray::{PyOutput, Pythonize}, + geo::{parse_geometry::ParsedGeometry, raster::RawRasterInfo}, + prelude::*, +}; +use num_traits::One; +use numpy::{Element, PyReadonlyArray1}; +use polars::prelude::*; +use pyo3::{ + conversion::FromPyObject, 
+ exceptions::{PyRuntimeError, PyValueError}, + prelude::*, + types::PyAny, +}; +use pyo3_polars::PyDataFrame; +use rusterize::prelude::*; + +macro_rules! dispatch_rusterize { + ( + $dtype:expr, $encoding:expr, $py:expr, $ctx:expr, + [ $( ($str_val:pat, $rust_type:ty) ),* ] + ) => { + match ($dtype, $encoding) { + $( + ($str_val, "xarray" | "numpy") => rusterize_py_impl::>($py, $ctx), + ($str_val, "sparse") => rusterize_py_impl::>($py, $ctx), + )* + _ => unimplemented!("Invalid dtype or encoding provided."), + } + }; +} + +struct Context<'py> { + geometry: ParsedGeometry, + raster_info: RasterInfo, + pixel_fn: PixelFunction, + pybackground: Option<&'py Bound<'py, PyAny>>, + df: Option, + pyfield: Option<&'py str>, + pyby: Option<&'py str>, + pyburn: Option<&'py Bound<'py, PyAny>>, + opt_flags: OptionalFlags, +} + +fn rusterize_py_impl<'py, A>(py: Python<'py>, ctx: Context<'py>) -> PyResult> +where + A: ArrayBuilder + Pythonize, + A::Dtype: Default + Element + for<'a> FromPyObject<'a, 'py>, +{ + let background = ctx + .pybackground + .and_then(|inner| inner.extract().ok()) + .unwrap_or_default(); + + let prepared = match &ctx.df { + Some(df) => { + let mut exprs: Vec = Vec::new(); + if let Some(field) = ctx.pyfield { + exprs.push(col(field).cast(::polars_dtype()).alias("field")); + } + if let Some(by) = ctx.pyby { + exprs.push(col(by).cast(DataType::String).alias("by")); + } + Some( + df.clone() + .lazy() + .select(exprs) + .collect() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?, + ) + } + _ => None, + }; + + let arr: PyReadonlyArray1; + + let field = match (&prepared, ctx.pyfield) { + (Some(df), Some(_)) => FieldSource::Column(df.column("field").unwrap().clone()), + _ => match ctx.pyburn { + None => FieldSource::Scalar(::one()), + Some(b) => match b.extract::() { + Ok(scalar) => FieldSource::Scalar(scalar), + Err(_) => { + arr = b.extract::>()?; + FieldSource::Array(arr.as_array()) + } + }, + }, + }; + + // force every geometry to have a corresponding by 
value, errors if nulls + let by = match (&prepared, ctx.pyby) { + (Some(df), Some(_)) => { + let ca = df.column("by").unwrap().str().unwrap(); + + if ca.null_count() > 0 { + return Err(PyRuntimeError::new_err( + "Found nulls in `by` column. Consider droppping them.", + )); + } + + let by_vec = ca + .downcast_iter() + .flat_map(|a| a.values_iter()) + .map(str::to_owned) + .collect::>(); + Some(by_vec) + } + _ => None, + }; + + let rctx = RasterizeContext { + raster_info: ctx.raster_info, + field, + by: by.as_deref(), + pixel_fn: ctx.pixel_fn, + background, + all_touched: ctx.opt_flags.all_touched, + }; + + ctx.geometry + .rasterize::(rctx) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))? + .pythonize(py, ctx.opt_flags) +} + +#[pyfunction] +#[pyo3(name = "_rusterize")] +#[pyo3(signature = (geometry, raw_raster_info, pypixel_fn, pydf=None, pyfield=None, pyby=None, pyburn=None, pybackground=None, pytouched=false, pyencoding="xarray", pydtype="float64"))] +#[allow(clippy::too_many_arguments)] +fn rusterize_py<'py>( + py: Python<'py>, + geometry: ParsedGeometry, + raw_raster_info: RawRasterInfo, + pypixel_fn: &'py str, + pydf: Option, + pyfield: Option<&'py str>, + pyby: Option<&'py str>, + pyburn: Option<&'py Bound>, + pybackground: Option<&'py Bound>, + pytouched: bool, + pyencoding: &str, + pydtype: &str, +) -> PyResult> { + let df: Option = pydf.map(|inner| inner.into()); + let raster_info = raw_raster_info + .build(geometry.as_ref()) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let pixel_fn = pypixel_fn + .parse::() + .map_err(|e| PyValueError::new_err(e.to_string()))?; + let opt_flags = OptionalFlags::new(pytouched, pyencoding); + + let ctx = Context { + geometry, + raster_info, + pixel_fn, + pybackground, + df, + pyfield, + pyby, + pyburn, + opt_flags, + }; + + dispatch_rusterize!( + pydtype, + pyencoding, + py, + ctx, + [ + ("uint8", u8), + ("uint16", u16), + ("uint32", u32), + ("uint64", u64), + ("int8", i8), + ("int16", i16), + ("int32", 
i32), + ("int64", i64), + ("float32", f32), + ("float64", f64) + ] + ) +} + +#[pymodule] +#[pyo3(name = "_rusterize")] +fn rusterize_wrap(m: &Bound) -> PyResult<()> { + m.add_function(wrap_pyfunction!(rusterize_py, m)?)?; + Ok(()) +} diff --git a/test/data/standard_output_sum.tif b/python/test/data/standard_output_sum.tif similarity index 100% rename from test/data/standard_output_sum.tif rename to python/test/data/standard_output_sum.tif diff --git a/test/data/standard_output_sum_custom_shape.tif b/python/test/data/standard_output_sum_custom_shape.tif similarity index 100% rename from test/data/standard_output_sum_custom_shape.tif rename to python/test/data/standard_output_sum_custom_shape.tif diff --git a/test/test_many.py b/python/test/test_many.py similarity index 100% rename from test/test_many.py rename to python/test/test_many.py diff --git a/python/zensical.toml b/python/zensical.toml new file mode 100644 index 0000000..323dd02 --- /dev/null +++ b/python/zensical.toml @@ -0,0 +1,68 @@ +[project] +site_name = "rusterize" +site_description = "Extremely fast geometry rasterization engine for Python, built in Rust" +site_url = "https://ttrotto.github.io/rusterize/" +repo_url = "https://github.com/ttrotto/rusterize" +docs_dir = "docs" +site_dir = "site" +copyright = """ +By the rusterize team +""" +nav = [ + { "Home" = "index.md" }, + { "Python" = "python.md" }, + { "API reference" = "api.md" }, +] + +[project.theme] +variant = "modern" +features = [ + "content.action.edit", + "content.action.view", + "content.code.annotate", + "content.code.copy", + "content.code.select", + "content.footnote.tooltips", + "content.tabs.link", + "content.tooltips", + "navigation.footer", + "navigation.indexes", + "navigation.instant", + "navigation.instant.prefetch", + "navigation.instant.progress", + "navigation.path", + "navigation.prune", + "navigation.top", + "navigation.tracking", + "search.highlight", +] + +[[project.theme.palette]] +accent = "blue" +scheme = "default" 
+toggle.icon = "lucide/sun" +toggle.name = "Switch to dark mode" + +[[project.theme.palette]] +accent = "purple" +scheme = "slate" +toggle.icon = "lucide/moon" +toggle.name = "Switch to light mode" + +[project.markdown_extensions] +"pymdownx.highlight" = {} +"pymdownx.superfences" = {} + +[project.plugins.mkdocstrings.handlers.python] +inventories = ["https://docs.python.org/3/objects.inv"] +paths = ["python"] + +[project.plugins.mkdocstrings.handlers.python.options] +docstring_style = "numpy" +show_source = false +show_root_heading = true +heading_level = 2 +show_overloads = false +parameter_headings = true +show_category_heading = true +show_symbol_type_heading = true diff --git a/rust-toolchain.toml b/rust-toolchain.toml index d7b477e..71d2892 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2026-01-09" +channel = "nightly-2026-04-01" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..2e9e10a --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "rusterize-rs" +description = { workspace = true } +version = "0.1.0" +authors = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } +keywords = { workspace = true } +readme = "README.md" +autoexamples = false + +[lib] +name = "rusterize" + +[dependencies] +thiserror = "2.0.18" +fixedbitset = "0.5.7" +geo = { workspace = true } +geo-types = { workspace = true } +ndarray = { version = "0.17.2", features = ["rayon"] } +num-traits = { workspace = true } +rayon = { workspace = true } + +polars = { version = "0.54.4", default-features = false, features = ["lazy", "simd", "performant", "nightly", "dtype-i8", "dtype-i16", "dtype-u8", "dtype-u16"], optional = true } +hotpath = { version = "0.19", optional = true } + +[features] +polars = ["dep:polars"] +# Profiling. Off by default = zero compile/runtime cost. 
+hotpath = ["hotpath/hotpath"] +hotpath-alloc = ["hotpath/hotpath-alloc"] + +[package.metadata.docs.rs] +features = ["polars"] diff --git a/rust/LICENSE b/rust/LICENSE new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/rust/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000..3544cbe --- /dev/null +++ b/rust/README.md @@ -0,0 +1,57 @@ +## rusterize + +**rusterize** is an extremely fast, trait-based, rasterization engine for [`geo::Geometry`](https://docs.rs/geo/latest/geo/geometry/enum.Geometry.html). + +Geometries can be rasterized as a `DenseArray` (a materialized raster) or a `SparseArray`, containing the band/row/col value triplets +of all lazily burned pixels. A `SparseArray` can later be materialized into a raster, therefore avoiding large memory allocations +until it's actually needed. + +### Installation + +```toml +[dependencies] +rusterize-rs = "0.1" +``` + +To include [`polars`](https://docs.rs/polars/latest/polars/) support: + +```toml +[dependencies] +rusterize-rs = { version = "0.1", features = ["polars"] } +``` + +### Example + +Build a `RasterInfo` describing the output grid, wrap it in a `RasterizeContext`, then call `rasterize` on any slice of geometries. +The target type (`DenseArray` or `SparseArray`) selects the output encoding and data type. The `PixelFunction` dictates what happens +to overlapping pixels. `FieldSource` represents the values to be burned. 
+ +```rust +use rusterize::prelude::*; +use geo::{Geometry, Point}; + +fn example() -> RusterizeResult<()> { + let raster_info = RasterInfoBuilder::new() + .extent(0.0, 0.0, 10.0, 10.0) + .resolution(1.0, 1.0) + .build()?; + + let geoms = vec![Geometry::Point(Point::new(5.0, 5.0)), Geometry::Point(Point::new(3.0, 3.0))]; + + let ctx = RasterizeContext { + raster_info, + field: FieldSource::Scalar(1.0_f64), + by: None, + pixel_fn: PixelFunction::Last, + background: f64::NAN, + all_touched: false, + }; + + let raster = geoms.rasterize::>(ctx)?; + Ok(()) +} +``` + +### Feature flags + +- `polars`: Adds `FieldSource::Column` for burning a [`polars`](https://docs.rs/polars/latest/polars/) column. diff --git a/rust/src/encoding/arrays.rs b/rust/src/encoding/arrays.rs new file mode 100644 index 0000000..d195193 --- /dev/null +++ b/rust/src/encoding/arrays.rs @@ -0,0 +1,204 @@ +use crate::{ + geo::raster::RasterInfo, + prelude::{RasterDtype, RasterizeContext}, + rasterization::pixel_functions::PixelFn, +}; +use ndarray::Array3; +use num_traits::Num; +use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; + +/// A materialized 3-dimensional array containing the burned geometries and spatial information. +pub struct DenseArray { + raster: Array3, + band_names: Vec, + raster_info: RasterInfo, +} + +impl DenseArray { + pub(crate) fn new(raster: Array3, band_names: Vec, raster_info: RasterInfo) -> Self { + Self { + raster, + band_names, + raster_info, + } + } + + /// Consume self and extract all fields of the DenseArray. + pub fn into_parts(self) -> (Array3, Vec, RasterInfo) { + (self.raster, self.band_names, self.raster_info) + } + + /// Sorted band names for the array. Defaults to "band_1" for a single band. + pub fn band_names(&self) -> &[String] { + &self.band_names + } + + /// Spatial information associated with the array. 
+ pub fn raster_info(&self) -> &RasterInfo { + &self.raster_info + } +} + +/// Triplets of (row, col, value) for all bands as a contiguous block. +/// Used to store inside a [`SparseArray`]. +struct Triplets { + rows: Vec, + cols: Vec, + data: Vec, +} + +impl Triplets { + fn new(rows: Vec, cols: Vec, data: Vec) -> Self { + Self { rows, cols, data } + } +} + +/// A sparse array in COOordinate format storing the band/row/col value triplets. +/// of all burned [`geo::Geometry`]. +pub struct SparseArray { + band_names: Vec, + triplets: Triplets, + lengths: Vec, + raster_info: RasterInfo, + pxfn: PixelFn, + background: N, +} + +impl SparseArray +where + N: RasterDtype, +{ + pub(crate) fn new( + band_names: Vec, + rows: Vec, + cols: Vec, + data: Vec, + lengths: Vec, + ctx: RasterizeContext, + ) -> Self { + let pxfn = ctx.pixel_fn(); + let background = ctx.background; + + Self { + band_names, + triplets: Triplets::new(rows, cols, data), + lengths, + raster_info: ctx.raster_info, + pxfn, + background, + } + } + + /// Get the band names associated with this array. + pub fn band_names(&self) -> &[String] { + &self.band_names + } + + /// Materialize a [`ndarray::Array3`] from this. Drops spatial information. 
+ pub fn build_array(&self) -> Array3 { + let mut raster = self.raster_info.build_raster(self.band_names.len(), self.background); + + let rows = self.triplets.rows.as_slice(); + let cols = self.triplets.cols.as_slice(); + let data = self.triplets.data.as_slice(); + + // per-band start offset into the contiguous triplet arrays + let offsets = self + .lengths + .iter() + .scan(0, |state, &n| { + let start = *state; + *state += n; + Some(start) + }) + .collect::>(); + + raster + .outer_iter_mut() + .into_par_iter() + .zip(self.lengths.par_iter()) + .zip(offsets.par_iter()) + .for_each(|((mut band, n), &off)| { + let end = off + *n; + let band_rows = &rows[off..end]; + let band_cols = &cols[off..end]; + let band_data = &data[off..end]; + + for ((band_row, band_col), band_value) in band_rows.iter().zip(band_cols).zip(band_data) { + (self.pxfn)( + &mut band, + *band_row as usize, + *band_col as usize, + *band_value, + self.background, + ); + } + }); + raster + } + + pub fn extent(&self) -> (f64, f64, f64, f64) { + ( + self.raster_info.xmin, + self.raster_info.ymin, + self.raster_info.xmax, + self.raster_info.ymax, + ) + } + + pub fn shape(&self) -> (usize, usize) { + (self.raster_info.nrows, self.raster_info.ncols) + } + + pub fn resolution(&self) -> (f64, f64) { + (self.raster_info.xres, self.raster_info.yres) + } + + /// Get spatial information associated with this array. + pub fn raster_info(&self) -> &RasterInfo { + &self.raster_info + } + + pub fn epsg(&self) -> Option { + self.raster_info.epsg + } +} + +#[cfg(feature = "polars")] +mod feature_gated { + use super::SparseArray; + use crate::prelude::PolarsHandler; + use num_traits::Num; + use polars::prelude::*; + + impl SparseArray + where + N: Num + Copy + PolarsHandler, + { + /// Convert this to a [`polars::prelude::DataFrame`]. 
+ pub fn to_frame(&self) -> DataFrame { + let mut columns: Vec = Vec::new(); + + // add bands for multiband raster + if self.lengths.len() > 1 { + let bands = self + .lengths + .iter() + .enumerate() + .flat_map(|(i, v)| std::iter::repeat_n(i + 1, *v)) + .map(|b| b as u64) + .collect::>(); + let bands_column = Column::new("band".into(), bands); + columns.push(bands_column); + } + + columns.push(Column::new("row".into(), self.triplets.rows.as_slice())); + columns.push(Column::new("col".into(), self.triplets.cols.as_slice())); + + let height = self.triplets.data.len(); + columns.push(N::from_named_vec("values", &self.triplets.data)); + + DataFrame::new(height, columns).unwrap() + } + } +} diff --git a/src/encoding/writers.rs b/rust/src/encoding/writers.rs similarity index 78% rename from src/encoding/writers.rs rename to rust/src/encoding/writers.rs index 561b580..fc37d4d 100644 --- a/src/encoding/writers.rs +++ b/rust/src/encoding/writers.rs @@ -1,20 +1,17 @@ -/* Handle how pixels are recorded depending on the output format */ - use crate::{ encoding::arrays::SparseArray, - rasterization::{ - pixel_functions::PixelFn, - rusterize_impl::{PixelCache, RasterizeContext}, - }, + prelude::{RasterDtype, RasterizeContext}, + rasterization::{pixel_cache::PixelCache, pixel_functions::PixelFn}, }; use ndarray::ArrayViewMut2; use num_traits::Num; -pub trait PixelWriter { +/// Trait in charge of writing a pixel onto a [`DenseArray`] or [`SparseArray`]. +pub(crate) trait PixelWriter { fn write(&mut self, y: usize, x: usize, value: N, background: N); } -// writer for interior and exterior lines when `all_touched` is true (pass 1) +/// Writer for interior and exterior [`geo::Linestring`] when `all_touched` is true (pass 1). 
pub struct LineWriter<'a, W> { inner: &'a mut W, cache: &'a mut PixelCache, @@ -33,12 +30,12 @@ where } impl<'a, W> LineWriter<'a, W> { - pub fn new(inner: &'a mut W, cache: &'a mut PixelCache) -> Self { + pub(crate) fn new(inner: &'a mut W, cache: &'a mut PixelCache) -> Self { Self { inner, cache } } } -// writer for filling pixels after burning lines when `all_touched` is true (pass 2) +/// Writer for filling pixels after burning a [`geo::Linestring`] when `all_touched` is true (pass 2). pub struct FillWriter<'a, W> { inner: &'a mut W, cache: &'a mut PixelCache, @@ -57,12 +54,12 @@ where } impl<'a, W> FillWriter<'a, W> { - pub fn new(inner: &'a mut W, cache: &'a mut PixelCache) -> Self { + pub(crate) fn new(inner: &'a mut W, cache: &'a mut PixelCache) -> Self { Self { inner, cache } } } -// writer for dense output (numpy/xarray) +/// Writer for a [`DenseArray`]. pub struct DenseArrayWriter<'a, N> { band: ArrayViewMut2<'a, N>, pxfn: PixelFn, @@ -80,30 +77,30 @@ impl<'a, N: Num> DenseArrayWriter<'a, N> { } } -// convert sparse writer into a sparse array +/// Convert a [`SparseArrayWriter`] into a [`SparseArray`]. pub trait ToSparseArray { fn finish(self, ctx: RasterizeContext) -> SparseArray; } -// writer for sparse output (COOrdinate format) +/// Writer for a [`SparseArray`]. 
pub struct SparseArrayWriter { pub band_name: String, - pub rows: Vec, - pub cols: Vec, + pub rows: Vec, + pub cols: Vec, pub values: Vec, } impl PixelWriter for SparseArrayWriter { fn write(&mut self, y: usize, x: usize, value: N, _background: N) { - self.rows.push(y); - self.cols.push(x); + self.rows.push(y as u64); + self.cols.push(x as u64); self.values.push(value); } } impl ToSparseArray for SparseArrayWriter where - N: Num + Copy, + N: RasterDtype, { fn finish(self, ctx: RasterizeContext) -> SparseArray { let lengths = vec![self.values.len()]; @@ -114,7 +111,7 @@ where impl ToSparseArray for Vec> where - N: Num + Copy, + N: RasterDtype, { fn finish(self, ctx: RasterizeContext) -> SparseArray { let (band_names, rows, cols, data, lengths) = self.into_iter().fold( diff --git a/rust/src/error.rs b/rust/src/error.rs new file mode 100644 index 0000000..d1387c8 --- /dev/null +++ b/rust/src/error.rs @@ -0,0 +1,14 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum RusterizeError { + #[error("{0}")] + RuntimeError(&'static str), + #[error("{0}")] + ValueError(&'static str), + #[cfg(feature = "polars")] + #[error(transparent)] + Polars(#[from] polars::error::PolarsError), +} + +pub type RusterizeResult = std::result::Result; diff --git a/src/geo/edges.rs b/rust/src/geo/edges.rs similarity index 82% rename from src/geo/edges.rs rename to rust/src/geo/edges.rs index 5568028..697ebd6 100644 --- a/src/geo/edges.rs +++ b/rust/src/geo/edges.rs @@ -1,9 +1,8 @@ -/* Structure to contain information on geometry edges */ - use crate::geo::raster::RasterInfo; use geo_types::{LineString, Point}; -pub struct PointEdge { +/// Spatial coordinates of a single [`geo::Point`]. +pub(crate) struct PointEdge { pub x: usize, pub y: usize, } @@ -14,7 +13,8 @@ impl PointEdge { } } -pub struct PolyEdge { +/// Represents the edges of a single [`geo::Polygon`]. 
+pub(crate) struct PolyEdge { pub ystart: usize, pub yend: usize, x0: f64, @@ -45,9 +45,9 @@ impl PolyEdge { } } - // sort by x intersection at y line + /// Sort by x intersection at y line. #[inline] - pub fn intersect_at(&self, yline: usize) -> f64 { + pub(crate) fn intersect_at(&self, yline: usize) -> f64 { // y line center let center_y = yline as f64 + 0.5; @@ -55,7 +55,8 @@ impl PolyEdge { } } -pub struct LineEdge { +/// Represent the edges of a single [`geo::Linestring`]. +pub(crate) struct LineEdge { pub x0: f64, pub y0: f64, pub x1: f64, @@ -75,7 +76,7 @@ impl LineEdge { } } -pub fn extract_point(edges: &mut Vec, point: &Point, raster_info: &RasterInfo) { +pub(crate) fn extract_point(edges: &mut Vec, point: &Point, raster_info: &RasterInfo) { // world-to-pixel conversion let x = (point.x() - raster_info.xmin) / raster_info.xres; let y = (raster_info.ymax - point.y()) / raster_info.yres; @@ -86,7 +87,7 @@ pub fn extract_point(edges: &mut Vec, point: &Point, raster_info } } -pub fn extract_ring(edges: &mut Vec, line: &LineString, raster_info: &RasterInfo) { +pub(crate) fn extract_ring(edges: &mut Vec, line: &LineString, raster_info: &RasterInfo) { let rows = raster_info.nrows as f64; for w in line.0.windows(2) { // world-to-pixel conversion @@ -108,7 +109,7 @@ pub fn extract_ring(edges: &mut Vec, line: &LineString, raster_in } } -pub fn extract_line(edges: &mut Vec, line: &LineString, raster_info: &RasterInfo) { +pub(crate) fn extract_line(edges: &mut Vec, line: &LineString, raster_info: &RasterInfo) { let rows = raster_info.nrows as f64; let cols = raster_info.ncols as f64; let is_closed = line.is_closed(); diff --git a/rust/src/geo/raster.rs b/rust/src/geo/raster.rs new file mode 100644 index 0000000..621cec6 --- /dev/null +++ b/rust/src/geo/raster.rs @@ -0,0 +1,159 @@ +use crate::error::{RusterizeError, RusterizeResult}; +use geo::{BoundingRect, Geometry, Rect, coord}; +use ndarray::Array3; +use num_traits::Num; + +/// Contains the spatial information 
associated with the burned [`geo::Geometry`]. +#[derive(Clone)] +pub struct RasterInfo { + pub ncols: usize, + pub nrows: usize, + pub xmin: f64, + pub xmax: f64, + pub ymin: f64, + pub ymax: f64, + pub xres: f64, + pub yres: f64, + pub epsg: Option, +} + +impl RasterInfo { + pub(crate) fn build_raster(&self, bands: usize, background: N) -> Array3 + where + N: Num + Copy, + { + Array3::from_elem((bands, self.nrows, self.ncols), background) + } +} + +/// Builder for a [`RasterInfo`] instance. +/// If extent is not provided, it can be inferred from the [`geo::Geometry`] when building it. +/// In this case, a half-pixel buffer is applied to avoid missing points on the border. +/// The logics dictating the final spatial properties of the rasterized geometries follow those of GDAL. +#[derive(Default)] +pub struct RasterInfoBuilder { + shape: Option<[usize; 2]>, + extent: Option<[f64; 4]>, + resolution: Option<[f64; 2]>, + tap: bool, + epsg: Option, +} + +impl RasterInfoBuilder { + pub fn new() -> Self { + RasterInfoBuilder::default() + } + + /// Build into a [`RasterInfo`] with user-defined extent. + pub fn build(self) -> RusterizeResult { + match self.extent { + Some(extent) => self.finalize(extent, false), + None => Err(RusterizeError::RuntimeError( + "Extent must be provided for construction. \ + Use `build_with()` to infer extent from geometries.", + )), + } + } + + /// Same as `build`, but infer extent from the geometry. + pub fn build_with(self, geoms: &[Geometry]) -> RusterizeResult { + if self.extent.is_some() { + return Err(RusterizeError::RuntimeError( + "Extent must be inferred from geometries for construction. \ + Use `build()` to provide a custom extent.", + )); + } + + let bounds = geoms.iter().fold(None, |acc, geom| { + let bounds = geom.bounding_rect(); + + match (acc, bounds) { + (None, None) => None, + (None, Some(r)) | (Some(r), None) => Some(r), + (Some(r1), Some(r2)) => Some(Rect::new( + coord! 
{ x: r1.min().x.min(r2.min().x), y: r1.min().y.min(r2.min().y) }, + coord! { x: r1.max().x.max(r2.max().x), y: r1.max().y.max(r2.max().y) }, + )), + } + }); + + if let Some(b) = bounds { + self.finalize([b.min().x, b.min().y, b.max().x, b.max().y], true) + } else { + return Err(RusterizeError::RuntimeError("Cannot infer bounding box from geometry.")); + } + } + + fn finalize( + self, + [mut xmin, mut ymin, mut xmax, mut ymax]: [f64; 4], + inferred: bool, + ) -> RusterizeResult { + if self.shape.is_none() && self.resolution.is_none() { + return Err(RusterizeError::ValueError( + "Must set at least one of `shape` or `resolution`", + )); + } + let has_shape = self.shape.is_some(); + let has_res = self.resolution.is_some(); + let [mut nrows, mut ncols] = self.shape.unwrap_or_default(); + let [mut xres, mut yres] = self.resolution.unwrap_or_default(); + + if inferred && !self.tap && has_res { + xmin -= xres / 2.0; + xmax += xres / 2.0; + ymin -= yres / 2.0; + ymax += yres / 2.0; + } + if !has_res { + xres = (xmax - xmin) / ncols as f64; + yres = (ymax - ymin) / nrows as f64; + } else if self.tap { + xmin = (xmin / xres).floor() * xres; + xmax = (xmax / xres).ceil() * xres; + ymin = (ymin / yres).floor() * yres; + ymax = (ymax / yres).ceil() * yres; + } + if !has_shape { + nrows = (0.5 + (ymax - ymin) / yres) as usize; + ncols = (0.5 + (xmax - xmin) / xres) as usize; + } + + Ok(RasterInfo { + ncols, + nrows, + xmin, + xmax, + ymin, + ymax, + xres, + yres, + epsg: self.epsg, + }) + } + + pub fn shape(mut self, nrows: usize, ncols: usize) -> Self { + self.shape = Some([nrows, ncols]); + self + } + + pub fn extent(mut self, xmin: f64, ymin: f64, xmax: f64, ymax: f64) -> Self { + self.extent = Some([xmin, ymin, xmax, ymax]); + self + } + + pub fn resolution(mut self, xres: f64, yres: f64) -> Self { + self.resolution = Some([xres, yres]); + self + } + + pub fn with_target_align_pixel(mut self) -> Self { + self.tap = true; + self + } + + pub fn epsg(mut self, epsg: u16) -> Self 
{ + self.epsg = Some(epsg); + self + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..a7102a0 --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,39 @@ +#![doc = include_str!("../README.md")] + +#[doc(hidden)] +pub mod error; +#[doc(hidden)] +pub mod prelude; +#[doc(hidden)] +pub mod rasterize; +#[doc(hidden)] +pub mod geo { + pub(crate) mod edges; + pub mod raster; +} +#[doc(hidden)] +pub mod rasterization { + pub(crate) mod burn_geometry; + pub(crate) mod burners; + pub(crate) mod pixel_cache; + pub mod pixel_functions; +} +#[doc(hidden)] +pub mod encoding { + pub mod arrays; + pub(crate) mod writers; +} + +#[doc(inline)] +pub use crate::{ + encoding::arrays::{DenseArray, SparseArray}, + error::{RusterizeError, RusterizeResult}, + geo::raster::{RasterInfo, RasterInfoBuilder}, + prelude::{NaNAware, RasterDtype, RasterizeContext}, + rasterization::pixel_functions::PixelFunction, + rasterize::{ArrayBuilder, FieldSource, Rasterize}, +}; + +#[cfg(feature = "polars")] +#[doc(inline)] +pub use crate::prelude::PolarsHandler; diff --git a/rust/src/prelude.rs b/rust/src/prelude.rs new file mode 100644 index 0000000..9fc4b4f --- /dev/null +++ b/rust/src/prelude.rs @@ -0,0 +1,119 @@ +use crate::rasterization::pixel_functions::PixelFn; +use num_traits::Num; +use std::ops::AddAssign; + +pub use crate::{ + encoding::arrays::{DenseArray, SparseArray}, + error::{RusterizeError, RusterizeResult}, + geo::raster::{RasterInfo, RasterInfoBuilder}, + rasterization::pixel_functions::PixelFunction, + rasterize::{ArrayBuilder, FieldSource, Rasterize}, +}; + +/// Trait to handle NaN check for dtypes that don't have it. +pub trait NaNAware { + fn is_nan(&self) -> bool; +} + +impl NaNAware for f32 { + fn is_nan(&self) -> bool { + f32::is_nan(*self) + } +} + +impl NaNAware for f64 { + fn is_nan(&self) -> bool { + f64::is_nan(*self) + } +} + +macro_rules! 
impl_maybe_nan_for_int { + ($($t:ty),*) => { + $(impl NaNAware for $t { + fn is_nan(&self) -> bool { + false + } + })* + }; +} + +impl_maybe_nan_for_int!(u8, u16, u32, u64, i8, i16, i32, i64); + +/// Handle polars dtypes and conversions. +#[cfg(feature = "polars")] +pub trait PolarsHandler: polars::prelude::Literal + Send + Sync { + type ChunkedArrayType: polars::prelude::PolarsNumericType + 'static; + fn polars_dtype() -> polars::prelude::DataType; + fn from_named_vec(name: &str, vec: &[Self]) -> polars::prelude::Column + where + Self: Sized; +} + +#[cfg(feature = "polars")] +macro_rules! impl_polars_handler { + ($($t:ty => { dtype: $dtype:expr, catype: $catype:ty }),* $(,)?) => { + $( + impl PolarsHandler for $t { + type ChunkedArrayType = $catype; + fn polars_dtype() -> polars::prelude::DataType { $dtype } + fn from_named_vec(name: &str, vec: &[Self]) -> polars::prelude::Column { + polars::prelude::Column::new(name.into(), vec) + } + } + )* + }; +} + +#[cfg(feature = "polars")] +impl_polars_handler! { + f64 => { dtype: polars::prelude::DataType::Float64, catype: polars::prelude::Float64Type }, + f32 => { dtype: polars::prelude::DataType::Float32, catype: polars::prelude::Float32Type }, + u8 => { dtype: polars::prelude::DataType::UInt8, catype: polars::prelude::UInt8Type }, + i8 => { dtype: polars::prelude::DataType::Int8, catype: polars::prelude::Int8Type }, + u16 => { dtype: polars::prelude::DataType::UInt16, catype: polars::prelude::UInt16Type }, + i16 => { dtype: polars::prelude::DataType::Int16, catype: polars::prelude::Int16Type }, + u32 => { dtype: polars::prelude::DataType::UInt32, catype: polars::prelude::UInt32Type }, + i32 => { dtype: polars::prelude::DataType::Int32, catype: polars::prelude::Int32Type }, + u64 => { dtype: polars::prelude::DataType::UInt64, catype: polars::prelude::UInt64Type }, + i64 => { dtype: polars::prelude::DataType::Int64, catype: polars::prelude::Int64Type }, +} + +/// Bound rasterization to a dtype. 
+#[cfg(feature = "polars")] +pub trait RasterDtype: Num + Copy + AddAssign + PartialOrd + NaNAware + PolarsHandler {} +#[cfg(feature = "polars")] +impl RasterDtype for N {} +#[cfg(not(feature = "polars"))] +pub trait RasterDtype: Num + Copy + AddAssign + PartialOrd + NaNAware + Send + Sync {} +#[cfg(not(feature = "polars"))] +impl RasterDtype for N {} + +/// Spatial + value context handed to the rasterization engine. +#[derive(Clone)] +pub struct RasterizeContext<'a, N> { + /// The spatial information of the final raster. + pub raster_info: RasterInfo, + /// The values to burn. + pub field: FieldSource<'a, N>, + /// Specify the grouping of the geometries into multiple bands in the final raster. None is no grouping. + /// For this to work, `by` has to have the same length of the geometries. + pub by: Option<&'a [String]>, + /// Describes what happens to overlapping pixels. + pub pixel_fn: PixelFunction, + pub background: N, + /// Flags whether all pixels touching the geometry should be burned. 
+ pub all_touched: bool, +} + +impl<'a, N> RasterizeContext<'a, N> { + pub(crate) fn pixel_fn(&self) -> PixelFn + where + N: Num + Copy + AddAssign + PartialOrd + NaNAware, + { + self.pixel_fn.to_function() + } + + pub(crate) fn requires_dedup(&self) -> bool { + self.all_touched && matches!(self.pixel_fn, PixelFunction::Sum | PixelFunction::Count) + } +} diff --git a/src/rasterization/burn_geometry.rs b/rust/src/rasterization/burn_geometry.rs similarity index 50% rename from src/rasterization/burn_geometry.rs rename to rust/src/rasterization/burn_geometry.rs index b8f4cf4..b2e30c6 100644 --- a/src/rasterization/burn_geometry.rs +++ b/rust/src/rasterization/burn_geometry.rs @@ -1,5 +1,3 @@ -/* Rasterize a single geometry */ - use crate::{ encoding::writers::{FillWriter, LineWriter, PixelWriter}, geo::{ @@ -8,26 +6,27 @@ use crate::{ }, rasterization::{ burners::{LineBurnStrategy, burn_point, burn_polygon}, - rusterize_impl::PixelCache, + pixel_cache::PixelCache, }, }; use geo_types::{Geometry, GeometryCollection, LineString, MultiLineString, MultiPolygon, Polygon}; use num_traits::Num; -pub trait Burn +/// Burn a single [`geo::Geometry`] onto a [`DenseArray`] or [`SparseArray`]. 
+pub(crate) trait Burn where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T); + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N); } -impl Burn for Geometry +impl Burn for Geometry where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { match self { Geometry::Point(geom) => { let mut pointedge = Vec::new(); @@ -48,29 +47,38 @@ where Geometry::LineString(geom) => geom.burn::(raster_info, field_value, writer, background), Geometry::MultiLineString(geom) => geom.burn::(raster_info, field_value, writer, background), Geometry::GeometryCollection(geom) => geom.burn::(raster_info, field_value, writer, background), - _ => (), // not a shapely geometry + Geometry::Rect(geom) => geom + .to_polygon() + .burn::(raster_info, field_value, writer, background), + Geometry::Triangle(geom) => geom + .to_polygon() + .burn::(raster_info, field_value, writer, background), + Geometry::Line(geom) => { + let linestring = LineString::new(vec![geom.start, geom.end]); + linestring.burn::(raster_info, field_value, writer, background) + } } } } -impl Burn for GeometryCollection +impl Burn for GeometryCollection where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { for geom in self { geom.burn::(raster_info, field_value, writer, background) } } } -impl Burn for Polygon +impl Burn for Polygon where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut 
W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { // extract edges let mut polyedges = Vec::new(); extract_ring(&mut polyedges, self.exterior(), raster_info); @@ -86,7 +94,7 @@ where extract_line(&mut linedges, hole, raster_info); } - let pixel_cache = if S::REQUIRES_DEDUPLICATION { + let pixel_cache = if S::REQUIRES_DEDUP { Some(PixelCache::new(&linedges)) } else { None @@ -97,7 +105,7 @@ where (None, None) }; - handle_polygon::( + handle_polygon::( raster_info, &mut polyedges, linedges, @@ -109,12 +117,12 @@ where } } -impl Burn for MultiPolygon +impl Burn for MultiPolygon where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { // extract edges for all polygon let mut polyedges = Vec::new(); for polygon in self { @@ -134,7 +142,7 @@ where } } - let pixel_cache = if S::REQUIRES_DEDUPLICATION { + let pixel_cache = if S::REQUIRES_DEDUP { Some(PixelCache::new(&linedges)) } else { None @@ -145,7 +153,7 @@ where (None, None) }; - handle_polygon::( + handle_polygon::( raster_info, &mut polyedges, linedges, @@ -157,18 +165,18 @@ where } } -impl Burn for LineString +impl Burn for LineString where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { // extract exterior and interior lines let mut linedges = Vec::new(); extract_line(&mut linedges, self, raster_info); // handle cases when pixels are not squares - if raster_info.xres != raster_info.yres || S::REQUIRES_DEDUPLICATION { + if raster_info.xres != raster_info.yres || S::REQUIRES_DEDUP { let mut cache = PixelCache::new(&linedges); let mut line_writer = 
LineWriter::new(writer, &mut cache); S::burn_line(&linedges, raster_info, field_value, &mut line_writer, background) @@ -178,12 +186,12 @@ where } } -impl Burn for MultiLineString +impl Burn for MultiLineString where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - fn burn(&self, raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) { + fn burn(&self, raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) { // extract all edges first to avoid overlaps when a line ends at the beginning of another let mut linedges = Vec::new(); for line in self { @@ -191,7 +199,7 @@ where } // handle cases when pixels are not squares - if raster_info.xres != raster_info.yres || S::REQUIRES_DEDUPLICATION { + if raster_info.xres != raster_info.yres || S::REQUIRES_DEDUP { let mut cache = PixelCache::new(&linedges); let mut line_writer = LineWriter::new(writer, &mut cache); S::burn_line(&linedges, raster_info, field_value, &mut line_writer, background) @@ -201,17 +209,17 @@ where } } -fn handle_polygon( +fn handle_polygon( raster_info: &RasterInfo, polyedges: &mut Vec, linedges: Option>, pixel_cache: &mut Option, - field_value: T, + field_value: N, writer: &mut W, - background: T, + background: N, ) where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, S: LineBurnStrategy, { match (linedges, pixel_cache) { @@ -233,3 +241,139 @@ fn handle_polygon( } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{encoding::writers::PixelWriter, rasterization::burners::Standard}; + use geo_types::{Line, MultiPoint, Point, Rect, Triangle, coord}; + + // records every pixel write so we can assert what each geometry burns + #[derive(Default)] + struct Collector { + cells: Vec<(usize, usize, f64)>, + } + impl PixelWriter for Collector { + fn write(&mut self, y: usize, x: usize, value: f64, _background: f64) { + self.cells.push((y, x, value)); + } + } + + // 10x10, world coord (x, y) -> (row = ymax - y, col = x) + 
fn raster_10() -> RasterInfo { + RasterInfo { + ncols: 10, + nrows: 10, + xmin: 0.0, + xmax: 10.0, + ymin: 0.0, + ymax: 10.0, + xres: 1.0, + yres: 1.0, + epsg: None, + } + } + + fn burn(geom: Geometry) -> Vec<(usize, usize, f64)> { + let ri = raster_10(); + let mut writer = Collector::default(); + geom.burn::(&ri, 1.0, &mut writer, 0.0); + writer.cells + } + + #[test] + fn point_burns_exact_cell() { + let cells = burn(Geometry::Point(Point::new(2.5, 7.5))); + assert_eq!(cells, vec![(2, 2, 1.0)]); + } + + #[test] + fn multipoint_burns_each_point() { + let mp = MultiPoint::new(vec![Point::new(1.5, 8.5), Point::new(5.5, 3.5)]); + let mut cells = burn(Geometry::MultiPoint(mp)); + cells.sort_by_key(|&(y, x, _)| (y, x)); + assert_eq!(cells, vec![(1, 1, 1.0), (6, 5, 1.0)]); + } + + #[test] + fn linestring_burns_cells() { + let ls = LineString::from(vec![(1.0, 5.0), (6.0, 5.0)]); + let cells = burn(Geometry::LineString(ls)); + assert!(!cells.is_empty(), "horizontal line should burn cells"); + assert!(cells.iter().all(|&(y, _, _)| y == 5)); + } + + #[test] + fn multilinestring_burns_cells() { + let mls = MultiLineString::new(vec![ + LineString::from(vec![(1.0, 8.0), (4.0, 8.0)]), + LineString::from(vec![(1.0, 2.0), (4.0, 2.0)]), + ]); + let cells = burn(Geometry::MultiLineString(mls)); + assert!(cells.iter().any(|&(y, _, _)| y == 2)); + assert!(cells.iter().any(|&(y, _, _)| y == 8)); + } + + #[test] + fn line_burns_diagonal() { + let line = Line::new(coord! { x: 1.0, y: 1.0 }, coord! 
{ x: 6.0, y: 6.0 }); + let cells = burn(Geometry::Line(line)); + assert!(!cells.is_empty(), "diagonal line should burn cells"); + } + + #[test] + fn polygon_fills_interior() { + let poly = Polygon::new( + LineString::from(vec![(2.0, 2.0), (6.0, 2.0), (6.0, 6.0), (2.0, 6.0), (2.0, 2.0)]), + vec![], + ); + let cells = burn(Geometry::Polygon(poly)); + assert!(cells.len() > 4, "square polygon should fill several cells"); + } + + #[test] + fn multipolygon_fills_both() { + let p = |x: f64, y: f64| { + Polygon::new( + LineString::from(vec![(x, y), (x + 2.0, y), (x + 2.0, y + 2.0), (x, y + 2.0), (x, y)]), + vec![], + ) + }; + let mp = MultiPolygon::new(vec![p(1.0, 1.0), p(6.0, 6.0)]); + let cells = burn(Geometry::MultiPolygon(mp)); + assert!(cells.iter().any(|&(y, _, _)| y >= 6)); // lower polygon + assert!(cells.iter().any(|&(y, _, _)| y <= 3)); // upper polygon + } + + #[test] + fn rect_fills_interior() { + let rect = Rect::new(coord! { x: 1.0, y: 1.0 }, coord! { x: 5.0, y: 5.0 }); + let cells = burn(Geometry::Rect(rect)); + assert!(cells.len() > 4, "rect should fill several cells"); + } + + #[test] + fn triangle_fills_interior() { + let tri = Triangle::new( + coord! { x: 1.0, y: 1.0 }, + coord! { x: 6.0, y: 1.0 }, + coord! 
{ x: 1.0, y: 6.0 }, + ); + let cells = burn(Geometry::Triangle(tri)); + assert!(!cells.is_empty(), "triangle should fill cells"); + } + + #[test] + fn geometry_collection_burns_all_members() { + let gc = GeometryCollection(vec![ + Geometry::Point(Point::new(2.5, 7.5)), + Geometry::Polygon(Polygon::new( + LineString::from(vec![(5.0, 1.0), (8.0, 1.0), (8.0, 4.0), (5.0, 4.0), (5.0, 1.0)]), + vec![], + )), + ]); + let cells = burn(Geometry::GeometryCollection(gc)); + assert!(cells.contains(&(2, 2, 1.0)), "point member should burn"); + assert!(cells.iter().any(|&(y, _, _)| y >= 6), "polygon member should fill"); + } +} diff --git a/src/rasterization/burners.rs b/rust/src/rasterization/burners.rs similarity index 83% rename from src/rasterization/burners.rs rename to rust/src/rasterization/burners.rs index dd6f99b..8c725fd 100644 --- a/src/rasterization/burners.rs +++ b/rust/src/rasterization/burners.rs @@ -1,6 +1,6 @@ /* -The AllTouched strategy has been adapted from GDAL: https://github.com/OSGeo/gdal/blob/63396dbf42999441478e036ebb145725de09f7ce/alg/llrasterize.cpp#L407 -Primarily for output consistency. +The AllTouched strategy has been adapted from GDAL: +https://github.com/OSGeo/gdal/blob/63396dbf42999441478e036ebb145725de09f7ce/alg/llrasterize.cpp#L407 */ use crate::{ @@ -20,26 +20,28 @@ pub struct AllTouchedBase; pub type AllTouched = AllTouchedBase; pub type AllTouchedCached = AllTouchedBase; -pub trait LineBurnStrategy { +/// Strategy for burning a single [`geo::Linestring`]. +/// `all_touched` burns all pixels that are touched by the line. 
+pub(crate) trait LineBurnStrategy { const IS_ALL_TOUCHED: bool; - const REQUIRES_DEDUPLICATION: bool; + const REQUIRES_DEDUP: bool; - fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) + fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) where - T: Num + Copy, - W: PixelWriter; + N: Num + Copy, + W: PixelWriter; } impl LineBurnStrategy for Standard { const IS_ALL_TOUCHED: bool = false; - const REQUIRES_DEDUPLICATION: bool = DEDUP; + const REQUIRES_DEDUP: bool = DEDUP; - fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) + #[cfg_attr(feature = "hotpath", hotpath::measure)] + fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - // early return if empty if linedges.is_empty() { return; } @@ -91,14 +93,14 @@ impl LineBurnStrategy for Standard { impl LineBurnStrategy for AllTouchedBase { const IS_ALL_TOUCHED: bool = true; - const REQUIRES_DEDUPLICATION: bool = DEDUP; + const REQUIRES_DEDUP: bool = DEDUP; - fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: T, writer: &mut W, background: T) + #[cfg_attr(feature = "hotpath", hotpath::measure)] + fn burn_line(linedges: &[LineEdge], raster_info: &RasterInfo, field_value: N, writer: &mut W, background: N) where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - // early return if empty if linedges.is_empty() { return; } @@ -244,36 +246,37 @@ impl LineBurnStrategy for AllTouchedBase { } } -pub fn burn_point(pointedges: &[PointEdge], field_value: T, writer: &mut W, background: T) +#[cfg_attr(feature = "hotpath", hotpath::measure)] +pub(super) fn burn_point(pointedges: &[PointEdge], field_value: N, writer: &mut W, background: N) where - T: Num + Copy, - W: PixelWriter, + N: 
Num + Copy, + W: PixelWriter, { for point in pointedges { writer.write(point.y, point.x, field_value, background); } } -pub fn burn_polygon( +#[cfg_attr(feature = "hotpath", hotpath::measure)] +pub(super) fn burn_polygon( polyedges: &mut Vec, raster_info: &RasterInfo, - field_value: T, + field_value: N, writer: &mut W, - background: T, + background: N, ) where - T: Num + Copy, - W: PixelWriter, + N: Num + Copy, + W: PixelWriter, { - // early return if empty if polyedges.is_empty() { return; } // sort edges by y coordinate - polyedges.sort_by(|a, b| a.ystart.cmp(&b.ystart)); + polyedges.sort_unstable_by(|a, b| a.ystart.cmp(&b.ystart)); - // start with first y line - let mut yline = polyedges.first().unwrap().ystart; + // start with first y line (polyedges is non-empty, checked above) + let mut yline = polyedges[0].ystart; let mut active_edges = Vec::new(); @@ -296,7 +299,7 @@ pub fn burn_polygon( } // sort by y line - active_edges.sort_by(|a, b| a.x_at_yline.partial_cmp(&b.x_at_yline).unwrap()); + active_edges.sort_unstable_by(|a, b| a.x_at_yline.total_cmp(&b.x_at_yline)); // fill pixels for chunk in active_edges.chunks_exact(2) { diff --git a/rust/src/rasterization/pixel_cache.rs b/rust/src/rasterization/pixel_cache.rs new file mode 100644 index 0000000..80325f6 --- /dev/null +++ b/rust/src/rasterization/pixel_cache.rs @@ -0,0 +1,59 @@ +use crate::geo::edges::LineEdge; +use fixedbitset::FixedBitSet; + +/// Cache pixels when `all_touched` is the burn strategy and [`PixelFunction`] is `Sum` or `Count`. +/// Pass 1 -> burn interior and exterior lines and record visited pixels. +/// Pass 2 -> fill inner values and skip visited from pass 1. 
+pub(crate) struct PixelCache { + bits: FixedBitSet, + width: usize, + xmin: isize, + ymin: isize, +} + +impl PixelCache { + pub(crate) fn new(linedges: &[LineEdge]) -> Self { + let (xmin, ymin, xmax, ymax) = linedges.iter().fold( + (f64::MAX, f64::MAX, f64::MIN, f64::MIN), + |(xmin, ymin, xmax, ymax), edge| { + ( + xmin.min(edge.x0).min(edge.x1), + ymin.min(edge.y0).min(edge.y1), + xmax.max(edge.x0).max(edge.x1), + ymax.max(edge.y0).max(edge.y1), + ) + }, + ); + + let width = (xmax.floor() - xmin.floor()) as usize + 1; + let length = (ymax.floor() - ymin.floor()) as usize + 1; + + Self { + bits: FixedBitSet::with_capacity(width * length), + width, + xmin: xmin as isize, + ymin: ymin as isize, + } + } + + #[inline] + fn unravel_index(&self, x: usize, y: usize) -> usize { + let local_x = (x as isize - self.xmin) as usize; + let local_y = (y as isize - self.ymin) as usize; + local_y * self.width + local_x + } + + pub(crate) fn insert(&mut self, x: usize, y: usize) -> bool { + let idx = self.unravel_index(x, y); + if self.bits.contains(idx) { + return false; + } + self.bits.insert(idx); + true + } + + pub(crate) fn contains(&self, x: usize, y: usize) -> bool { + let idx = self.unravel_index(x, y); + self.bits.contains(idx) + } +} diff --git a/rust/src/rasterization/pixel_functions.rs b/rust/src/rasterization/pixel_functions.rs new file mode 100644 index 0000000..47456b4 --- /dev/null +++ b/rust/src/rasterization/pixel_functions.rs @@ -0,0 +1,123 @@ +use crate::prelude::*; +use ndarray::ArrayViewMut2; +use num_traits::Num; +use std::{ops::AddAssign, str::FromStr}; + +/// Supported functions to apply to overlapping pixels. 
+#[derive(Clone)] +pub enum PixelFunction { + Sum, + First, + Last, + Min, + Max, + Count, + Any, +} + +impl FromStr for PixelFunction { + type Err = RusterizeError; + + fn from_str(s: &str) -> RusterizeResult { + match s { + "sum" => Ok(Self::Sum), + "first" => Ok(Self::First), + "last" => Ok(Self::Last), + "min" => Ok(Self::Min), + "max" => Ok(Self::Max), + "count" => Ok(Self::Count), + "any" => Ok(Self::Any), + _ => Err(RusterizeError::ValueError("Unknown pixel function")), + } + } +} + +impl PixelFunction { + pub(crate) fn to_function(&self) -> PixelFn + where + N: Num + Copy + AddAssign + PartialOrd + NaNAware, + { + match self { + Self::Sum => sum_values, + Self::First => first_values, + Self::Last => last_values, + Self::Min => min_values, + Self::Max => max_values, + Self::Count => count_values, + Self::Any => any_values, + } + } +} + +/// On-demand function for overlapping pixels. +pub(crate) type PixelFn = fn(&mut ArrayViewMut2, usize, usize, N, N); + +/// Sum values or NaN/background. +fn sum_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: N, bg: N) +where + N: Num + AddAssign + NaNAware + Copy, +{ + if array[[y, x]] == bg || array[[y, x]].is_nan() || value.is_nan() { + array[[y, x]] = value; + } else { + array[[y, x]] += value; + } +} + +/// Set first value only if currently NaN/background. +fn first_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: N, bg: N) +where + N: Num + NaNAware + Copy, +{ + if array[[y, x]] == bg || array[[y, x]].is_nan() { + array[[y, x]] = value; + } +} + +/// Always set last value. +fn last_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: N, _bg: N) +where + N: Num + Copy, +{ + array[[y, x]] = value; +} + +/// Set value if smaller than current. 
+fn min_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: N, bg: N) +where + N: Num + NaNAware + PartialOrd + Copy, +{ + if array[[y, x]] == bg || array[[y, x]].is_nan() || array[[y, x]] > value { + array[[y, x]] = value; + } +} + +/// Set value if larger than current. +fn max_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: N, bg: N) +where + N: Num + NaNAware + PartialOrd + Copy, +{ + if array[[y, x]] == bg || array[[y, x]].is_nan() || array[[y, x]] < value { + array[[y, x]] = value; + } +} + +/// Count values at position. +fn count_values(array: &mut ArrayViewMut2, y: usize, x: usize, _value: N, bg: N) +where + N: Num + AddAssign + NaNAware + Copy, +{ + if array[[y, x]] == bg || array[[y, x]].is_nan() { + array[[y, x]] = N::one(); + } else { + array[[y, x]] += N::one(); + } +} + +/// Mark presence. +fn any_values(array: &mut ArrayViewMut2, y: usize, x: usize, _value: N, _bg: N) +where + N: Num, +{ + array[[y, x]] = N::one(); +} diff --git a/rust/src/rasterize.rs b/rust/src/rasterize.rs new file mode 100644 index 0000000..b9f3291 --- /dev/null +++ b/rust/src/rasterize.rs @@ -0,0 +1,314 @@ +use std::collections::BTreeMap; + +use crate::{ + encoding::{ + arrays::{DenseArray, SparseArray}, + writers::{DenseArrayWriter, PixelWriter, SparseArrayWriter, ToSparseArray}, + }, + error::{RusterizeError, RusterizeResult}, + prelude::{RasterDtype, RasterizeContext}, + rasterization::{ + burn_geometry::Burn, + burners::{AllTouched, AllTouchedCached, LineBurnStrategy, Standard}, + }, +}; +use geo::Geometry; +use ndarray::{ArrayView1, Axis}; +use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; + +#[cfg(feature = "polars")] +use polars::prelude::*; + +/// Source of values to burn onto a [`DenseArray`] or [`SparseArray`]. +#[derive(Clone)] +pub enum FieldSource<'a, N> { + /// A single constant value to burn. + Scalar(N), + /// An array of values each associated to a unique geometry. 
+ Array(ArrayView1<'a, N>), + #[cfg(feature = "polars")] + Column(Column), +} + +impl<'a, N, T> From<&'a T> for FieldSource<'a, N> +where + T: AsRef<[N]> + ?Sized, +{ + fn from(v: &'a T) -> Self { + Self::Array(ArrayView1::from(v.as_ref())) + } +} + +macro_rules! dispatch { + ($all_touched:expr, $dedup:expr, $geoms:expr, $ctx:expr, $writer:expr, $idx:expr) => { + match ($all_touched, $dedup) { + (true, true) => process::($geoms, $ctx, $writer, $idx), + (true, false) => process::($geoms, $ctx, $writer, $idx), + (false, _) => process::($geoms, $ctx, $writer, $idx), + } + }; +} + +/// Rasterization trait. Attaches to anything that can be viewed as a [`geo::Geometry`] slice +/// and produces a [`DenseArray`] or a [`SparseArray`]. +pub trait Rasterize { + fn rasterize(&self, ctx: RasterizeContext) -> RusterizeResult; +} + +impl]> + ?Sized> Rasterize for T { + fn rasterize(&self, ctx: RasterizeContext) -> RusterizeResult { + A::build(self.as_ref(), ctx) + } +} + +/// [`DenseArray`] or [`SparseArray`] creation trait. 
+pub trait ArrayBuilder: Sized { + type Dtype: RasterDtype; + + fn build(geoms: &[Geometry], ctx: RasterizeContext) -> RusterizeResult; +} + +impl ArrayBuilder for DenseArray +where + N: RasterDtype, +{ + type Dtype = N; + + fn build(geoms: &[Geometry], ctx: RasterizeContext) -> RusterizeResult { + assert_matching_len(geoms.len(), &ctx.field, ctx.by)?; + + let dedup = ctx.requires_dedup(); + + match ctx.by { + Some(by) => { + let (groups, groups_idx) = group_keys(by); + let n_groups = groups.len(); + let mut band_names = Vec::with_capacity(n_groups); + let mut raster = ctx.raster_info.build_raster(n_groups, ctx.background); + + raster + .outer_iter_mut() + .into_par_iter() + .zip(groups.into_par_iter()) + .zip(groups_idx.into_par_iter()) + .map(|((band, name), idxs)| { + let mut writer = DenseArrayWriter::new(band, ctx.pixel_fn()); + + dispatch!(ctx.all_touched, dedup, geoms, &ctx, &mut writer, idxs.iter().copied()); + + name + }) + .collect_into_vec(&mut band_names); + + Ok(DenseArray::new(raster, band_names, ctx.raster_info)) + } + None => { + let band_names = vec![String::from("band_1")]; + let mut raster = ctx.raster_info.build_raster(1, ctx.background); + let mut writer = DenseArrayWriter::new(raster.index_axis_mut(Axis(0), 0), ctx.pixel_fn()); + + dispatch!(ctx.all_touched, dedup, geoms, &ctx, &mut writer, 0..geoms.len()); + + Ok(DenseArray::new(raster, band_names, ctx.raster_info)) + } + } + } +} + +impl ArrayBuilder for SparseArray +where + N: RasterDtype, +{ + type Dtype = N; + + fn build(geoms: &[Geometry], ctx: RasterizeContext) -> RusterizeResult { + assert_matching_len(geoms.len(), &ctx.field, ctx.by)?; + + let dedup = ctx.requires_dedup(); + + match ctx.by { + Some(by) => { + let (groups, groups_idx) = group_keys(by); + let mut writers = Vec::with_capacity(groups.len()); + + groups + .into_par_iter() + .zip(groups_idx.into_par_iter()) + .map(|(name, idxs)| { + let mut writer = SparseArrayWriter::new(name); + + dispatch!(ctx.all_touched, dedup, geoms, 
&ctx, &mut writer, idxs.iter().copied()); + + writer + }) + .collect_into_vec(&mut writers); + + Ok(writers.finish(ctx)) + } + None => { + let mut writer = SparseArrayWriter::new(String::from("band_1")); + + dispatch!(ctx.all_touched, dedup, geoms, &ctx, &mut writer, 0..geoms.len()); + + Ok(writer.finish(ctx)) + } + } + } +} + +/// Burn the geometries at `indices` onto `writer`. +/// `indices` is `0..len` for a single band, or the group's geometry indexes for multiband. +#[cfg_attr(feature = "hotpath", hotpath::measure)] +fn process(geoms: &[Geometry], ctx: &RasterizeContext, writer: &mut W, indices: I) +where + N: RasterDtype, + W: PixelWriter, + S: LineBurnStrategy, + I: Iterator, +{ + match &ctx.field { + FieldSource::Scalar(s) => { + for i in indices { + geoms[i].burn::(&ctx.raster_info, *s, writer, ctx.background); + } + } + FieldSource::Array(arr) => { + for i in indices { + geoms[i].burn::(&ctx.raster_info, arr[i], writer, ctx.background); + } + } + #[cfg(feature = "polars")] + FieldSource::Column(col) => { + let ca = col.as_materialized_series().unpack::().unwrap(); + if let Ok(slice) = ca.cont_slice() { + for i in indices { + geoms[i].burn::(&ctx.raster_info, slice[i], writer, ctx.background); + } + } else { + for i in indices { + if let Some(fv) = ca.get(i) { + geoms[i].burn::(&ctx.raster_info, fv, writer, ctx.background); + } + } + } + } + } +} + +/// Group `by` keys into (band name, geometry indexes) pairs, sorted by key. +fn group_keys(by: &[String]) -> (Vec, Vec>) { + let mut groups: BTreeMap<&String, Vec> = BTreeMap::new(); + for (i, key) in by.iter().enumerate() { + groups.entry(key).or_default().push(i); + } + groups.into_iter().map(|(k, idxs)| (k.clone(), idxs)).unzip() +} + +/// Validate length of geometry, field, and by. Must match. 
+fn assert_matching_len(n_geoms: usize, field: &FieldSource, by: Option<&[String]>) -> RusterizeResult<()> { + let field_len = match field { + FieldSource::Array(arr) => Some(arr.len()), + #[cfg(feature = "polars")] + FieldSource::Column(col) => Some(col.len()), + FieldSource::Scalar(_) => None, + }; + + if let Some(field_len) = field_len + && field_len != n_geoms + { + return Err(RusterizeError::ValueError("Geometry and field lengths must match")); + } + + if let Some(by) = by + && by.len() != n_geoms + { + return Err(RusterizeError::ValueError("Geometry and by lengths must match")); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{geo::raster::RasterInfo, rasterization::pixel_functions::PixelFunction}; + use geo::{Geometry, LineString, Polygon}; + + fn raster_4x4() -> RasterInfo { + RasterInfo { + ncols: 4, + nrows: 4, + xmin: 0.0, + xmax: 4.0, + ymin: 0.0, + ymax: 4.0, + xres: 1.0, + yres: 1.0, + epsg: None, + } + } + + #[test] + fn dense_burns_a_polygon() { + let poly = Polygon::new( + LineString::from(vec![(0.5, 0.5), (3.5, 0.5), (3.5, 3.5), (0.5, 3.5), (0.5, 0.5)]), + vec![], + ); + let geoms = vec![Geometry::Polygon(poly)]; + let ctx = RasterizeContext { + raster_info: raster_4x4(), + field: FieldSource::Scalar(1.0_f64), + by: None, + pixel_fn: PixelFunction::Last, + background: 0.0, + all_touched: false, + }; + + let out: DenseArray = geoms.rasterize(ctx).unwrap(); + let (raster, _, _) = out.into_parts(); + assert_eq!(raster.shape(), &[1, 4, 4]); + assert!( + raster.iter().any(|&v| v == 1.0), + "polygon should burn at least one cell" + ); + } + + #[test] + fn multiband_burns_only_its_group() { + use geo::Point; + use ndarray::Array1; + let geoms = vec![ + Geometry::Point(Point::new(0.5, 0.5)), + Geometry::Point(Point::new(3.5, 3.5)), + ]; + let by = [String::from("a"), String::from("b")]; + let vals = Array1::from(vec![1.0_f64, 2.0]); + let ctx = RasterizeContext { + raster_info: raster_4x4(), + field: 
FieldSource::Array(vals.view()), + by: Some(by.as_ref()), + pixel_fn: PixelFunction::Last, + background: 0.0, + all_touched: false, + }; + + let out: DenseArray = geoms.rasterize(ctx).unwrap(); + let (raster, _, _) = out.into_parts(); + assert_eq!(raster.shape(), &[2, 4, 4]); + + for band in raster.outer_iter() { + let has1 = band.iter().any(|&v| v == 1.0); + let has2 = band.iter().any(|&v| v == 2.0); + assert!(has1 ^ has2, "a band burned geometries outside its group"); + } + } + + #[test] + fn group_keys_groups_and_names() { + let by = [String::from("b"), String::from("a"), String::from("b")]; + let (names, idx) = group_keys(&by); + let mut pairs: Vec<(String, Vec)> = names.into_iter().zip(idx).collect(); + pairs.sort(); + assert_eq!(pairs, vec![("a".to_string(), vec![1]), ("b".to_string(), vec![0, 2])]); + } +} diff --git a/src/encoding/arrays.rs b/src/encoding/arrays.rs deleted file mode 100644 index 366a4d2..0000000 --- a/src/encoding/arrays.rs +++ /dev/null @@ -1,215 +0,0 @@ -/* Handle array encoding creation and conversion */ - -use crate::{ - encoding::{ - build_xarray::build_xarray, - pyarrays::{PyOut, PySparseArray, PySparseArrayTraits, Pythonize}, - }, - geo::raster::RasterInfo, - prelude::{OptionalFlags, PolarsHandler}, - rasterization::{pixel_functions::PixelFn, rusterize_impl::RasterizeContext}, -}; -use ndarray::Array3; -use num_traits::Num; -use numpy::{Element, IntoPyArray}; -use polars::prelude::*; -use pyo3::prelude::*; -use pyo3_polars::PyDataFrame; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; - -pub struct DenseArray { - raster: Array3, - band_names: Vec, - raster_info: RasterInfo, -} - -impl DenseArray { - pub fn new(raster: Array3, band_names: Vec, raster_info: RasterInfo) -> Self { - Self { - raster, - band_names, - raster_info, - } - } -} - -// conversion to python -impl Pythonize for DenseArray -where - N: Num + Element, -{ - fn pythonize(self, py: Python, opt_flags: OptionalFlags) -> PyResult { - let data = 
self.raster.into_pyarray(py); - - if opt_flags.with_xarray_output() { - let xarray = build_xarray(py, self.raster_info, data, self.band_names)?; - Ok(PyOut::Dense(xarray)) - } else { - Ok(PyOut::Dense(data.into_any())) - } - } -} - -// triplets of (row, col, value) for all bands as a contiguous block -struct Triplets { - rows: Vec, - cols: Vec, - data: Vec, -} - -impl Triplets { - fn new(rows: Vec, cols: Vec, data: Vec) -> Self { - Self { rows, cols, data } - } -} - -pub struct SparseArray { - band_names: Vec, - triplets: Triplets, - lengths: Vec, - raster_info: RasterInfo, - pxfn: PixelFn, - background: N, -} - -impl SparseArray -where - N: Num + Copy, -{ - pub fn new( - band_names: Vec, - rows: Vec, - cols: Vec, - data: Vec, - lengths: Vec, - ctx: RasterizeContext, - ) -> Self { - Self { - band_names, - triplets: Triplets::new(rows, cols, data), - lengths, - raster_info: ctx.raster_info, - pxfn: ctx.pixel_fn, - background: ctx.background, - } - } - - fn build_raster(&self) -> Array3 { - let mut raster = self.raster_info.build_raster(self.band_names.len(), self.background); - - let offset = 0; - let rows = self.triplets.rows.as_slice(); - let cols = self.triplets.cols.as_slice(); - let data = self.triplets.data.as_slice(); - - // works with single and multiband rasters - raster - .outer_iter_mut() - .zip(self.lengths.iter()) - .for_each(|(mut band, n)| { - let end = offset + *n; - let band_rows = &rows[offset..end]; - let band_cols = &cols[offset..end]; - let band_data = &data[offset..end]; - - for ((band_row, band_col), band_value) in band_rows.iter().zip(band_cols).zip(band_data) { - (self.pxfn)(&mut band, *band_row, *band_col, *band_value, self.background); - } - }); - raster - } -} - -impl PySparseArrayTraits for SparseArray -where - T: Num + Element + Copy + PolarsHandler, -{ - // estimated size of the materialized array - fn size_str(&self) -> String { - let bytesize = size_of_val(&self.background); - let bytes = bytesize * self.raster_info.nrows * 
self.raster_info.ncols; - - if bytes < 1000 { - format!("{} bytes", bytes) - } else if bytes < 1000 * 1000 { - format!("{:.2} KB", bytes as f32 / 1000.0) - } else if bytes < 1000 * 1000 * 1000 { - format!("{:.2} MB", bytes as f32 / (1000.0 * 1000.0)) - } else { - format!("{:.2} GB", bytes as f32 / (1000.0 * 1000.0 * 1000.0)) - } - } - - fn extent(&self) -> (&f64, &f64, &f64, &f64) { - ( - &self.raster_info.xmin, - &self.raster_info.ymin, - &self.raster_info.xmax, - &self.raster_info.ymax, - ) - } - - fn shape(&self) -> (&usize, &usize) { - (&self.raster_info.nrows, &self.raster_info.ncols) - } - - fn resolution(&self) -> (&f64, &f64) { - (&self.raster_info.yres, &self.raster_info.yres) - } - - fn epsg(&self) -> &Option { - &self.raster_info.epsg - } - - fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult> { - let raster = self.build_raster(); - - let data = raster.into_pyarray(py); - - build_xarray(py, self.raster_info.clone(), data, self.band_names.clone()) - } - - fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult> { - let raster = self.build_raster(); - Ok(raster.into_pyarray(py).into_any()) - } - - fn to_frame(&self) -> PyDataFrame { - let mut columns = Vec::new(); - - // add bands for multiband raster - if self.lengths.len() > 1 { - let bands = self - .lengths - .iter() - .enumerate() - .flat_map(|(i, v)| std::iter::repeat_n(i + 1, *v)) - .map(|b| b as u64) - .collect::>(); - let bands_column = Column::new("band".into(), bands); - columns.push(bands_column); - } - - let rows = self.triplets.rows.par_iter().map(|v| *v as u64).collect::>(); - let length = rows.len(); - columns.push(Column::new("row".into(), rows)); - - let cols = self.triplets.cols.par_iter().map(|v| *v as u64).collect::>(); - columns.push(Column::new("col".into(), cols)); - - columns.push(T::from_named_vec("data", &self.triplets.data)); - - let df = DataFrame::new(length, columns).unwrap(); - PyDataFrame(df) - } -} - -// conversion to python -impl Pythonize for SparseArray -where - T: Num + 
Element + Copy + PolarsHandler + 'static, -{ - fn pythonize(self, _py: Python, _opt_flags: OptionalFlags) -> PyResult { - Ok(PyOut::Sparse(PySparseArray(Arc::new(self)))) - } -} diff --git a/src/encoding/pyarrays.rs b/src/encoding/pyarrays.rs deleted file mode 100644 index 4c856cc..0000000 --- a/src/encoding/pyarrays.rs +++ /dev/null @@ -1,64 +0,0 @@ -/* Python conversion traits and wrappers */ - -use pyo3::prelude::*; -use pyo3_polars::PyDataFrame; -use std::sync::Arc; - -use crate::prelude::OptionalFlags; - -#[derive(IntoPyObject)] -pub enum PyOut<'py> { - Dense(Bound<'py, PyAny>), - Sparse(PySparseArray), -} - -pub trait Pythonize { - // convert rusterization output into python object - fn pythonize(self, py: Python, opt_flags: OptionalFlags) -> PyResult; -} - -pub trait PySparseArrayTraits: Send + Sync { - fn size_str(&self) -> String; - fn shape(&self) -> (&usize, &usize); - fn resolution(&self) -> (&f64, &f64); - fn extent(&self) -> (&f64, &f64, &f64, &f64); - fn epsg(&self) -> &Option; - fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult>; - fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult>; - fn to_frame(&self) -> PyDataFrame; -} - -#[pyclass(name = "SparseArray")] -pub struct PySparseArray(pub Arc); - -#[pymethods] -impl PySparseArray { - fn __repr__(&self) -> String { - let epsg = if let Some(epsg) = self.0.epsg() { - epsg.to_string() - } else { - String::from("None") - }; - - format!( - "SparseArray:\n- Shape: {:?}\n- Extent: {:?}\n- Resolution: {:?}\n- EPSG: {}\n- Estimated size: {}", - self.0.shape(), - self.0.extent(), - self.0.resolution(), - epsg, - self.0.size_str() - ) - } - - fn to_xarray<'py>(&self, py: Python<'py>) -> PyResult> { - self.0.to_xarray(py) - } - - fn to_numpy<'py>(&self, py: Python<'py>) -> PyResult> { - self.0.to_numpy(py) - } - - fn to_frame(&self) -> PyDataFrame { - self.0.to_frame() - } -} diff --git a/src/geo/raster.rs b/src/geo/raster.rs deleted file mode 100644 index 7f186dc..0000000 --- a/src/geo/raster.rs +++ 
/dev/null @@ -1,142 +0,0 @@ -/* Structure to contain information on raster data */ - -use geo::BoundingRect; -use geo_types::{Geometry, Rect, coord}; -use num_traits::Num; -use numpy::{ - IntoPyArray, PyArray1, - ndarray::{Array, Array3}, -}; -use pyo3::prelude::*; - -#[derive(Clone)] -pub struct RasterInfo { - pub ncols: usize, - pub nrows: usize, - pub xmin: f64, - pub xmax: f64, - pub ymin: f64, - pub ymax: f64, - pub xres: f64, - pub yres: f64, - pub epsg: Option, -} - -#[derive(FromPyObject)] -#[pyo3(from_item_all)] -pub struct RawRasterInfo { - ncols: usize, - nrows: usize, - xmin: f64, - ymin: f64, - xmax: f64, - ymax: f64, - xres: f64, - yres: f64, - with_user_extent: bool, - tap: bool, - epsg: Option, -} - -impl RasterInfo { - pub fn from(raw: RawRasterInfo, geoms: &[Geometry]) -> Self { - let mut info = RasterInfo { - ncols: raw.ncols, - nrows: raw.nrows, - xmin: raw.xmin, - xmax: raw.xmax, - ymin: raw.ymin, - ymax: raw.ymax, - xres: raw.xres, - yres: raw.yres, - epsg: raw.epsg, - }; - - if info.xmin.is_infinite() { - // list or numpy.ndarray do not carry bounding information - let bounds = geoms.iter().fold(None, |acc, geom| { - let bounds = geom.bounding_rect(); - - match (acc, bounds) { - (None, None) => None, - (None, Some(r)) | (Some(r), None) => Some(r), - (Some(r1), Some(r2)) => Some(Rect::new( - coord! { x: r1.min().x.min(r2.min().x), y: r1.min().y.min(r2.min().y) }, - coord! 
{ x: r1.max().x.max(r2.max().x), y: r1.max().y.max(r2.max().y) }, - )), - } - }); - - if let Some(b) = bounds { - info.xmin = b.min().x; - info.ymin = b.min().y; - info.xmax = b.max().x; - info.ymax = b.max().y; - } else { - panic!("Cannot infer bounding box from geometry.") - } - } - - let has_res = info.xres != 0.0; - let has_shape = info.nrows != 0; - - // extent by half pixel if custom extent not provided - if !raw.with_user_extent && !raw.tap && has_res { - info.xmin -= info.xres / 2.0; - info.xmax += info.xres / 2.0; - info.ymin -= info.yres / 2.0; - info.ymax += info.yres / 2.0; - } - - if !has_res { - info.assign_resolution(); - } else if raw.tap && has_res { - info.xmin = (info.xmin / info.xres).floor() * info.xres; - info.xmax = (info.xmax / info.xres).ceil() * info.xres; - info.ymin = (info.ymin / info.yres).floor() * info.yres; - info.ymax = (info.ymax / info.yres).ceil() * info.yres; - } - - if !has_shape { - info.assign_shape(); - } - - info - } - - #[inline] - fn assign_shape(&mut self) { - self.nrows = (0.5 + (self.ymax - self.ymin) / self.yres) as usize; - self.ncols = (0.5 + (self.xmax - self.xmin) / self.xres) as usize - } - - #[inline] - fn assign_resolution(&mut self) { - self.xres = (self.xmax - self.xmin) / self.ncols as f64; - self.yres = (self.ymax - self.ymin) / self.nrows as f64; - } - - pub fn build_raster(&self, bands: usize, background: T) -> Array3 - where - T: Num + Copy, - { - Array3::from_elem((bands, self.nrows, self.ncols), background) - } - - // construct coordinates for xarray (start from pixel's center) - pub fn make_coordinates<'py>(&self, py: Python<'py>) -> (Bound<'py, PyArray1>, Bound<'py, PyArray1>) { - let y_coords = Array::range( - self.ymax - self.yres / 2.0, - self.ymax - self.nrows as f64 * self.yres, - -self.yres, - ) - .into_pyarray(py); - let x_coords = Array::range( - self.xmin + self.xres / 2.0, - self.xmin + self.ncols as f64 * self.xres, - self.xres, - ) - .into_pyarray(py); - (y_coords, x_coords) - } -} diff 
--git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 280cab9..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,196 +0,0 @@ -mod allocator; -mod geo { - pub mod edges; - pub mod parse_geometry; - pub mod raster; -} -mod encoding { - pub mod arrays; - mod build_xarray; - pub mod pyarrays; - pub mod writers; -} -mod rasterization { - pub mod burn_geometry; - pub mod burners; - pub mod pixel_functions; - pub mod rusterize_impl; -} -mod prelude; - -use crate::{ - encoding::pyarrays::{PyOut, Pythonize}, - geo::parse_geometry::ParsedGeometry, - prelude::*, - rasterization::{ - pixel_functions::set_pixel_function, - rusterize_impl::{Rasterize, RasterizeContext}, - }, -}; -use geo::raster::{RasterInfo, RawRasterInfo}; -use ndarray::ArrayView1; -use num_traits::Num; -use numpy::{Element, PyReadonlyArray1}; -use polars::prelude::*; -use pyo3::{conversion::FromPyObject, prelude::*, types::PyAny}; -use pyo3_polars::PyDataFrame; - -macro_rules! dispatch_rusterize { - ( - $dtype:expr, $encoding:expr, $py:expr, $ctx:expr, - [ $( ($str_val:pat, $rust_type:ty) ),* ] - ) => { - match ($dtype, $encoding) { - $( - ($str_val, "xarray" | "numpy") => rusterize_impl::<$rust_type, Dense>($py, $ctx), - ($str_val, "sparse") => rusterize_impl::<$rust_type, Sparse>($py, $ctx), - )* - _ => unimplemented!("Invalid dtype or encoding provided."), - } - }; -} - -pub enum FieldSource<'a, N> { - Scalar(N), - Array(ArrayView1<'a, N>), - Column(Column), -} - -struct Context<'py> { - geometry: ParsedGeometry, - raster_info: RasterInfo, - pypixel_fn: &'py str, - pybackground: Option<&'py Bound<'py, PyAny>>, - df: Option, - pyfield: Option<&'py str>, - pyby: Option<&'py str>, - pyburn: Option<&'py Bound<'py, PyAny>>, - opt_flags: OptionalFlags, -} - -fn rusterize_impl<'py, T, R>(py: Python<'py>, ctx: Context<'py>) -> PyResult> -where - T: Num + Copy + PolarsHandler + Default + PixelOps + Element + for<'a> FromPyObject<'a, 'py>, - R: Rasterize, - R::Output: Pythonize, -{ - let background = ctx - 
.pybackground - .and_then(|inner| inner.extract().ok()) - .unwrap_or_default(); - let pixel_fn = set_pixel_function(ctx.pypixel_fn); - - let arr: PyReadonlyArray1; - let field: FieldSource = match (&ctx.df, ctx.pyfield) { - (Some(df), Some(f)) => { - let casted = df - .clone() - .lazy() - .select([col(f).cast(T::polars_dtype()).alias("field")]) - .collect() - .unwrap(); - FieldSource::Column(casted.column("field").unwrap().clone()) - } - _ => match ctx.pyburn { - None => FieldSource::Scalar(T::one()), - Some(b) => match b.extract::() { - Ok(scalar) => FieldSource::Scalar(scalar), - Err(_) => { - arr = b.extract::>()?; - FieldSource::Array(arr.as_array()) - } - }, - }, - }; - let by_col: Option = match (&ctx.df, ctx.pyby) { - (Some(df), Some(b)) => Some( - df.clone() - .lazy() - .select([col(b).cast(DataType::String).alias("by")]) - .collect() - .unwrap() - .column(b) - .unwrap() - .clone(), - ), - _ => None, - }; - let by = by_col.as_ref().and_then(|c| c.str().ok()); - - let rctx = RasterizeContext { - raster_info: ctx.raster_info, - geometry: ctx.geometry, - field, - pixel_fn, - background, - opt_flags: ctx.opt_flags, - }; - - let ret = R::rasterize(rctx, by); - ret.pythonize(py, ctx.opt_flags) -} - -#[pyfunction] -#[pyo3(name = "_rusterize")] -#[pyo3(signature = (geometry, raw_raster_info, pypixel_fn, pydf=None, pyfield=None, pyby=None, pyburn=None, pybackground=None, pytouched=false, pyencoding="xarray", pydtype="float64"))] -#[allow(clippy::too_many_arguments)] -fn rusterize_py<'py>( - py: Python<'py>, - geometry: ParsedGeometry, - raw_raster_info: RawRasterInfo, - pypixel_fn: &'py str, - pydf: Option, - pyfield: Option<&'py str>, - pyby: Option<&'py str>, - pyburn: Option<&'py Bound>, - pybackground: Option<&'py Bound>, - pytouched: bool, - pyencoding: &str, - pydtype: &str, -) -> PyResult> { - // extract dataframe - let df: Option = pydf.map(|inner| inner.into()); - - // construct raster info - let raster_info = RasterInfo::from(raw_raster_info, &geometry); - 
- // optional runtime flags - let opt_flags = OptionalFlags::new(pytouched, pyencoding, pypixel_fn); - - let ctx = Context { - geometry, - raster_info, - pypixel_fn, - pybackground, - df, - pyfield, - pyby, - pyburn, - opt_flags, - }; - - dispatch_rusterize!( - pydtype, - pyencoding, - py, - ctx, - [ - ("uint8", u8), - ("uint16", u16), - ("uint32", u32), - ("uint64", u64), - ("int8", i8), - ("int16", i16), - ("int32", i32), - ("int64", i64), - ("float32", f32), - ("float64", f64) - ] - ) -} - -#[pymodule] -fn rusterize(m: &Bound) -> PyResult<()> { - m.add_function(wrap_pyfunction!(rusterize_py, m)?)?; - Ok(()) -} diff --git a/src/prelude.rs b/src/prelude.rs deleted file mode 100644 index cb336f3..0000000 --- a/src/prelude.rs +++ /dev/null @@ -1,128 +0,0 @@ -use bitflags::bitflags; -use polars::prelude::*; -use std::ops::AddAssign; - -// handle polars dtypes and conversions -pub trait PolarsHandler: Literal + Send + Sync { - type ChunkedArrayType: PolarsNumericType + 'static; - fn polars_dtype() -> DataType; - fn from_named_vec(name: &str, vec: &[Self]) -> Column - where - Self: Sized; -} - -macro_rules! impl_polars_handler { - ($($t:ty => { - dtype: $dtype:expr, - catype: $catype:ty - }),* $(,)?) => { - $( - impl PolarsHandler for $t { - type ChunkedArrayType = $catype; - - fn polars_dtype() -> DataType { - $dtype - } - - fn from_named_vec(name: &str, vec: &[Self]) -> Column { - Column::new(name.into(), vec) - } - } - )* - }; -} - -impl_polars_handler! 
{ - f64 => { dtype: DataType::Float64, catype: Float64Type}, - f32 => { dtype: DataType::Float32, catype: Float32Type}, - u8 => { dtype: DataType::UInt8, catype: UInt8Type}, - i8 => { dtype: DataType::Int8, catype: Int8Type}, - u16 => { dtype: DataType::UInt16, catype: UInt16Type}, - i16 => { dtype: DataType::Int16, catype: Int16Type}, - u32 => { dtype: DataType::UInt32, catype: UInt32Type}, - i32 => { dtype: DataType::Int32, catype: Int32Type}, - u64 => { dtype: DataType::UInt64, catype: UInt64Type}, - i64 => { dtype: DataType::Int64, catype: Int64Type}, -} - -// handle NaN check for dtype that don't have it -pub trait NaNAware { - fn is_nan(&self) -> bool; -} - -impl NaNAware for f32 { - fn is_nan(&self) -> bool { - f32::is_nan(*self) - } -} - -impl NaNAware for f64 { - fn is_nan(&self) -> bool { - f64::is_nan(*self) - } -} - -macro_rules! impl_maybe_nan_for_int { - ($($t:ty),*) => { - $(impl NaNAware for $t { - fn is_nan(&self) -> bool { - false - } - })* - }; -} - -impl_maybe_nan_for_int!(u8, u16, u32, u64, i8, i16, i32, i64); - -// super trait to group all pixel operations -pub trait PixelOps: AddAssign + PartialOrd + NaNAware + Sized {} -impl PixelOps for T {} - -// optional flags at runtime -bitflags! 
{ - #[derive(Copy, Clone)] - pub struct OptionalFlags: u32 { - // burn all pixels that are touched by the geometry - const ALL_TOUCHED = 1; - // same as ALL_TOUCHED but requires cache - const ALL_TOUCHED_CACHED = 1 << 2; - // output return type is Xarray - const OUT_AS_XARRAY = 1 << 3; - } -} - -impl OptionalFlags { - pub fn new(all_touched: bool, encoding: &str, pixel_fn: &str) -> Self { - let mut opt_flags = OptionalFlags::empty(); - - if all_touched { - opt_flags.insert(OptionalFlags::ALL_TOUCHED); - - if pixel_fn == "sum" || pixel_fn == "count" { - opt_flags.insert(OptionalFlags::ALL_TOUCHED_CACHED); - } - } - - if encoding == "xarray" { - opt_flags.insert(OptionalFlags::OUT_AS_XARRAY); - } - - opt_flags - } - - pub fn with_all_touched(&self) -> bool { - self.contains(OptionalFlags::ALL_TOUCHED) - } - - pub fn requires_deduplication(&self) -> bool { - self.contains(OptionalFlags::ALL_TOUCHED_CACHED) - } - - pub fn with_xarray_output(&self) -> bool { - self.contains(OptionalFlags::OUT_AS_XARRAY) - } -} - -// structures for selecting encoding type and rasterization logic -pub struct Dense; -pub struct Sparse; diff --git a/src/rasterization/pixel_functions.rs b/src/rasterization/pixel_functions.rs deleted file mode 100644 index d31a71d..0000000 --- a/src/rasterization/pixel_functions.rs +++ /dev/null @@ -1,95 +0,0 @@ -/* On-demand functions for geometry rasterizetion */ - -use crate::prelude::*; -use num_traits::Num; -use numpy::ndarray::ArrayViewMut2; -use std::ops::AddAssign; - -pub type PixelFn = fn(&mut ArrayViewMut2, usize, usize, T, T); - -// sum values or NaN/background -fn sum_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: T, bg: T) -where - T: Num + AddAssign + NaNAware + Copy, -{ - if array[[y, x]] == bg || array[[y, x]].is_nan() || value.is_nan() { - array[[y, x]] = value; - } else { - array[[y, x]] += value; - } -} - -// set first value only if currently NaN/background -fn first_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: 
T, bg: T) -where - T: Num + NaNAware + Copy, -{ - if array[[y, x]] == bg || array[[y, x]].is_nan() { - array[[y, x]] = value; - } -} - -// always set last value -fn last_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: T, _bg: T) -where - T: Num + Copy, -{ - array[[y, x]] = value; -} - -// set value if smaller than current -fn min_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: T, bg: T) -where - T: Num + NaNAware + PartialOrd + Copy, -{ - if array[[y, x]] == bg || array[[y, x]].is_nan() || array[[y, x]] > value { - array[[y, x]] = value; - } -} - -// set value if larger than current -fn max_values(array: &mut ArrayViewMut2, y: usize, x: usize, value: T, bg: T) -where - T: Num + NaNAware + PartialOrd + Copy, -{ - if array[[y, x]] == bg || array[[y, x]].is_nan() || array[[y, x]] < value { - array[[y, x]] = value; - } -} - -// count values at index -fn count_values(array: &mut ArrayViewMut2, y: usize, x: usize, _value: T, bg: T) -where - T: Num + AddAssign + NaNAware + Copy, -{ - if array[[y, x]] == bg || array[[y, x]].is_nan() { - array[[y, x]] = T::one(); - } else { - array[[y, x]] += T::one(); - } -} - -// mark value presence -fn any_values(array: &mut ArrayViewMut2, y: usize, x: usize, _value: T, _bg: T) -where - T: Num, -{ - array[[y, x]] = T::one(); -} - -// function call -pub fn set_pixel_function(fstr: &str) -> PixelFn -where - T: Num + Copy + PixelOps, -{ - match fstr { - "sum" => sum_values, - "first" => first_values, - "last" => last_values, - "min" => min_values, - "max" => max_values, - "count" => count_values, - "any" => any_values, - _ => panic!("'fun' has an invalid value: {fstr}. 
One of sum, first, last, min, max, count, or any",), - } -} diff --git a/src/rasterization/rusterize_impl.rs b/src/rasterization/rusterize_impl.rs deleted file mode 100644 index 5c6a1d9..0000000 --- a/src/rasterization/rusterize_impl.rs +++ /dev/null @@ -1,266 +0,0 @@ -/* Implementation of rusterize and rasterization logics */ - -use crate::{ - FieldSource, - encoding::{ - arrays::{DenseArray, SparseArray}, - writers::{DenseArrayWriter, PixelWriter, SparseArrayWriter, ToSparseArray}, - }, - geo::{edges::LineEdge, parse_geometry::ParsedGeometry, raster::RasterInfo}, - prelude::{Dense, OptionalFlags, PolarsHandler, Sparse}, - rasterization::{ - burn_geometry::Burn, - burners::{AllTouched, AllTouchedCached, LineBurnStrategy, Standard}, - pixel_functions::PixelFn, - }, -}; -use fixedbitset::FixedBitSet; -use ndarray::Axis; -use num_traits::Num; -use numpy::Element; -use polars::prelude::*; -use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; - -// cache pixels for all_touched purposes if pixel_function is "sum" or "count" -// pass 1 -> burn interior and exterior lines with all_touched and record visited pixels -// pass 2 -> fill inner values and skip visited from pass 1 -pub struct PixelCache { - bits: FixedBitSet, - width: usize, - xmin: isize, - ymin: isize, -} - -impl PixelCache { - pub fn new(linedges: &[LineEdge]) -> Self { - let (xmin, ymin, xmax, ymax) = linedges.iter().fold( - (f64::MAX, f64::MAX, f64::MIN, f64::MIN), - |(xmin, ymin, xmax, ymax), edge| { - ( - xmin.min(edge.x0).min(edge.x1), - ymin.min(edge.y0).min(edge.y1), - xmax.max(edge.x0).max(edge.x1), - ymax.max(edge.y0).max(edge.y1), - ) - }, - ); - - let width = (xmax.floor() - xmin.floor()) as usize + 1; - let length = (ymax.floor() - ymin.floor()) as usize + 1; - - Self { - bits: FixedBitSet::with_capacity(width * length), - width, - xmin: xmin as isize, - ymin: ymin as isize, - } - } - - #[inline] - fn unravel_index(&self, x: usize, y: usize) -> usize { - let local_x 
= (x as isize - self.xmin) as usize; - let local_y = (y as isize - self.ymin) as usize; - local_y * self.width + local_x - } - - pub fn insert(&mut self, x: usize, y: usize) -> bool { - let idx = self.unravel_index(x, y); - if self.bits.contains(idx) { - return false; - } - self.bits.insert(idx); - true - } - - pub fn contains(&self, x: usize, y: usize) -> bool { - let idx = self.unravel_index(x, y); - self.bits.contains(idx) - } -} - -pub struct RasterizeContext<'a, N> { - pub raster_info: RasterInfo, - pub geometry: ParsedGeometry, - pub field: FieldSource<'a, N>, - pub pixel_fn: PixelFn, - pub background: N, - pub opt_flags: OptionalFlags, -} - -macro_rules! dispatch_burn { - ($all_touched:expr, $dedup:expr, $func:ident, $ctx:expr, $writer:expr $(, $ext:expr)*) => { - match ($all_touched, $dedup) { - (true, true) => $func::($ctx, $writer $(, $ext)*), - (true, false) => $func::($ctx, $writer $(, $ext)*), - (false, _) => $func::($ctx, $writer $(, $ext)*), - } - }; -} - -pub trait Rasterize { - type Output; - - fn rasterize(ctx: RasterizeContext, by: Option<&ChunkedArray>) -> Self::Output; -} - -impl Rasterize for Dense -where - N: Num + PolarsHandler + Copy + Element, -{ - type Output = DenseArray; - - fn rasterize(ctx: RasterizeContext, by: Option<&ChunkedArray>) -> Self::Output { - let all_touched = ctx.opt_flags.with_all_touched(); - let dedup = ctx.opt_flags.requires_deduplication(); - - match by { - Some(by) => { - let (n_groups, group_idx) = get_groups(by); - let mut band_names: Vec = Vec::with_capacity(n_groups); - let mut raster = ctx.raster_info.build_raster(n_groups, ctx.background); - - raster - .outer_iter_mut() - .into_par_iter() - .zip(group_idx.into_par_iter()) - .map(|(band, (group_idx, idxs))| { - let mut writer = DenseArrayWriter::new(band, ctx.pixel_fn); - - dispatch_burn!(all_touched, dedup, process_multi, &ctx, &mut writer, &idxs); - - by.get(group_idx as usize).unwrap().to_string() - }) - .collect_into_vec(&mut band_names); - - 
DenseArray::new(raster, band_names, ctx.raster_info) - } - None => { - let band_names = vec![String::from("band_1")]; - let mut raster = ctx.raster_info.build_raster(1, ctx.background); - let mut writer = DenseArrayWriter::new(raster.index_axis_mut(Axis(0), 0), ctx.pixel_fn); - - dispatch_burn!(all_touched, dedup, process_single, &ctx, &mut writer); - - DenseArray::new(raster, band_names, ctx.raster_info) - } - } - } -} - -impl Rasterize for Sparse -where - N: Num + PolarsHandler + Copy + Element, -{ - type Output = SparseArray; - - fn rasterize(ctx: RasterizeContext, by: Option<&ChunkedArray>) -> Self::Output { - let all_touched = ctx.opt_flags.with_all_touched(); - let dedup = ctx.opt_flags.requires_deduplication(); - - match by { - Some(by) => { - let (n_groups, group_idx) = get_groups(by); - let mut writers: Vec> = Vec::with_capacity(n_groups); - - group_idx - .into_par_iter() - .map(|(group_idx, idxs)| { - let band_name = by.get(group_idx as usize).unwrap().to_string(); - let mut writer = SparseArrayWriter::new(band_name); - - dispatch_burn!(all_touched, dedup, process_multi, &ctx, &mut writer, &idxs); - - writer - }) - .collect_into_vec(&mut writers); - - writers.finish(ctx) - } - None => { - let mut writer = SparseArrayWriter::new(String::from("band_1")); - - dispatch_burn!(all_touched, dedup, process_single, &ctx, &mut writer); - - writer.finish(ctx) - } - } - } -} - -fn get_groups(by: &ChunkedArray) -> (usize, GroupsIdx) { - let groups = by.group_tuples(true, true).expect("No groups found!"); - (groups.len(), groups.into_idx()) -} - -fn process_single(ctx: &RasterizeContext, writer: &mut W) -where - N: Num + PolarsHandler + Copy, - W: PixelWriter, - S: LineBurnStrategy, -{ - match &ctx.field { - FieldSource::Scalar(s) => { - for geom in &ctx.geometry { - geom.burn::(&ctx.raster_info, *s, writer, ctx.background); - } - } - FieldSource::Array(arr) => { - arr.iter() - .zip(ctx.geometry.iter()) - .for_each(|(fv, geom)| geom.burn::(&ctx.raster_info, *fv, 
writer, ctx.background)); - } - FieldSource::Column(col) => { - let ca = col.as_materialized_series().unpack::().unwrap(); - if let Ok(slice) = ca.cont_slice() { - slice - .iter() - .zip(&ctx.geometry) - .for_each(|(fv, geom)| geom.burn::(&ctx.raster_info, *fv, writer, ctx.background)); - } else { - ca.iter().zip(&ctx.geometry).for_each(|(fv, geom)| { - if let Some(fv) = fv { - geom.burn::(&ctx.raster_info, fv, writer, ctx.background) - } - }); - } - } - } -} - -fn process_multi(ctx: &RasterizeContext, writer: &mut W, idxs: &[u32]) -where - N: Num + PolarsHandler + Copy, - W: PixelWriter, - S: LineBurnStrategy, -{ - match &ctx.field { - FieldSource::Scalar(s) => { - for geom in &ctx.geometry { - geom.burn::(&ctx.raster_info, *s, writer, ctx.background); - } - } - FieldSource::Array(arr) => { - arr.iter() - .zip(ctx.geometry.iter()) - .for_each(|(fv, geom)| geom.burn::(&ctx.raster_info, *fv, writer, ctx.background)); - } - FieldSource::Column(col) => { - let ca = col.as_materialized_series().unpack::().unwrap(); - if let Ok(slice) = ca.cont_slice() { - for &i in idxs.iter() { - let idx = i as usize; - if let Some(geom) = ctx.geometry.get(idx) { - geom.burn::(&ctx.raster_info, slice[idx], writer, ctx.background) - } - } - } else { - for &i in idxs.iter() { - let idx = i as usize; - if let (Some(fv), Some(geom)) = (ca.get(idx), ctx.geometry.get(idx)) { - geom.burn::(&ctx.raster_info, fv, writer, ctx.background) - } - } - } - } - } -}