diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6c97180..03ad80d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,259 +1,235 @@ -name: CI +name: Publish on: push: tags: - - '*' - + - "*" + workflow_dispatch: +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: ${{ !startsWith(github.ref, 'refs/tags/') }} + jobs: - linux: - runs-on: ${{ matrix.platform.runner }} + build-linux: + runs-on: ubuntu-latest strategy: matrix: platform: - { - runner: ubuntu-latest, target: x86_64, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc", } - { - runner: ubuntu-latest, target: x86, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc", } - { - runner: ubuntu-latest, target: aarch64, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc --zig", } - { - runner: ubuntu-latest, target: armv7, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } - { - runner: ubuntu-latest, target: s390x, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } - { - runner: ubuntu-latest, target: ppc64le, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } + steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - uses: actions/setup-python@v6 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist ${{ matrix.platform.maturin-args }} - --interpreter ${{ matrix.platform.interpreter }} + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: auto rust-toolchain: 
nightly - - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-linux-${{ matrix.platform.target }} path: dist - - musllinux: - runs-on: ${{ matrix.platform.runner }} + + build-musllinux: + runs-on: ubuntu-latest strategy: matrix: platform: - { - runner: ubuntu-latest, target: x86_64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: x86, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: aarch64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: armv7, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - uses: actions/setup-python@v6 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: musllinux_1_2 rust-toolchain: nightly - - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-musllinux-${{ matrix.platform.target }} path: dist - - windows: - runs-on: ${{ matrix.platform.runner }} + + build-windows: + runs-on: windows-latest strategy: matrix: platform: - { - runner: 
windows-latest, target: x64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } - - { - runner: windows-latest, - target: x86, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } + - { target: x86, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t" } + steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - uses: actions/setup-python@v6.0.0 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x + python-version: "3.x" architecture: ${{ matrix.platform.target }} - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} rust-toolchain: nightly - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-windows-${{ matrix.platform.target }} path: dist - macos: - runs-on: ${{ matrix.platform.runner }} + build-macos: + runs-on: macos-latest strategy: matrix: platform: - { - runner: macos-latest, - target: x86_64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } + target: x86_64, + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", + } - { - runner: macos-latest, - target: aarch64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } + target: aarch64, + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", + } + steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - uses: actions/setup-python@v6.0.0 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 
+ python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} rust-toolchain: nightly - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-macos-${{ matrix.platform.target }} path: dist build-sdist: - name: "build sdist" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - name: "Build sdist" - uses: PyO3/maturin-action@v1.49.4 + + - name: Build sdist + uses: PyO3/maturin-action@v1.51.0 with: command: sdist args: --out dist rust-toolchain: nightly - - - name: "Upload sdist" - uses: actions/upload-artifact@v4.6.2 + + - name: Upload sdist + uses: actions/upload-artifact@v7.0.1 with: name: wheels-sdist path: dist - + release: - name: "release" runs-on: ubuntu-latest - if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} - needs: [linux, musllinux, windows, macos, build-sdist] + if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' + needs: + - build-linux + - build-musllinux + - build-windows + - build-macos + - build-sdist + permissions: - # Use to sign the release artifacts id-token: write - # Used to upload release artifacts contents: write - # Used to generate artifact attestation attestations: write + steps: - - uses: actions/download-artifact@v5.0.0 - - name: "Generate artifact attestation" - uses: actions/attest-build-provenance@v3.0.0 + - uses: actions/download-artifact@v8.0.1 with: - subject-path: "wheels-*/*" - - - name: "Publish to PyPI" - if: ${{ startsWith(github.ref, 'refs/tags/') }} - uses: PyO3/maturin-action@v1.49.4 + pattern: wheels-* + 
merge-multiple: true + path: dist + + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v4.1.0 + with: + subject-path: dist/* + + - name: Publish to PyPI + if: startsWith(github.ref, 'refs/tags/') + uses: PyO3/maturin-action@v1.51.0 env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - + with: command: upload - args: --non-interactive --skip-existing wheels-*/* + args: --non-interactive --skip-existing dist/* rust-toolchain: nightly diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..8c10711 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,48 @@ +name: Deploy Docs + +on: + workflow_dispatch: + +permissions: + contents: write + pages: write + id-token: write + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6.0.2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6.2.0 + with: + python-version: 3.14 + + - run: pip install "mkdocs-material" "mkdocstrings[python]" + - run: mkdocs gh-deploy --config-file docs/mkdocs.yml --force + + deploy_mkdocs: + needs: build + + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6.0.2 + with: + ref: gh-pages + + - uses: actions/configure-pages@v6.0.0 + + - uses: actions/upload-pages-artifact@v5.0.0 + with: + path: "." 
+ + - id: deployment + uses: actions/deploy-pages@v5.0.0 diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 9ea7d34..ba967d1 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,17 +1,24 @@ -name: python-test +name: Python Test on: - push: pull_request: workflow_dispatch: permissions: contents: read +concurrency: + group: tests-${{ github.ref }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + jobs: test-python: - if: ${{ contains(github.event.head_commit.message, '!test') || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} name: test ${{ matrix.python-version }} + runs-on: ubuntu-latest + continue-on-error: ${{ endsWith(matrix.python-version, 't') }} strategy: fail-fast: false matrix: @@ -25,68 +32,44 @@ jobs: - "3.14t" - "pypy3.11" - runs-on: ubuntu-latest - continue-on-error: ${{ endsWith(matrix.python-version, 't') }} - steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6.0.2 - - name: install rust stable - uses: dtolnay/rust-toolchain@stable - with: - toolchain: nightly + - uses: dtolnay/rust-toolchain@nightly - - name: set up python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6.2.0 with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - run: pip install pytest pytest-asyncio "maturin[patchelf]" - - - run: pip install -e . 
+ - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin + - run: virtualenv --no-vcs-ignore .venv + - run: maturin develop --features use-small-offset + - run: pytest -v env: - RUST_BACKTRACE: 1 - - - run: pip freeze - - - run: pytest -vv - env: - RUST_BACKTRACE: 1 HYPOTHESIS_PROFILE: slow test-os: - if: ${{ contains(github.event.head_commit.message, '!test') || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} name: test on ${{ matrix.os }} - + runs-on: ${{ matrix.os }}-latest strategy: fail-fast: false matrix: os: [ubuntu, macos, windows] - runs-on: ${{ matrix.os }}-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6.0.2 - - name: install rust stable - uses: dtolnay/rust-toolchain@stable - with: - toolchain: nightly + - uses: dtolnay/rust-toolchain@nightly - - name: set up python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6.2.0 with: python-version: "3.13" + allow-prereleases: true - - run: pip install pytest pytest-asyncio maturin - - - run: pip install -e . - env: - RUST_BACKTRACE: 1 - - - run: pip freeze - - - run: pytest -vv + - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin + - run: virtualenv --no-vcs-ignore .venv + - run: maturin develop --features use-small-offset + - run: pytest -v env: - RUST_BACKTRACE: 1 HYPOTHESIS_PROFILE: slow diff --git a/.gitignore b/.gitignore index 61f52f5..2feb865 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ __pycache__ /.pytest_cache /htmlcov /backup +/.benchmarks +/.hypothesis diff --git a/Cargo.lock b/Cargo.lock index 3c1e8c7..1350f8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,26 +2,40 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "bumpalo" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "cachebox" -version = "5.2.1" +version = "6.0.0" dependencies = [ "cfg-if", + "chrono", "fastrand", - "hashbrown", - "mimalloc", "parking_lot", "pyo3", "pyo3-build-config", @@ -29,9 +43,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "shlex", @@ -39,27 +53,64 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "futures-core" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] -name = "hashbrown" -version = "0.14.5" +name = "futures-task" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] [[package]] name = "heck" @@ -68,27 +119,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "indoc" -version = "2.0.6" +name = "iana-time-zone" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] [[package]] -name = "libc" -version = "0.2.171" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] [[package]] -name = "libmimalloc-sys" -version = "0.1.44" +name = "js-sys" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ - "cc", - "libc", + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", ] +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + [[package]] name = "lock_api" version = "0.4.14" @@ -99,28 +170,25 @@ dependencies = [ ] [[package]] -name = "memoffset" -version = "0.9.1" +name = "log" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] -name = "mimalloc" -version = "0.1.48" +name = 
"num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "libmimalloc-sys", + "autocfg", ] [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "parking_lot" @@ -145,43 +213,47 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", + "chrono", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = 
"pyo3-build-config" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "python3-dll-a", "target-lexicon", @@ -189,9 +261,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -199,9 +271,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -211,9 +283,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -224,31 +296,37 @@ dependencies = [ [[package]] name = "python3-dll-a" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d381ef313ae70b4da5f95f8a4de773c6aa5cd28f73adec4b4a31df70b66780d8" +checksum = "d80ba7540edb18890d444c5aa8e1f1f99b1bdf26fb26ae383135325f4a36042b" dependencies = [ "cc", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "scopeguard" version = "1.2.0" @@ -261,17 +339,23 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" -version = "2.0.100" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -280,24 +364,116 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.13.2" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] -name = "unindent" -version = "0.2.4" +name = "wasm-bindgen" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", 
+ "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml index 15a7461..d7c886a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,17 @@ [package] name = "cachebox" -version = "5.2.3" +version = "6.0.0" edition = "2021" description = "The fastest memoizing and caching Python library written in Rust" readme = "README.md" license = "MIT" homepage = "https://github.com/awolverp/cachebox" repository = "https://github.com/awolverp/cachebox.git" -authors = ["awolverp"] +authors = ["Ali Pooralijan "] [lib] name = "cachebox" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] [profile.release] codegen-units = 1 @@ -19,38 +19,26 @@ debug = false 
incremental = false lto = true panic = "abort" -strip = "symbols" +strip = true -[features] -mimalloc = ["dep:mimalloc"] - -[dependencies.hashbrown] -version = "0.14.5" -default-features = false -features = ["inline-more", "raw"] - -[dependencies.fastrand] -version = "2.3.0" +[dependencies] +cfg-if = "1.0.4" +chrono = "0.4.44" +fastrand = "2.4.1" +parking_lot = {version="0.12.5", default-features=false} +pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib", "chrono"]} +# tokio = {version="1.52.3", default-features=false, features=["sync"]} -[dependencies.pyo3] -version = "0.27.1" -default-features = false -features = ["macros", "extension-module", "generate-import-lib"] +[build-dependencies] +pyo3-build-config = {version="0.28.3", default-features=false, features=["resolve-config"]} -[dependencies.cfg-if] -version = "1.0.3" - -[dependencies.parking_lot] -version = "0.12.5" - -[dependencies.mimalloc] -version = "0.1.48" -features = ["v3", "override", "local_dynamic_tls"] -optional = true +[features] +default = ["inline-more", "extension-module"] +inline-more = [] +extension-module = ["pyo3/extension-module"] -[build-dependencies.pyo3-build-config] -version = "0.27.1" -features = ["resolve-config"] +# testing features +use-small-offset = [] [lints.clippy] dbg_macro = "warn" diff --git a/LICENSE b/LICENSE index 1b08669..e5f48c2 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. 
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY new file mode 100644 index 0000000..dc6cf36 --- /dev/null +++ b/LICENSE-THIRD-PARTY @@ -0,0 +1,70 @@ +# Third-Party Licenses + +This project includes code from the following third-party sources: + +--- + +## hashbrown + +Repository: https://github.com/rust-lang/hashbrown +License: MIT OR Apache-2.0 + +### MIT License + +Copyright (c) 2016 Amanieu d'Antras + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ +--- + +### Apache License 2.0 + +Copyright (c) 2016 Amanieu d'Antras + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +--- diff --git a/Makefile b/Makefile deleted file mode 100644 index e0412ac..0000000 --- a/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -help: - @echo "Commands:" - @echo -e "\tbuild-dev build source" - @echo -e "\tbuild-prod build source (release mode)" - @echo -e "\ttest-rs clippy and test rust code" - @echo -e "\ttest-py build and test python code" - @echo -e "\tformat format rust and python code" - @echo -e "\tclean clean all the unneeded files" - -.PHONY: build-dev -build-dev: - maturin develop - -.PHONY: build-prod -build-prod: - maturin develop --release - -.PHONY: test-rs -test-rs: - cargo clippy - cargo test -- --nocapture - -.PHONY: test-py -test-py: build-dev - coverage run -m pytest -s -vv - -rm -rf .pytest_cache - -ruff check . - ruff clean - coverage html - -.PHONY: format -format: - ruff format --line-length=100 . 
- ruff clean - cargo fmt - -.PHONY: clean -clean: - -rm -rf `find . -name __pycache__` - -rm -rf python/cachebox/*.so - -rm -rf target/release - -rm -rf .pytest_cache - -rm -rf .coverage - -rm -rf htmlcov - -ruff clean diff --git a/README.md b/README.md index f367152..3d46dd0 100644 --- a/README.md +++ b/README.md @@ -4,44 +4,36 @@ *The fastest caching Python library written in Rust* -[**Releases**](https://github.com/awolverp/cachebox/releases) | +[**Documentation**](https://awolverp.github.io/cachebox) | [**Releases**](https://github.com/awolverp/cachebox/releases) | [**Benchmarks**](https://github.com/awolverp/cachebox-benchmark) | [**Issues**](https://github.com/awolverp/cachebox/issues/new) [![License](https://img.shields.io/github/license/awolverp/cachebox.svg?style=flat-square)](https://github.com/awolverp/cachebox/blob/main/LICENSE) -[![Release](https://img.shields.io/github/v/release/awolverp/cachebox.svg?style=flat-square)](https://github.com/awolverp/cachebox/releases) -[![Python Versions](https://img.shields.io/pypi/pyversions/cachebox.svg?style=flat-square)](https://pypi.org/project/cachebox/) [![Downloads](https://img.shields.io/pypi/dm/cachebox?style=flat-square&color=%23314bb5)](https://pepy.tech/projects/cachebox) ------- +> [!WARNING]\ +> The new version v6 has incompatibilities with v5. For more info see [Migration Guide](https://awolverp.github.io/cachebox/migration). + ### What does it do? You can easily perform powerful caching operations in Python as fast as possible. This can make your application a lot faster and it can be a good choice in complex applications. **Ideal for optimizing large-scale applications** with efficient, low-overhead caching. 
**Key Features:** -- 🚀 Extremely fast (10-50x faster than other caching libraries -- [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) -- 📊 Minimal memory footprint (50% of standard dictionary memory usage) +- 🚀 Extremely fast (10-50x faster than other caching libraries - [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) +- 📊 Minimal memory footprint - 🔥 Full-featured and user-friendly - 🧶 Completely thread-safe - 🔧 Tested and correct - **\[R\]** written in Rust for maximum performance -- 🤝 Compatible with Python 3.9+ (PyPy and CPython) +- 🤝 Compatible with Python 3.10+ (PyPy and CPython) - 📦 Supports 7 advanced caching algorithms -### Page Contents -- ❓ [**When do I need caching and `cachebox`?**](#when-do-i-need-caching-and-cachebox) -- 🌟 [**Why `cachebox`?**](#why-cachebox) -- 🔧 [**Installation**](#installation) -- 💡 [**Preview**](#examples) -- 🎓 [**Getting started**](#getting-started) -- ✏️ [**Incompatible changes**](#%EF%B8%8F-incompatible-changes) -- 📌 [**Tips & Notes**](#tips-and-notes) - -### When do I need caching and `cachebox`? +### When do I need caching? - 📈 **Frequent Data Access** \ If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. @@ -65,19 +57,19 @@ This can make your application a lot faster and it can be a good choice in compl It uses the *Rust* language for high-performance. - **🧮 SwissTable** \ -It uses Google's high-performance SwissTable hash map. Credit to [hashbrown](https://github.com/rust-lang/hashbrown). +It uses Google's high-performance SwissTable hash map. Thanks to [hashbrown](https://github.com/rust-lang/hashbrown). - **✨ Low memory usage** \ It has very low memory usage. - **⭐ Zero Dependency** \ -As we said, `cachebox` is written in Rust so you don't have to install any other dependecies. +As we said, `cachebox` is written in *Rust* so you don't have to install any other dependencies. 
- **🧶 Thread safe** \ -It's completely thread-safe and uses locks to prevent problems. +It's completely thread-safe and uses *Rust* mutex to prevent problems. - **👌 Easy To Use** \ -You only need to import it and choose a cache implementation to use. It will behave like a dictionary. +You only need to import it and choose a cache implementation to use. - **🚫 Avoids Cache Stampede** \ It avoids [cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) by using a distributed lock system. @@ -89,15 +81,11 @@ cachebox is installable via `pip`: pip3 install -U cachebox ``` -> [!WARNING]\ -> The new version v5 has some incompatibilities with v4. For more info see [Incompatible changes](#incompatible-changes). - ## Examples The simplest example of **cachebox** could look like this: ```python import cachebox -# Like functools.lru_cache, If maxsize is set to 0, the cache can grow without bounds and limit. @cachebox.cached(cachebox.FIFOCache(maxsize=128)) def factorial(number: int) -> int: fact = 1 @@ -106,7 +94,6 @@ def factorial(number: int) -> int: return fact assert factorial(5) == 125 -assert len(factorial.cache) == 1 # coroutines are also supported @cachebox.cached(cachebox.LRUCache(maxsize=128)) @@ -142,707 +129,8 @@ assert cache["key"] == "value" assert cache.get("key") == "value" ``` -## Getting started -There are 3 useful functions: -- [**cached**](#cached--decorator): a decorator that helps you to cache your functions and calculations with a lot of options. -- [**is_cached**](#is_cached--function): check if a function/method cached by cachebox or not - -And 9 classes: -- [**BaseCacheImpl**](#basecacheimpl-️-class): base-class for all classes. -- [**Cache**](#cache-️-class): A simple cache that has no algorithm; this is only a hashmap. -- [**FIFOCache**](#fifocache-️-class): the FIFO cache will remove the element that has been in the cache the longest. 
-- [**RRCache**](#rrcache-️-class): the RR cache will remove a random element to make free up space when necessary. -- [**LRUCache**](#lrucache-️-class): the LRU cache will remove the element in the cache that has not been accessed in the longest time. -- [**LFUCache**](#lfucache-️-class): the LFU cache will remove the element in the cache that has been accessed the least often, regardless of time. -- [**TTLCache**](#ttlcache-️-class): the TTL cache will automatically remove the element in the cache that has expired. -- [**VTTLCache**](#vttlcache-️-class): the TTL cache will automatically remove the element in the cache that has expired when needed. -- [**Frozen**](#frozen-️-class): you can use this class for freezing your caches. - -You only need to import the classes you want and can work with them like a regular dictionaries (except for [VTTLCache](#vttlcache-️-class), this have some differences). - -The examples below will introduce you to these different features. -**All the methods in the examples are common across all classes (exceptions are noted where applicable).** - -* * * - -### `cached` (🎀 decorator) -Decorator to wrap a function with a memoizing callable that saves results in a cache. - -**Parameters:** -- `cache`: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. - -- `key_maker`: Specifies a function that will be called with the same positional and keyword - arguments as the wrapped function itself. It has to return a suitable cache key - (must be hashable). - -- `clear_reuse`: The wrapped function has a function named `clear_cache` that uses `cache.clear` - method to clear the cache. This parameter will be passed to cache's `clear` method. - -- `callback`: Every time the `cache` is used, callback is also called. - The callback arguments are: event number (see `EVENT_MISS` or `EVENT_HIT` variables), key, and then result. 
- -- `copy_level`: The wrapped function always copies the result of your function and then returns it. - This parameter specifies how the result is copied before returning it. - `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and - `2` means "always copy the results". Defaults to 1. - -
-Examples - - -A simple example: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(128)) -def sum_as_string(a, b): - return str(a+b) - -assert sum_as_string(1, 2) == "3" - -assert len(sum_as_string.cache) == 1 -sum_as_string.cache_clear() -assert len(sum_as_string.cache) == 0 -``` - -A `key_maker` example: -```python -import cachebox - -def simple_key_maker(args: tuple, kwds: dict): - return args[0].path - -# Async methods are supported -@cachebox.cached(cachebox.LRUCache(128), key_maker=simple_key_maker) -async def request_handler(request: Request): - return Response("hello man") -``` - -A typed `key_maker` example using a predefined key function: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(128), key_maker=cachebox.make_typed_key) -def sum_as_string(a, b): - return str(a+b) - -sum_as_string(1.0, 1) -sum_as_string(1, 1) -print(len(sum_as_string.cache)) # 2 -``` - -You have the option to manage caches with `.cache` attribute as shown in previous examples. 
-There are more attributes and methods you can use: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(0)) -def sum_as_string(a, b): - return str(a+b) - -print(sum_as_string.cache) -# LRUCache(0 / 9223372036854775807, capacity=0) - -print(sum_as_string.cache_info()) -# CacheInfo(hits=0, misses=0, maxsize=9223372036854775807, length=0, memory=8) - -# `.cache_clear()` clears the cache -sum_as_string.cache_clear() -``` - -method example: *(Added in v5.1.0)* -```python -import cachebox - -class Example: - def __init__(self, num) -> None: - self.num = num - self._cache = cachebox.TTLCache(20, 10) - - @cachebox.cached(lambda self: self._cache) - def method(self, char: str): - return char * self.num - -ex = Example(10) -assert ex.method("a") == "a" * 10 -``` - -`callback` example: *(Added in v4.2.0)* -```python -import cachebox - -def callback_func(event: int, key, value): - if event == cachebox.EVENT_MISS: - print("callback_func: miss event", key, value) - elif event == cachebox.EVENT_HIT: - print("callback_func: hit event", key, value) - else: - # unreachable code - raise NotImplementedError - -@cachebox.cached(cachebox.LRUCache(0), callback=callback_func) -def func(a, b): - return a + b - -assert func(1, 2) == 3 -# callback_func: miss event (1, 2) 3 - -assert func(1, 2) == 3 # hit -# callback_func: hit event (1, 2) 3 - -assert func(1, 2) == 3 # hit again -# callback_func: hit event (1, 2) 3 - -assert func(5, 4) == 9 -# callback_func: miss event (5, 4) 9 -``` - -
- -> [!TIP]\ -> There's a new feature **since `v4.1.0`** for making a cached function not use cache for a call: -> ```python -> # with `cachebox__ignore=True` parameter, cachebox does not use cache and directly calls the function, returning its result. -> sum_as_string(10, 20, cachebox__ignore=True) -> ``` - -* * * - -### `cachedmethod` (🎀 decorator) -This decorator works excatly like `cached()`, but ignores `self` parameters in hashing and key making. - -> [!WARNING]\ -> This function has been deprecated since `v5.1.0`, use `cached` function instead. - -
-Example - -```python -import cachebox - -class MyClass: - @cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) - def my_method(self, name: str): - return "Hello, " + name + "!" - -c = MyClass() -c.my_method() -``` - -
- -* * * - -### `is_cached` (📦 function) -Checks whether a function/method is cached by cachebox or not. - -**Parameters:** -- `func`: The function/method to check. - -
-Example - -```python -import cachebox - -@cachebox.cached(cachebox.FIFOCache(0)) -def func(): - pass - -assert cachebox.is_cached(func) -``` - -
- -* * * - -### `BaseCacheImpl` (🏗️ class) -Base implementation for cache classes in the cachebox library. - -This abstract base class defines the generic structure for cache implementations, -supporting different key and value types through generic type parameters. -Serves as a foundation for specific cache variants like Cache and FIFOCache. - -
-Example - -```python -import cachebox - -# subclass -class ClassName(cachebox.BaseCacheImpl): - ... - -# type-hint -def func(cache: BaseCacheImpl): - ... - -# isinstance -cache = cachebox.LFUCache(0) -assert isinstance(cache, cachebox.BaseCacheImpl) -``` - -
- -* * * - -### `Cache` (🏗️ class) -A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. - -Provides a flexible key-value storage mechanism with: -- Configurable maximum size (zero means unlimited) -- Lower memory usage compared to standard dict -- Thread-safe operations -- Useful memory management methods - -Supports initialization with optional initial data and capacity -and provides dictionary-like access with additional cache-specific operations. - -> [!TIP]\ -> Differs from standard `dict` by: -> - being thread-safe and unordered, while dict isn't thread-safe and ordered (Python 3.6+). -> - using much less memory than dict. -> - supporting useful and new methods for managing memory, while dict does not. -> - **not supporting** `popitem()`, while dict does. -> - an option to limit the size of `Cache` which dict doesn't support. - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | N/A | - -
-Example - -```python -from cachebox import Cache - -# These parameters are common in classes: -# `maxsize` specifies the limit size of the cache (zero means infinity); this is unchangable. -# `iterable` allows creating a cache from a dict or an iterable. -# `capacity` will make the cache attempt to allocate a new hash table with at -# least enough capacity for inserting the given number of elements without reallocating. -cache = Cache(maxsize=100, iterable=None, capacity=100) - -# behaves like a regular dict -cache["key"] = "value" -# using `.insert(key, value)` is recommended -cache.insert("key", "value") - -print(cache["key"]) # value - -del cache["key"] -cache["key"] # KeyError: key - -# cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded -cache.update({i:i for i in range(200)}) -# OverflowError: The cache has reached the bound. -``` - -
- -* * * - -### `FIFOCache` (🏗️ class) -A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. - -This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. - -**Key features**: -- Deterministic item eviction order (oldest items removed first) -- Efficient key-value storage and retrieval -- Supports dictionary-like operations -- Allows optional initial data population - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------------- | ------- | -| Worse-case | O(1) | O(1) | O(min(i, n-i)) | O(1) | - -
-Example - -```python -from cachebox import FIFOCache - -cache = FIFOCache(5, {i:i*2 for i in range(5)}) - -print(len(cache)) # 5 -cache["new-key"] = "new-value" -print(len(cache)) # 5 - -print(cache.get(3, "default-val")) # 6 -print(cache.get(6, "default-val")) # default-val - -print(cache.popitem()) # (1, 2) - -# insert method returns a value: -# - If the cache did not have this key present, None is returned. -# - If the cache did have this key present, the value is updated, and the old value is returned. -print(cache.insert(3, "val")) # 6 -print(cache.insert("new-key", "val")) # None - -# Returns the first key in cache; this is the one which will be removed by `popitem()`. -print(cache.first()) -``` - -
- -* * * - -### `RRCache` (🏗️ class) -A thread-safe cache implementation with Random Replacement (RR) policy. - -This cache randomly selects and removes elements when the cache reaches its maximum size, -ensuring a simple and efficient caching mechanism with configurable capacity. - -Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | O(1) | - -
-Example - -```python -from cachebox import RRCache - -cache = RRCache(10, {i:i for i in range(10)}) -print(cache.is_full()) # True -print(cache.is_empty()) # False - -# Returns the number of elements the map can hold without reallocating. -print(cache.capacity()) # 28 - -# Shrinks the cache to fit len(self) elements. -cache.shrink_to_fit() -print(cache.capacity()) # 10 - -# Returns a random key -print(cache.random_key()) # 4 -``` - -
- -* * * - -### `LRUCache` (🏗️ class) -Thread-safe Least Recently Used (LRU) cache implementation. - -Provides a cache that automatically removes the least recently used items when -the cache reaches its maximum size. Supports various operations like insertion, -retrieval, and management of cached items with configurable maximum size and -initial capacity. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | - -
-Example - -```python -from cachebox import LRUCache - -cache = LRUCache(0, {i:i*2 for i in range(10)}) - -# access `1` -print(cache[0]) # 0 -print(cache.least_recently_used()) # 1 -print(cache.popitem()) # (1, 2) - -# .peek() searches for a key-value in the cache and returns it without moving the key to recently used. -print(cache.peek(2)) # 4 -print(cache.popitem()) # (2, 4) - -# Does the `popitem()` `n` times and returns count of removed items. -print(cache.drain(5)) # 5 -``` - -
- -* * * - -### `LFUCache` (🏗️ class) -A thread-safe Least Frequently Used (LFU) cache implementation. - -This cache removes elements that have been accessed the least number of times, -regardless of their access time. It provides methods for inserting, retrieving, -and managing cache entries with configurable maximum size and initial capacity. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | - -
-Example - -```python -from cachebox import LFUCache - -cache = cachebox.LFUCache(5) -cache.insert('first', 'A') -cache.insert('second', 'B') - -# access 'first' twice -cache['first'] -cache['first'] - -# access 'second' once -cache['second'] - -assert cache.least_frequently_used() == 'second' -assert cache.least_frequently_used(2) is None # 2 is out of range - -for item in cache.items_with_frequency(): - print(item) -# ('second', 'B', 1) -# ('first', 'A', 2) -``` - -
- -* * * - -### `TTLCache` (🏗️ class) -A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. - -This cache automatically removes elements that have expired based on their time-to-live setting. -Supports various operations like insertion, retrieval, and iteration. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | - -
-Example - -```python -from cachebox import TTLCache -import time - -# The `ttl` param specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. -cache = TTLCache(0, ttl=2) -cache.update({i:str(i) for i in range(10)}) - -print(cache.get_with_expire(2)) # ('2', 1.99) - -# Returns the oldest key in cache; this is the one which will be removed by `popitem()` -print(cache.first()) # 0 - -cache["mykey"] = "value" -time.sleep(2) -cache["mykey"] # KeyError -``` - -
- -* * * - -### `VTTLCache` (🏗️ class) -A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. - -This cache allows storing key-value pairs with optional expiration times. When an item expires, -it is automatically removed from the cache. The cache supports a maximum size and provides -various methods for inserting, retrieving, and managing cached items. - -Key features: -- Per-key time-to-live (TTL) support -- Configurable maximum cache size -- Thread-safe operations -- Automatic expiration of items - -Supports dictionary-like operations such as get, insert, update, and iteration. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | - -> [!TIP]\ -> `VTTLCache` vs `TTLCache`: -> - In `VTTLCache` each item has its own unique time-to-live, unlike `TTLCache`. -> - `VTTLCache` is generally slower than `TTLCache`. - -
-Example - -```python -from cachebox import VTTLCache -import time - -# The `ttl` param specifies the time-to-live value for `iterable` (in seconds); cannot be zero or negative. -cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) -print(len(cache)) # 4 -time.sleep(3) -print(len(cache)) # 0 - -# The "key1" is exists for 5 seconds -cache.insert("key1", "value", ttl=5) -# The "key2" is exists for 2 seconds -cache.insert("key2", "value", ttl=2) - -time.sleep(2) -# "key1" is exists for 3 seconds -print(cache.get("key1")) # value - -# "key2" has expired -print(cache.get("key2")) # None -``` - -
- -* * * - -### `Frozen` (🏗️ class) -**This is not a cache**; This is a wrapper class that prevents modifications to an underlying cache implementation. - -This class provides a read-only view of a cache, optionally allowing silent -suppression of modification attempts instead of raising exceptions. - -
-Example - -```python -from cachebox import Frozen, FIFOCache - -cache = FIFOCache(10, {1:1, 2:2, 3:3}) - -# parameters: -# cls: your cache -# ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. -frozen = Frozen(cache, ignore=True) -print(frozen[1]) # 1 -print(len(frozen)) # 3 - -# Frozen ignores this action and do nothing -frozen.insert("key", "value") -print(len(frozen)) # 3 - -# Let's try with ignore=False -frozen = Frozen(cache, ignore=False) - -frozen.insert("key", "value") -# TypeError: This cache is frozen. -``` - -
- -> [!NOTE]\ -> The **Frozen** class can't prevent expiring in [TTLCache](#ttlcache) or [VTTLCache](#vttlcache). -> -> For example: -> ```python -> cache = TTLCache(0, ttl=3, iterable={i:i for i in range(10)}) -> frozen = Frozen(cache) -> -> time.sleep(3) -> print(len(frozen)) # 0 -> ``` - -## ⚠️ Incompatible Changes -These are changes that are not compatible with the previous version: - -**You can see more info about changes in [Changelog](CHANGELOG.md).** - -#### CacheInfo's cachememory attribute renamed! -The `CacheInfo.cachememory` was renamed to `CacheInfo.memory`. - -```python -@cachebox.cached({}) -def func(a: int, b: int) -> str: - ... - -info = func.cache_info() - -# Older versions -print(info.cachememory) - -# New version -print(info.memory) -``` - -#### Errors in the `__eq__` method will not be ignored! -Now the errors which occurred while doing `__eq__` operations will not be ignored. - -```python -class A: - def __hash__(self): - return 1 - - def __eq__(self, other): - raise NotImplementedError("not implemeneted") - -cache = cachebox.FIFOCache(0, {A(): 10}) - -# Older versions: -cache[A()] # => KeyError - -# New version: -cache[A()] -# Traceback (most recent call last): -# File "script.py", line 11, in -# cache[A()] -# ~~~~~^^^^^ -# File "script.py", line 7, in __eq__ -# raise NotImplementedError("not implemeneted") -# NotImplementedError: not implemeneted -``` - -#### Cache comparisons will not be strict! -In older versions, cache comparisons depended on the caching algorithm. Now, they work just like dictionary comparisons. - -```python -cache1 = cachebox.FIFOCache(10) -cache2 = cachebox.FIFOCache(10) - -cache1.insert(1, 'first') -cache1.insert(2, 'second') - -cache2.insert(2, 'second') -cache2.insert(1, 'first') - -# Older versions: -cache1 == cache2 # False - -# New version: -cache1 == cache2 # True -``` - -## Tips and Notes -#### How to save caches in files? 
-There's no built-in file-based implementation, but you can use `pickle` for saving caches in files. For example: -```python -import cachebox -import pickle -c = cachebox.LRUCache(100, {i:i for i in range(78)}) - -with open("file", "wb") as fd: - pickle.dump(c, fd) - -with open("file", "rb") as fd: - loaded = pickle.load(fd) - -assert c == loaded -assert c.capacity() == loaded.capacity() -``` - -> [!TIP]\ -> For more, see this [issue](https://github.com/awolverp/cachebox/issues/8). - -* * * - -#### How to copy the caches? -You can use `copy.deepcopy` or `cache.copy` for copying caches. For example: -```python -import cachebox -cache = cachebox.LRUCache(100, {i:i for i in range(78)}) - -# shallow copy -shallow = cache.copy() - -# deep copy -import copy -deep = copy.deepcopy(cache) -``` +## Learn more +Read the documentation for full information and learn more: [**Documentation**](https://awolverp.github.io/cachebox) ## License This repository is licensed under the [MIT License](LICENSE) diff --git a/cachebox/__init__.py b/cachebox/__init__.py new file mode 100644 index 0000000..da11c37 --- /dev/null +++ b/cachebox/__init__.py @@ -0,0 +1,26 @@ +from ._cachebox import BaseCacheImpl as BaseCacheImpl +from ._cachebox import Cache as Cache +from ._cachebox import FIFOCache as FIFOCache +from ._cachebox import LFUCache as LFUCache +from ._cachebox import LRUCache as LRUCache +from ._cachebox import RRCache as RRCache +from ._cachebox import TTLCache as TTLCache +from ._cachebox import VTTLCache as VTTLCache +from ._core import __version__ as __version__ +from ._core import _use_small_offset_feature as _use_small_offset_feature +from .utils import EVENT_HIT as EVENT_HIT +from .utils import EVENT_MISS as EVENT_MISS +from .utils import Frozen as Frozen +from .utils import cached as cached +from .utils import clear_cached_cache as clear_cached_cache +from .utils import get_cached_cache as get_cached_cache +from .utils import get_cached_cache_info as get_cached_cache_info 
+from .utils import get_cached_callback as get_cached_callback +from .utils import is_cached as is_cached +from .utils import make_hash_key as make_hash_key +from .utils import make_key as make_key +from .utils import make_typed_key as make_typed_key +from .utils import postprocess_copy as postprocess_copy +from .utils import postprocess_copy_mutables as postprocess_copy_mutables +from .utils import postprocess_deepcopy as postprocess_deepcopy +from .utils import postprocess_deepcopy_mutables as postprocess_deepcopy_mutables diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py new file mode 100644 index 0000000..def0a30 --- /dev/null +++ b/cachebox/_cachebox.py @@ -0,0 +1,370 @@ +import threading +import time +import typing +from datetime import datetime, timedelta + +from ._core import BaseCacheImpl as BaseCacheImpl +from ._core import Cache as Cache +from ._core import FIFOCache as FIFOCache +from ._core import LFUCache as LFUCache +from ._core import LRUCache as LRUCache +from ._core import RRCache as RRCache + +# private import +from ._core import TTLCache as _CoreTTLCache +from ._core import VTTLCache as _CoreVTTLCache + +if typing.TYPE_CHECKING: + from ._core import _IterableType + +KT = typing.TypeVar("KT", bound=typing.Hashable) +VT = typing.TypeVar("VT") + + +class TTLCache(_CoreTTLCache[KT, VT]): + """ + A cache with a Time-To-Live (TTL) eviction policy. + + Each entry carries an expiration timestamp and is considered stale — and + eligible for eviction — once that deadline has passed, regardless of how + recently or frequently it was accessed. + + Every entry is stamped with an absolute ``expires_at`` timestamp at + insertion time (computed as ``now + global_ttl``). Entries are stored in + insertion order and eviction proceeds from the front of that queue, but + only after confirming the candidate has actually expired. 
A live entry at + the front of the queue blocks eviction of everything behind it, so the + cache may temporarily exceed capacity if the oldest entries are still + fresh. + + Like ``FIFOCache``, this implementation backs the queue with a + double-ended queue for O(1) front removal and a hash map for O(1) key + lookups. The same logical-index trick applies: the table stores + monotonically increasing counters rather than physical deque positions, and + a ``front_offset`` counter converts a logical index back to a physical one + at read time via ``entries[table[key] - front_offset]``. This keeps + eviction and lookup O(1) without rewriting the table on every eviction. + Every read also checks ``expires_at`` against the current wall-clock time + and treats any expired entry as a cache miss. + + Without ``sweep_interval``, an expiry sweep is triggered automatically on + every call to ``insert``, ``update``, ``current_size``, ``remaining_size``, + ``last``, ``first``, ``items``, ``keys``, ``values``, and ``__iter__``. A + completely idle cache will accumulate stale entries between these calls, + but any normal interaction is sufficient to reclaim them. When + ``sweep_interval`` is set, a background thread performs the sweep on that + interval instead, reclaiming expired entries independent of method calls. + + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ---------------- | ------- | + | Worst-case | O(1) | O(1) | O(min(i, n-i)) | O(n) - very rare | + + Pros: + - Insert, lookup, and evict are all O(1) amortized: the + ``front_offset`` trick eliminates the O(n) index-shifting that a + naive implementation would require on every eviction. + - Entries expire automatically without a background thread or explicit + invalidation call; stale data is never returned to the caller. + - TTL expiry and insertion-order eviction compose cleanly: the oldest + expired entry is always evicted first. 
+ - A single ``global_ttl`` keeps configuration simple; every entry ages + at the same rate. + + Cons: + - Wall-clock dependency: correctness relies on a monotonically + advancing system clock. Clock adjustments (NTP steps, + suspend/resume) can cause entries to expire earlier or later than + intended. + - When ``sweep_interval`` is set, a background thread wakes on that + interval to remove all expired entries, adding a small amount of + background CPU usage for the lifetime of the cache. + - No per-entry TTL override: all entries share ``global_ttl``; mixed + expiry requirements need a different policy or a wrapper layer. + - A rare O(n) index rebase (triggered when ``front_offset`` nears + ``usize::MAX - isize::MAX``) introduces an occasional latency spike; + amortised cost is negligible but worst-case latency is unbounded in + principle. + + Use ``TTLCache`` when cached data has a natural freshness window (API + responses, auth tokens, DNS records, rate-limit counters), when automatic + expiry without a background reaper is sufficient, or when access patterns + are unpredictable enough that recency- or frequency-based eviction would + offer no meaningful advantage. + + Avoid it when strong temporal locality makes LRU a better fit, when + per-entry TTL granularity is required (consider ``VTTLCache`` instead), or + when the system clock is unreliable or subject to adjustment. 
+ + ```python + from cachebox import TTLCache + import time + + cache = TTLCache(0, global_ttl=2) + cache.update({i:str(i) for i in range(10)}) + + print(cache.get_with_expire(2)) # ('2', 1.99) + + # Returns the oldest key in cache; this is the one which will be removed by `popitem()` + print(cache.first()) # 0 + + cache["mykey"] = "value" + time.sleep(2) + cache["mykey"] # KeyError + ``` + """ + + def __init__( + self, + maxsize: int, + global_ttl: float | timedelta, + iterable: typing.Optional["_IterableType[KT, VT]"] = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + sweep_interval: float | timedelta | None = None, + ) -> None: + """ + Initializes a new TTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + global_ttl: Time-to-live for every entry, as seconds (float) or a + ``timedelta``. Applied at insertion time. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching — for example, sizing entries by memory + footprint or byte length. + sweep_interval: If set, starts a background thread that sweeps and + removes all expired entries on this interval (in seconds or as + a ``timedelta``). When ``None``, expiry is lazy. Defaults to + ``None``. Must be greater than or equal to 1. + + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. + + Raises: + ValueError: If ``sweep_interval`` is set to a value less than 1. 
+ """ + super().__init__( + maxsize, + global_ttl, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + self._thread: threading.Thread | None = None + self._thread_is_running: bool = False + + if sweep_interval is not None: + if isinstance(sweep_interval, timedelta): + sweep_interval = sweep_interval.total_seconds() + + if sweep_interval < 1: + raise ValueError("sweep_interval must be at least 1 second.") + + self._thread_is_running = True + self._thread = threading.Thread( + target=self._sweeper_thread, + args=(sweep_interval,), + daemon=True, + ) + self._thread.start() + + self._sweep_interval = sweep_interval + + @property + def sweep_interval(self) -> float | None: + """The configured ``sweep_interval`` in seconds.""" + return self._sweep_interval + + def _sweeper_thread(self, interval: float): + while self._thread_is_running: + time.sleep(interval) + self.expire() + + def stop_sweeper(self) -> None: + """Signals the background sweeper thread to stop, if one is active.""" + self._thread_is_running = False + + def __del__(self) -> None: + self.stop_sweeper() + + +class VTTLCache(_CoreVTTLCache[KT, VT]): + """ + A cache with a Variable Time-To-Live (VTTL) eviction policy. + + Each item can be inserted with its own individual TTL (time-to-live). When + an item's TTL expires, it is considered stale and will be evicted. Items + inserted without a TTL never expire and are only evicted when the cache + reaches capacity. + + Expiration is managed lazily by default: stale entries are not removed + immediately when they expire, but are cleaned up on the next access or + when the cache needs to reclaim capacity. Optionally, a ``sweep_interval`` + can be configured to spawn a background thread that proactively removes + expired items on a fixed schedule, bounding the window in which stale + data can be observed or memory held unnecessarily. + + Internally, a lazy-evaluated min-heap tracks expiration deadlines. The + heap is only fully sorted when needed (e.g. 
during eviction), keeping + insert costs low on average. A hash table stores cursors into the heap for + O(1) key lookups. A running total enables O(1) capacity checks. + + When the cache is full and eviction is needed, expired items are reclaimed + first (in expiration order, earliest deadline first). If no expired items + exist, the item with the nearest upcoming expiration is evicted. Items with + no TTL are the last resort and are evicted only when all expiring items + have been exhausted. + + | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | -------------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + + Pros: + - Per-item TTL control: each entry can have a different lifetime. + - Expired items are reclaimed before live items, maximising useful + capacity. + - Lazy expiry avoids background threads and timer overhead by default. + - Optional background sweeping bounds stale-data visibility and memory + retention when lazy eviction is insufficient. + - Insert, lookup, and evict are O(1) amortized (O(log n) worst-case + during heap rebalancing). + - TTL-free items coexist naturally alongside expiring ones. + + Cons: + - Without sweeping, stale items may linger in memory until the next + access or eviction pressure forces a cleanup. + - With sweeping, a background thread is running for the lifetime of + the cache, adding concurrency overhead and requiring thread-safe + internal locking. + - Slightly higher per-insert cost compared to pure LRU/LFU. + - No guarantee on the exact eviction moment for expired items in lazy + mode; callers that require strict TTL enforcement should validate + timestamps on read, or configure a sufficiently short + ``sweep_interval``. + + Use ``VTTLCache`` when different items have different natural lifetimes + (e.g. session tokens, API responses with varying freshness requirements, + or multi-tier data with mixed staleness tolerances). 
Set + ``sweep_interval`` when bounded staleness or proactive memory reclamation + is required. + + Avoid it when all items share a uniform TTL (consider ``TTLCache`` instead), + when strict and immediate expiry is a hard requirement, or when memory pressure + from temporarily lingering stale entries is unacceptable and a background thread + is not an option. + + ```python + from cachebox import VTTLCache + import time + + cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) + print(len(cache)) # 4 + time.sleep(3) + print(len(cache)) # 0 + + # "key1" will exist for 5 seconds + cache.insert("key1", "value", ttl=5) + # "key2" will exist for 2 seconds + cache.insert("key2", "value", ttl=2) + + time.sleep(2) + # "key1" still exists for 3 more seconds + print(cache.get("key1")) # value + + # "key2" has expired + print(cache.get("key2")) # None + ``` + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Optional["_IterableType[KT, VT]"] = None, + ttl: float | timedelta | datetime | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + sweep_interval: float | timedelta | None = None, + ) -> None: + """ + Initializes a new VTTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + iterable: Initial data to populate the cache. + ttl: Time-to-live duration for ``iterable`` items. This *is not* a global ttl. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching — for example, sizing entries by memory + footprint or byte length. + sweep_interval: If set, starts a background thread that sweeps and + removes all expired entries on this interval (in seconds or as + a ``timedelta``). 
+ When ``None``, expiry is lazy. Defaults to + ``None``. Must be greater than or equal to 1. + + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. + + Raises: + ValueError: If ``sweep_interval`` is set to a value less than 1. + """ + super().__init__( + maxsize, + iterable, + ttl, + capacity=capacity, + getsizeof=getsizeof, + ) + + self._thread: threading.Thread | None = None + self._thread_is_running: bool = False + + if sweep_interval is not None: + if isinstance(sweep_interval, timedelta): + sweep_interval = sweep_interval.total_seconds() + + if sweep_interval < 1: + raise ValueError("sweep_interval must be at least 1 second.") + + self._thread_is_running = True + self._thread = threading.Thread( + target=self._sweeper_thread, + args=(sweep_interval,), + daemon=True, + ) + self._thread.start() + + self._sweep_interval = sweep_interval + + @property + def sweep_interval(self) -> float | None: + """The configured ``sweep_interval`` in seconds.""" + return self._sweep_interval + + def _sweeper_thread(self, interval: float): + while self._thread_is_running: + time.sleep(interval) + self.expire() + + def stop_sweeper(self) -> None: + """Signals the background sweeper thread to stop, if one is active.""" + self._thread_is_running = False + + def __del__(self) -> None: + self.stop_sweeper() diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi new file mode 100644 index 0000000..f6f4917 --- /dev/null +++ b/cachebox/_core.pyi @@ -0,0 +1,1661 @@ +import typing +from datetime import datetime, timedelta + +from _typeshed import SupportsItems + +_use_small_offset_feature: typing.Final[bool] +__version__: typing.Final[str] + +KT = typing.TypeVar("KT", bound=typing.Hashable) +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + +_IterableType: typing.TypeAlias = ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | BaseCacheImpl[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] +) + 
+class BaseCacheImpl(typing.Generic[KT, VT]): + """ + Base implementation for cache classes. + + This abstract base class defines the generic structure for cache + implementations. + """ + + def __new__(cls, *args, **kwds) -> typing.Self: + """ + Allocates memory and returns an uninitialized instance. + + Warning: + Using the returned instance before calling ``__init__`` is unsafe + and causes panic errors. + """ + ... + + def __init__( + self, + maxsize: int, + iterable: _IterableType[KT, VT] | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + ) -> None: + """ + Initializes a new instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching - for example, sizing entries by memory + footprint or byte length. + + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. + """ + ... + + @property + def maxsize(self) -> int: + """The configured ``maxsize``.""" + ... + + @property + def getsizeof(self) -> typing.Callable[[KT, VT], int] | None: + """The configured ``getsizeof`` function.""" + ... + + def current_size(self) -> int: + """ + Returns the current total cumulative size of all stored entries. + + Returns: + The sum of sizes of all entries currently in the cache. + """ + ... + + def remaining_size(self) -> int: + """ + Returns the remaining available size. + + Returns: + The result of ``maxsize - current_size``. + """ + ... 
+ + def capacity(self) -> int: + """ + Returns the number of elements the map can hold without reallocating. + + Returns: + The current allocated capacity. + """ + ... + + def __len__(self) -> int: + """ + Returns the number of entries currently in the cache. + + Returns: + The number of entries in the cache. + """ + ... + + def __sizeof__(self) -> int: ... + def __bool__(self) -> bool: ... + def __contains__(self, key: KT) -> bool: ... + def contains(self, key: KT) -> bool: + """ + Returns ``True`` if the cache contains an entry for ``key``. + + Equivalent to ``key in self``. Prefer this method over ``key in self`` + to keep code compatible across different cache policies. + + Args: + key: The key to look up. + + Returns: + ``True`` if the key exists in the cache, ``False`` otherwise. + """ + ... + + def is_empty(self) -> bool: + """ + Returns ``True`` if the cache is empty. + + Returns: + ``True`` if the cache contains no entries. + """ + ... + + def is_full(self) -> bool: + """ + Returns ``True`` when the cumulative size has reached the maxsize limit. + + Returns: + ``True`` if the cache is at capacity. + """ + ... + + def insert( + self, key: KT, value: VT, *args: typing.Any, **kwargs: typing.Any + ) -> typing.Optional[VT]: ... + def __setitem__(self, key: KT, value: VT) -> None: ... + def update( + self, + iterable: _IterableType[KT, VT], + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: ... + def get( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Union[VT, DT]: ... + def __getitem__(self, key: KT) -> VT: ... + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT | DT]: ... + def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: + """ + Removes the specified key and returns the corresponding value. + + Args: + key: The key to remove. + default: Value to return if the key is not found. 
+ + Returns: + The value associated with ``key``, or ``default`` if not found. + + Raises: + KeyError: If the key is not found and no ``default`` is provided. + """ + ... + + def __delitem__(self, key: KT) -> None: ... + def popitem(self) -> typing.Tuple[KT, VT]: ... + def drain(self, n: int) -> int: + """ + Calls ``popitem()`` ``n`` times and returns the count of removed items. + + Args: + n: The number of items to remove. + + Returns: + The number of items successfully removed. + """ + ... + + def shrink_to_fit(self) -> None: + """Shrinks the internal allocation as close to the current length as possible.""" + ... + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + ... + + def __eq__(self, other: typing.Any) -> bool: ... + def __ne__(self, other: typing.Any) -> bool: ... + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... + def values(self) -> typing.Iterable[VT]: ... + def keys(self) -> typing.Iterable[KT]: ... + def __iter__(self) -> typing.Iterator[KT]: ... + def copy(self) -> typing.Self: ... + def __copy__(self) -> typing.Self: ... + def __getstate__(self) -> object: ... + def __setstate__(self, state: object) -> None: ... + def __repr__(self) -> str: ... + +class Cache(BaseCacheImpl[KT, VT]): + """ + A thread-safe, memory-efficient key-value cache with no eviction policy. + + Items remain in the cache until manually removed or the cache is cleared. + + ``Cache`` is essentially a configurable hashmap-like store. When an item is + inserted, it is stored directly without any ordering, priority tracking, or + access metadata. If a maximum size is configured, insertions beyond that + limit are rejected with an ``OverflowError``. All read and write operations + are thread-safe. 
+ + Because no eviction logic runs in the background, there is no overhead from + tracking usage order, frequency counters, or expiry timestamps. + + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ------ | ------- | + | Worst-case | O(1) | O(1) | O(1) | N/A | + + Pros: + - Minimal overhead: no bookkeeping for eviction means lower CPU and + memory usage per entry compared to policy-based caches. + - Predictable behavior: items are never silently removed, so cache hits + are deterministic once an item is stored. + - Thread-safe: safe for concurrent reads and writes out of the box. + - Configurable capacity: a hard size limit prevents unbounded memory + growth. + + Cons: + - No automatic eviction: the cache can fill up and stop accepting new + entries if a max size is set, requiring manual management. + - Unordered: unlike a standard ``dict`` (Python 3.7+), insertion order + is not preserved. + - Not suitable for volatile data: stale entries persist forever unless + explicitly invalidated. + + Use ``Cache`` when you have a fixed, well-known set of keys that are + expensive to compute and never go stale (e.g. parsed config values, + compiled regex patterns, loaded templates), and when the lowest possible + overhead is required. + + Avoid it when cached data can become stale, when the working set is + unpredictable in size, or when automatic memory pressure relief is needed. + + ```python + from cachebox import Cache + + cache = Cache(maxsize=100, iterable=None, capacity=100) + + # behaves like a regular dict + cache["key"] = "value" + # using `.insert(key, value)` is recommended + cache.insert("key", "value") + + print(cache["key"]) # value + + del cache["key"] + cache["key"] # KeyError: key + + # cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded + cache.update({i:i for i in range(200)}) + # OverflowError: The cache has reached the bound. 
+ ``` + """ + + # | Class | get | insert | delete | popitem | + # |---|---|---|---|---| + # | \`Cache\` | O(1) | O(1) | O(1) | N/A | + # | \`FIFOCache\` | O(1) | O(1) | O(min(i, n-i)) | O(1) | + # | \`RRCache\` | O(1) | O(1) | O(1) | O(1) | + # | \`LRUCache\` | O(1)~ | O(1)~ | O(1)~ | O(1)~ | + # | \`LFUCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + # | \`TTLCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | + # | \`VTTLCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + + Raises: + OverflowError: If the cache has reached its ``maxsize`` limit, + since this class has no eviction algorithm. + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. 
+ + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Always raises ``OverflowError``. + + ``Cache`` has no policy or algorithm to select an item for eviction. + + Raises: + OverflowError: Always, because ``Cache`` has no eviction policy. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. Items are not ordered. + + Returns: + An iterable of ``(key, value)`` tuples. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. Keys are not ordered. + + Returns: + An iterable of keys. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. Values are not ordered. + + Returns: + An iterable of values. + """ + ... + +class FIFOCache(BaseCacheImpl[KT, VT]): + """ + A cache with a First-In-First-Out (FIFO) eviction policy. + + When the cache is full, the oldest inserted item is always the first to be + removed, regardless of how often it has been accessed. + + Items are stored in insertion order. When capacity is reached, the item + that has been present the longest is evicted. There is no concept of + "recently used" or "frequently used" - age alone determines eviction order. + Conceptually it behaves like a queue: new items join the back and evictions + come from the front. + + This implementation backs that queue with a double-ended queue for O(1) + front removal, paired with a hash map for O(1) key lookups. 
Logical indices + (a monotonically increasing counter) are stored in the table rather than + physical deque positions, so eviction never requires rewriting the index. + A ``front_offset`` counter recovers physical positions at read time as + ``entries[table[key] - front_offset]``. + + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ---------------- | ------- | + | Worst-case | O(1) | O(1) | O(min(i, n-i)) | O(n) - very rare | + + Pros: + - Insert, lookup, and evict are all O(1) amortized. + - Eviction order is fully deterministic and easy to reason about. + - No per-read overhead: unlike LRU, FIFO requires no bookkeeping on + cache hits. + + Cons: + - Access-blind eviction: a hot item is evicted just as readily as one + never read, hurting hit rates on workloads with temporal locality. + - Logical-index indirection adds internal complexity vs. a naive queue. + - A rare O(n) index rebase (when ``front_offset`` nears + ``usize::MAX - isize::MAX``) introduces an occasional latency spike. + + Use ``FIFOCache`` when eviction order must be predictable and auditable, + access patterns are roughly uniform, or read overhead must be minimal + (insert-heavy workloads with infrequent re-reads). + + Avoid it when the workload has strong temporal locality; in those cases LRU + or LFU will deliver meaningfully better hit rates. + + ```python + from cachebox import FIFOCache + + cache = FIFOCache(5, {i:i*2 for i in range(5)}) + + print(len(cache)) # 5 + cache["new-key"] = "new-value" + print(len(cache)) # 5 + + print(cache.get(3, "default-val")) # 6 + print(cache.get(6, "default-val")) # default-val + + print(cache.popitem()) # (1, 2) + + # Returns the first key in cache; this is the one which will be removed by `popitem()`. + print(cache.first()) # 2 + ``` + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. 
+ + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the oldest item in the cache. + + Returns: + A ``(key, value)`` tuple for the item that was inserted first. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in insertion order. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in insertion order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. 
+ + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in insertion order. + """ + ... + + def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the key at position ``n`` in insertion order. + + The key at position 0 is the one that will be removed by ``popitem()``. + + Args: + n: The index to look up. Defaults to 0 (the oldest item). + + Returns: + The key at the given index. + + Raises: + IndexError: If the cache is empty or ``n`` is out of range. + """ + ... + + def last(self) -> typing.Optional[KT]: + """ + Returns the most recently inserted key. Equivalent to ``self.first(-1)``. + + Returns: + The key of the most recently inserted item. + + Raises: + IndexError: If the cache is empty. + """ + ... + +class RRCache(BaseCacheImpl[KT, VT]): + """A thread-safe, memory-efficient cache with a Random Replacement eviction policy. + + When the cache reaches its maximum size, a randomly selected item is + evicted to make room for new entries. + + Items are stored without any ordering or priority tracking. The Random + Replacement policy selects entries for eviction uniformly at random, + ensuring fair treatment across all cached items regardless of access + patterns. + + | | get | insert | delete | popitem(i) | + | ------------ | ----- | ------- | ------ | -------------- | + | Worst-case | O(1) | O(1) | O(1) | O(min(i, n-i)) | + + Pros: + - Low overhead: computationally cheap compared to tracking access order + or frequency. + - Thread-safe: safe for concurrent reads and writes out of the box. + - Configurable capacity: a hard size limit prevents unbounded memory + growth while allowing new entries through automatic eviction. + - No indefinite staleness: items are eventually replaced by the + eviction policy. + + Cons: + - Non-deterministic eviction: random selection means recently cached or + frequently accessed items may be unexpectedly removed. + - Unordered: insertion order is not preserved. 
+ - Less optimal than LRU/LFU on skewed access patterns. + + Use ``RRCache`` when the working set can grow unpredictably, access + patterns are roughly uniform, and low overhead with simple eviction logic + is preferred. + + Avoid it when access patterns are highly skewed, cache hits are + mission-critical, or fine-grained eviction control is required. + + ```python + from cachebox import RRCache + + cache = RRCache(10, {i:i for i in range(10)}) + print(cache.is_full()) # True + print(cache.is_empty()) # False + + # Returns a random key + print(cache.random_key()) # 4 + ``` + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. 
+ + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Randomly selects, removes, and returns a ``(key, value)`` pair. + + Returns: + A randomly chosen ``(key, value)`` tuple. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. Items are not ordered. + + Returns: + An iterable of ``(key, value)`` tuples. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. Keys are not ordered. + + Returns: + An iterable of keys. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. Values are not ordered. + + Returns: + An iterable of values. + """ + ... + + def random_key(self) -> KT: + """ + Randomly selects and returns a key from the cache. + + Returns: + A randomly chosen key. + + Raises: + KeyError: If the cache is empty. + """ + ... + +class LRUCache(BaseCacheImpl[KT, VT]): + """ + A cache with a Least-Recently-Used (LRU) eviction policy. + + When the cache is full, the item that has not been accessed for the longest + time is removed first, regardless of how many times it was accessed in the + past. + + Items are tracked by access recency - every read or write promotes an item + to "most recently used". When capacity is reached, the least recently used + item (accessed longest ago) is evicted. + + This implementation pairs a doubly-linked list with a hash map. 
The list + maintains items in access order (most recently used at the back, least + recently used at the front); the hash map stores cursors into the list for + O(1) lookups. On every access the item is moved to the back. On eviction + the front item is removed. A running total enables O(1) capacity checks. + + | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | --------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | + + Pros: + - Excellent hit rates on temporal-locality workloads. + - Insert, lookup, and evict are all O(1) amortized. + - Automatically adapts to access patterns without manual tuning. + - Per-hit cost is minimal (O(1) linked-list manipulation). + + Cons: + - Per-read overhead from updating the linked list on every cache hit. + - Burst traffic can keep a transiently hot item alive at the expense of + items with better long-term utility. + - Implementation complexity from doubly-linked list and cursor-based + hash table. + - Memory overhead from storing prev/next pointers for every entry. + + Use ``LRUCache`` when the workload exhibits temporal locality, hit rate is + the primary metric, or access patterns are unknown or unpredictable. + + Avoid it for write-heavy workloads with few re-reads, ultra-low-latency + requirements, or frequency-heavy bimodal access patterns (consider LFU + instead). + + ```python + from cachebox import LRUCache + + cache = LRUCache(0, {i:i*2 for i in range(10)}) + + # access `0` + print(cache[0]) # 0 + print(cache.least_recently_used()) # 1 + print(cache.popitem()) # (1, 2) + + # .peek() searches for a key-value in the cache and returns it without moving the key to recently used. + print(cache.peek(2)) # 4 + print(cache.popitem()) # (2, 4) + ``` + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. 
Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the least recently used item. + + Returns: + A ``(key, value)`` tuple for the least recently used item. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in access order. + """ + ... 
+ + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in access order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in access order. + """ + ... + + def peek( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a key without updating its recency. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def least_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key that has not been accessed for the longest time. + + Returns: + The least recently used key. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def most_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key that was accessed most recently. + + Returns: + The most recently used key. + + Raises: + KeyError: If the cache is empty. + """ + ... + +class LFUCache(BaseCacheImpl[KT, VT]): + """ + A cache with a Least-Frequently-Used (LFU) eviction policy. + + When the cache is full, the item with the lowest access count is evicted + first. Ties in frequency are broken by recency - among equally rare items, + the oldest is evicted. + + Access counts are tracked per key. This implementation uses a lazy binary + min-heap keyed on access frequency, paired with a hash map that maps each + key to its cursor (a stable pointer into the heap's backing buffer). The + heap is "lazy": it does not restore the heap invariant after every frequency + increment; instead it sets a dirty flag and defers re-sorting until the + next eviction, amortising heap-maintenance cost across many hits. 
+ + On a cache hit the frequency counter is incremented in O(1) and the heap is + marked dirty. On eviction the heap is sorted if dirty, then the + minimum-frequency item is popped in O(n log n) worst-case (amortised + O(log n) under typical distributions). Lookups are O(1) via the hash map. + + | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | -------------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + + Pros: + - Frequency-aware eviction protects hot items under heavy cache + pressure. + - O(1) cache hits: incrementing a counter and marking the heap dirty + is constant-time work with no structural reorganisation. + - Lazy heap sorting amortises the O(n log n) sort cost across many + inserts and hits. + + Cons: + - Eviction is O(n log n) worst-case, introducing latency spikes under + adversarial access patterns. + - Frequency counters accumulate indefinitely, causing "cache pollution" + where historically hot but currently cold items monopolise capacity. + - Access patterns must be skewed for LFU to outperform simpler + policies; on uniform workloads the extra bookkeeping is pure overhead. + + Use ``LFUCache`` when the workload has a stable hot set, cache pollution + from one-time scans is a concern, or hit rate matters more than worst-case + eviction latency. + + Avoid it when access patterns shift rapidly (use LRU instead) or when all + keys are accessed with roughly equal probability.
+ + ```python + from cachebox import LFUCache + + cache = LFUCache(5) + cache.insert('first', 'A') + cache.insert('second', 'B') + + # access 'first' twice + cache['first'] + cache['first'] + + # access 'second' once + cache['second'] + + assert cache.least_frequently_used() == 'second' + assert cache.least_frequently_used(2) is None # 2 is out of range + + for item in cache.items_with_frequency(): + print(item) + # ('second', 'B', 1) + # ('first', 'A', 2) + ``` + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``.
+ + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the least frequently used item. + + Returns: + A ``(key, value)`` tuple for the item with the lowest access count. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in frequency order. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in frequency order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in frequency order. + """ + ... + + def items_with_frequency(self) -> typing.Iterable[typing.Tuple[KT, VT, int]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs with their + frequency counter. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in frequency order. + """ + ... + + def peek( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a key without incrementing its frequency counter. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + """ + ... + + def least_frequently_used(self, n: int = 0) -> KT: + """ + Returns the key with the lowest access count. + + Args: + n: If given, returns the ``n``-th least frequently used key + (0-indexed). Defaults to 0. 
+ + Returns: + The key with the ``n``-th lowest access count. + + Raises: + IndexError: If the cache is empty or ``n`` is out of range. + + Warning: + This method may re-sort the cache. Do not call it while iterating + over the cache. + """ + ... + +class TTLCache(BaseCacheImpl[KT, VT]): + """ + A cache with time-to-live (TTL) expiration. + + Items expire automatically after a configurable duration. Eviction follows + a FIFO order among non-expired items when the cache is full. + """ + + def __init__( + self, + maxsize: int, + global_ttl: float | timedelta, + iterable: _IterableType[KT, VT] | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + ) -> None: + """ + Initializes a new TTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + global_ttl: Default time-to-live for all entries, in seconds or as + a ``timedelta``. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1. + """ + ... + + @property + def global_ttl(self) -> float: + """The configured ``global_ttl`` in seconds.""" + ... + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... 
+ + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the item that has been in the cache the longest. + + Returns: + A ``(key, value)`` tuple for the oldest item. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in insertion order. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in insertion order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in insertion order. + """ + ... + + def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the key at position ``n`` in insertion order. + + The key at position 0 is the one that will be removed by ``popitem()``. + + Args: + n: The index to look up. Defaults to 0 (the oldest item). + + Returns: + The key at the given index. 
+ + Raises: + IndexError: If the cache is empty or ``n`` is out of range. + """ + ... + + def last(self) -> typing.Optional[KT]: + """ + Returns the most recently inserted key. Equivalent to ``self.first(-1)``. + + Returns: + The key of the most recently inserted item. + + Raises: + IndexError: If the cache is empty. + """ + ... + + def expire(self, *, reuse: bool = False) -> None: + """ + Manually removes all expired key-value pairs from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + ... + + def get_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Retrieves a value along with its remaining TTL. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. + """ + ... + + def pop_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Removes a key and returns its value along with its remaining TTL. + + Args: + key: The key to remove. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. + """ + ... + + def popitem_with_expire(self) -> typing.Tuple[VT, DT, float]: + """ + Removes and returns the oldest item along with its remaining TTL. + + Returns: + A tuple of ``(key, value, remaining_ttl)`` where ``remaining_ttl`` + is the expiration duration in seconds. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items_with_expire(self) -> typing.Iterable[typing.Tuple[KT, VT, float]]: + """ + Returns an ordered iterable of items with their remaining TTL. 
+ + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value, remaining_ttl)`` tuples in insertion + order, where ``remaining_ttl`` is in seconds. + """ + ... + +class VTTLCache(BaseCacheImpl[KT, VT]): + """ + A cache with a Variable Time-To-Live (VTTL) eviction policy. + + Each item can be inserted with its own individual TTL (time-to-live). When + an item's TTL expires, it is considered stale and will be evicted. Items + inserted without a TTL never expire and are only evicted when the cache + reaches capacity. + """ + + def __init__( + self, + maxsize: int, + iterable: _IterableType[KT, VT] | None = None, + ttl: float | timedelta | datetime | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + ) -> None: + """ + Initializes a new VTTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + iterable: Initial data to populate the cache. + ttl: Time-to-live duration for ``iterable`` items. This *is not* a global ttl. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1. + """ + ... + + def insert( + self, + key: KT, + value: VT, + ttl: float | timedelta | datetime | None = None, + ) -> typing.Optional[VT]: + """ + Insert a key-value pair into the cache with an optional time-to-live (TTL). + Returns the previous value associated with the key, if it existed. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + ttl: An optional time-to-live duration for the item. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ...
+ + def update( + self, + iterable: _IterableType[KT, VT], + ttl: float | timedelta | datetime | None = None, + ) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + ttl: An optional time-to-live duration for items. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ttl: float | timedelta | datetime | None = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + ttl: An optional time-to-live duration for items. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the key-value pair that is closest to expiration. + + Returns: + A tuple containing the key and value of the removed item. + + Raises: + KeyError: If the cache is empty. + """ + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in insertion order. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in insertion order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in insertion order. + """ + ... 
+ + def expire(self, *, reuse: bool = False) -> None: + """ + Manually removes all expired key-value pairs from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + ... + + def get_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float | None]: + """ + Retrieves a value along with its remaining TTL. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. + """ + ... + + def pop_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float | None]: + """ + Removes a key and returns its value along with its remaining TTL. + + Args: + key: The key to remove. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. + """ + ... + + def popitem_with_expire(self) -> typing.Tuple[VT, DT, float | None]: + """ + Removes and returns the oldest item along with its remaining TTL. + + Returns: + A tuple of ``(key, value, remaining_ttl)`` where ``remaining_ttl`` + is the expiration duration in seconds. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items_with_expire(self) -> typing.Iterable[typing.Tuple[KT, VT, float | None]]: + """ + Returns an ordered iterable of items with their remaining TTL. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value, remaining_ttl)`` tuples in insertion + order, where ``remaining_ttl`` is in seconds. + """ + ... 
diff --git a/cachebox/utils.py b/cachebox/utils.py new file mode 100644 index 0000000..c7fd21b --- /dev/null +++ b/cachebox/utils.py @@ -0,0 +1,759 @@ +import _thread +import asyncio +import functools +import inspect +import typing +from collections import namedtuple +from copy import copy as _shallow_copy +from copy import deepcopy as _deep_copy + +from ._cachebox import BaseCacheImpl, LRUCache + +if typing.TYPE_CHECKING: + from ._core import _IterableType + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") +FT = typing.TypeVar("FT", bound=typing.Callable[..., typing.Any]) + +_PostProcess: typing.TypeAlias = typing.Callable[[typing.Any], typing.Any] +_Callback: typing.TypeAlias = typing.Callable[[int, typing.Any, typing.Any], typing.Any] + + +_COPY_TYPES = frozenset((dict, list, set)) + + +def postprocess_copy_mutables(value: VT) -> VT: + """ + Shallow-copy *value* before returning it (only `dict`, `list`, and `set`) + """ + if type(value) in _COPY_TYPES: + return _shallow_copy(value) + + return value + + +def postprocess_copy(value: VT) -> VT: + """Shallow-copy *value* before returning it""" + return _shallow_copy(value) + + +def postprocess_deepcopy_mutables(value: VT) -> VT: + """ + Deep-copy *value* before returning it (only `dict`, `list`, and `set`) + """ + if type(value) in _COPY_TYPES: + return _deep_copy(value) + + return value + + +def postprocess_deepcopy(value: VT) -> VT: + """Deep-copy *value* before returning it""" + return _deep_copy(value) + + +_KWDS_MARK = object() +_FAST_TYPES = frozenset((int, str)) + + +def make_key(*args, **kwds) -> typing.Hashable: + """ + Default cache key. + + Fast-path: a single ``int`` or ``str`` argument is returned as-is. + Otherwise a plain tuple (plus a kwargs sentinel when needed) is returned. 
+ """ + if not kwds: + if len(args) == 1 and type(args[0]) in _FAST_TYPES: + return args[0] + return args + + key = args + (_KWDS_MARK,) + for item in kwds.items(): + key += item + return key[0] if len(key) == 1 and type(key[0]) in _FAST_TYPES else key + + +def make_hash_key(*args, **kwds) -> int: + """ + Key as the hash of all positional and keyword arguments. + + Avoids storing the raw argument tuple, at the cost of potential hash + collisions mapping distinct inputs to the same cache slot. + """ + if not kwds: + return hash(args) + key = args + (_KWDS_MARK,) + for item in kwds.items(): + key += item + return hash(key) + + +def make_typed_key(*args, **kwds) -> tuple: + """ + Key that includes the runtime type of every argument. + + Ensures ``f(1)`` and ``f(1.0)`` are cached separately even though + ``1 == 1.0``. + """ + key: tuple = args + if kwds: + key += (_KWDS_MARK,) + for item in kwds.items(): + key += item + + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for v in kwds.values()) + + return key + + +class Frozen(BaseCacheImpl[KT, VT]): # pragma: no cover + """ + A wrapper class that prevents modifications to an underlying cache implementation. + + This class provides a read-only view of a cache, optionally allowing silent + suppression of modification attempts instead of raising exceptions. + + Example:: + + from cachebox import Frozen, FIFOCache + + cache = FIFOCache(10, {1:1, 2:2, 3:3}) + + frozen = Frozen(cache, ignore=True) + print(frozen[1]) # 1 + print(len(frozen)) # 3 + + # Frozen ignores this action and do nothing + frozen.insert("key", "value") + print(len(frozen)) # 3 + + # Let's try with ignore=False + frozen = Frozen(cache, ignore=False) + + frozen.insert("key", "value") + # TypeError: This cache is frozen. + """ + + __slots__ = ("__cache", "ignore") + + def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: + """ + Initialize a frozen cache wrapper. 
+ + Args: + cls: The underlying cache implementation to be frozen. + ignore: If ``True``, silently ignores modification attempts; if ``False``, raises + ``TypeError`` when modification is attempted. Default is ``False``. + """ + assert isinstance(cls, BaseCacheImpl) + assert type(cls) is not Frozen + + self.__cache = cls + self.ignore = ignore + + def _guard(self) -> None: + if not self.ignore: + raise TypeError("This cache is frozen.") + + @property + def cache(self) -> BaseCacheImpl[KT, VT]: + """Returns the wrapped cache implementation.""" + return self.__cache + + @property + def maxsize(self) -> int: + """The configured ``maxsize``.""" + return self.__cache.maxsize + + @property + def getsizeof(self) -> typing.Callable[[KT, VT], int] | None: + """Callable or None: The configured ``getsizeof`` function.""" + return self.__cache.getsizeof + + def current_size(self) -> int: + """ + Returns the current total cumulative size of all stored entries. + + Returns: + The sum of sizes of all entries currently in the cache. + """ + return self.__cache.current_size() + + def remaining_size(self) -> int: + """ + Returns the remaining available size. + + Returns: + The result of ``maxsize - current_size``. + """ + return self.__cache.remaining_size() + + def capacity(self) -> int: + """ + Returns the number of elements the map can hold without reallocating. + + Returns: + The current allocated capacity. + """ + return self.__cache.capacity() + + def __len__(self) -> int: + """ + Returns the number of entries currently in the cache. + + Returns: + The number of entries in the cache. + """ + return len(self.__cache) + + def __sizeof__(self) -> int: + return self.__cache.__sizeof__() + + def __bool__(self) -> bool: + return bool(self.__cache) + + def __contains__(self, key: KT) -> bool: + return self.__cache.contains(key) + + def contains(self, key: KT) -> bool: + """ + Returns ``True`` if the cache contains an entry for ``key``. + + Equivalent to ``key in self``. 
Prefer this method over ``key in self`` + to keep code compatible across different cache policies. + + Args: + key: The key to look up. + + Returns: + ``True`` if the key exists in the cache, ``False`` otherwise. + """ + return self.__cache.contains(key) + + def is_empty(self) -> bool: + """ + Returns ``True`` if the cache is empty. + + Returns: + ``True`` if the cache contains no entries. + """ + return self.__cache.is_empty() + + def is_full(self) -> bool: + """ + Returns ``True`` when the cumulative size has reached the maxsize limit. + + Returns: + ``True`` if the cache is at capacity. + """ + return self.__cache.is_full() + + def insert( + self, + key: KT, + value: VT, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT]: + return self._guard() + + def __setitem__(self, key: KT, value: VT) -> None: + return self._guard() + + def update( + self, + iterable: "_IterableType[KT, VT]", + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: + return self._guard() + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + return self.__cache.get(key, default) + + def __getitem__(self, key: KT) -> VT: + return self.__cache[key] + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT | DT]: + return self._guard() + + def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: + """ + Removes the specified key and returns the corresponding value. + + Args: + key: The key to remove. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + + Raises: + KeyError: If the key is not found and no ``default`` is provided. 
+ """ + return self._guard() # type: ignore[return-value] + + def __delitem__(self, key: KT) -> None: + return self._guard() + + def popitem(self) -> typing.Tuple[KT, VT]: + return self._guard() # type: ignore[return-value] + + def drain(self, n: int) -> int: + """ + Calls ``popitem()`` ``n`` times and returns the count of removed items. + + Args: + n: The number of items to remove. + + Returns: + The number of items successfully removed. + """ + return self._guard() # type: ignore[return-value] + + def shrink_to_fit(self) -> None: + """Shrinks the internal allocation as close to the current length as possible.""" + return self._guard() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + return self._guard() + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + return self.__cache.items() + + def values(self) -> typing.Iterable[VT]: + return self.__cache.values() + + def keys(self) -> typing.Iterable[KT]: + return self.__cache.keys() + + def __iter__(self) -> typing.Iterator[KT]: + return iter(self.__cache) + + def copy(self) -> "Frozen[KT, VT]": + return Frozen(self.__cache.copy(), ignore=self.ignore) + + def __copy__(self) -> "Frozen[KT, VT]": + return Frozen(self.__cache.copy(), ignore=self.ignore) + + def __repr__(self) -> str: + return "Frozen(%s)" % repr(self.__cache) + + +class _Lock: + __slots__ = ("_lock", "waiters") + + def __init__(self) -> None: + self._lock = _thread.allocate_lock() + self.waiters = 0 + + def __enter__(self) -> None: + self.waiters += 1 + self._lock.acquire() + + def __exit__(self, *_) -> None: + self.waiters -= 1 + self._lock.release() + + +class _AsyncLock: + __slots__ = ("_lock", "waiters") + + def __init__(self) -> None: + self._lock = asyncio.Lock() + self.waiters = 0 + + async def __aenter__(self) -> None: + self.waiters += 1 + await 
self._lock.acquire() + + async def __aexit__(self, *_) -> None: + self.waiters -= 1 + self._lock.release() + + +CacheInfo = namedtuple( + "CacheInfo", ("hits", "misses", "maxsize", "current_size", "length", "memory") +) +EVENT_MISS = 1 +EVENT_HIT = 2 + + +def _cached_wrapper( + func, + cache: BaseCacheImpl | typing.Callable, + key_maker: typing.Callable[[tuple, dict], typing.Hashable], + clear_reuse: bool, + callback: typing.Callable[[int, typing.Any, typing.Any], None] | None, + postprocess: _PostProcess | None, +): + cache_is_fn = callable(cache) + + # Per-instance caches receive `self` as args[0]; exclude it from the key. + _make_key = ( + (lambda a, k: key_maker(*a[1:], **k)) + if cache_is_fn + else (lambda a, k: key_maker(*a, **k)) + ) + + hits = misses = 0 + locks: dict[typing.Hashable, _Lock] = {} + pending_errors: dict[typing.Hashable, BaseException] = {} + + def _wrapped(*args, **kwds): + nonlocal hits, misses + + # Passing `cachebox__ignore=True` bypasses the cache and + # calls the function directly. + if kwds.pop("cachebox__ignore", False): + return func(*args, **kwds) + + _cache: BaseCacheImpl = cache(args[0]) if cache_is_fn else cache # type: ignore[arg-type] + key = _make_key(args, kwds) + + # Most calls are expected to hit the cache; avoid acquiring a lock. + # Implementations are thread-safe. + try: + result = _cache[key] + hits += 1 + if callback is not None: + callback(EVENT_HIT, key, result) + + return postprocess(result) if postprocess is not None else result + except KeyError: + pass + + lock = locks.get(key) + if lock is None: + locks[key] = lock = _Lock() + + # Acquire the per-key lock so that only one thread computes the value + # while the rest wait. + with lock: + # Re-raise any exception stored by a previous owner so that all + # waiters fail with the same error.
+ err = pending_errors.get(key) + if err is not None: + if lock.waiters == 0: + del pending_errors[key] + raise err + + # Re-check the cache; a previous waiter may have already populated + # it while we were waiting for the lock. + try: + result = _cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = func(*args, **kwds) + except Exception as exc: + if lock.waiters > 0: + pending_errors[key] = exc + raise + else: + _cache[key] = result + misses += 1 + event = EVENT_MISS + + if lock.waiters == 0: + locks.pop(key, None) + + if callback is not None: + callback(event, key, result) + + return postprocess(result) if postprocess is not None else result + + if not cache_is_fn: + _wrapped.cache = cache # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo( # type: ignore[attr-defined] + hits, + misses, + cache.maxsize, + cache.current_size(), + len(cache), + cache.__sizeof__(), + ) + + def cache_clear() -> None: + nonlocal hits, misses + cache.clear(reuse=clear_reuse) # type: ignore[union-attr] + hits = misses = 0 + locks.clear() + pending_errors.clear() + + _wrapped.cache_clear = cache_clear # type: ignore[attr-defined] + + _wrapped.callback = callback # type: ignore[attr-defined] + return _wrapped + + +def _async_cached_wrapper( + func, + cache: BaseCacheImpl | typing.Callable, + key_maker: typing.Callable[..., typing.Hashable], + clear_reuse: bool, + callback: _Callback | None, + postprocess: _PostProcess | None, +): + cache_is_fn = callable(cache) + _make_key = ( + (lambda a, k: key_maker(*a[1:], **k)) + if cache_is_fn + else (lambda a, k: key_maker(*a, **k)) + ) + + hits = misses = 0 + locks: dict[typing.Hashable, _AsyncLock] = {} + pending_errors: dict[typing.Hashable, BaseException] = {} + + async def _wrapped(*args, **kwds): + nonlocal hits, misses + + # Passing `cachebox__ignore=True` bypasses the cache and + # calls the function directly. 
+ if kwds.pop("cachebox__ignore", False): + return await func(*args, **kwds) + + _cache: BaseCacheImpl = cache(args[0]) if cache_is_fn else cache # type: ignore[arg-type] + key = _make_key(args, kwds) + + # Hot path - no lock needed. + try: + result = _cache[key] + hits += 1 + if callback is not None: + ret = callback(EVENT_HIT, key, result) + if inspect.isawaitable(ret): + await ret + return postprocess(result) if postprocess is not None else result + except KeyError: + pass + + lock = locks.get(key) + if lock is None: + locks[key] = lock = _AsyncLock() + + async with lock: + err = pending_errors.get(key) + if err is not None: + if lock.waiters == 0: + del pending_errors[key] + + raise err + + try: + result = _cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = await func(*args, **kwds) + except Exception as exc: + if lock.waiters > 0: + pending_errors[key] = exc + raise + else: + _cache[key] = result + misses += 1 + event = EVENT_MISS + + if lock.waiters == 0: + locks.pop(key, None) + + if callback is not None: + ret = callback(event, key, result) + if inspect.isawaitable(ret): + await ret + + return postprocess(result) if postprocess is not None else result + + if not cache_is_fn: + _wrapped.cache = cache # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo( # type: ignore[attr-defined] + hits, + misses, + cache.maxsize, + cache.current_size(), + len(cache), + cache.__sizeof__(), + ) + + def cache_clear() -> None: + nonlocal hits, misses + cache.clear(reuse=clear_reuse) # type: ignore[union-attr] + hits = misses = 0 + locks.clear() + pending_errors.clear() + + _wrapped.cache_clear = cache_clear # type: ignore[attr-defined] + + _wrapped.callback = callback # type: ignore[attr-defined] + return _wrapped + + +def cached( + cache: BaseCacheImpl | dict | typing.Callable[..., BaseCacheImpl] | None = None, + key_maker: typing.Callable[..., typing.Hashable] = make_key, + clear_reuse: bool = False, + callback: _Callback | 
None = None, + copy_level: int = 1, + postprocess: _PostProcess | None = postprocess_copy_mutables, +) -> typing.Callable[[FT], FT]: + """ + Decorator to memoize function/method results. + + Args: + cache: Cache instance, ``dict``, or callable ``(self) -> cache`` for + per-instance caches. ``None`` defaults to an unbounded + :class:`LRUCache`. + key_maker: Converts ``(args, kwds)`` to a hashable key. Built-ins: + :func:`make_key` (default), :func:`make_hash_key`, + :func:`make_typed_key`. + clear_reuse: Pass ``reuse=True`` to ``cache.clear()`` when + :func:`cache_clear` is called. + callback: Called as ``callback(event, key, value)`` on every hit/miss. + May be a coroutine in async contexts. + copy_level: It has been deprecated and no longer has any effect. Use + the postprocess parameter instead. + postprocess: Optional ``(value) -> value`` transform applied before + returning a result to the caller. Ready-to-use options: + + * ``None`` - return the cached object as-is. + * :func:`postprocess_copy` - shallow-copy. + * :func:`postprocess_copy_mutables` - shallow-copy only `dict`, `list` and `set` (default). + * :func:`postprocess_deepcopy` - deep-copy. + * :func:`postprocess_deepcopy_mutables` - deep-copy only `dict`, `list` and `set`. + + Note: + Pass ``cachebox__ignore=True`` at call-time to bypass the cache. + If *cache* isn't a lambda/function, these attributes will be attached to + your function: ``cache`` (property), ``cache_info`` (callable), ``cache_clear`` (callable), + and ``callback`` (property). + + Examples:: + + @cachebox.cached(cachebox.LRUCache(128)) + def add(a, b): + return a + b + + # Per-instance method cache + class Foo: + def __init__(self): + self._cache = cachebox.LRUCache(0) + + @cachebox.cached(lambda self: self._cache) + def compute(self, n): + return n * 2 + """ + if copy_level != 1: + import warnings + + warnings.warn( + "`copy_level` parameter has been deprecated and no longer has any effect. 
Use the `postprocess` parameter instead", + category=DeprecationWarning, + ) + + if cache is None: + cache = LRUCache(0) + elif type(cache) is dict: + cache = LRUCache(0, cache) # type: ignore[arg-type] + + cache_is_fn = callable(cache) + if not isinstance(cache, BaseCacheImpl) and not cache_is_fn: + raise TypeError("expected a cachebox cache or a callable, got %r" % (cache,)) + + def decorator(func: FT) -> FT: + builder = ( + _async_cached_wrapper + if inspect.iscoroutinefunction(func) + else _cached_wrapper + ) + wrapper = builder(func, cache, key_maker, clear_reuse, callback, postprocess) # type: ignore[arg-type] + return functools.update_wrapper(wrapper, func) # type: ignore[return-value] + + return decorator + + +def is_cached(func: object) -> bool: + """ + Return ``True`` if *func* was decorated with :func:`cached`. + + Args: + func: an object or function to check. + """ + return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) # type: ignore[union-attr] + + +def get_cached_cache(cached_func: object) -> BaseCacheImpl: + """ + A way to get ``cached_func.cache``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.cache # type: ignore + + +def get_cached_cache_info(cached_func: object) -> CacheInfo: + """ + A way to get ``cached_func.cache_info()``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.cache_info() # type: ignore + + +def get_cached_callback(cached_func: object) -> _Callback | None: + """ + A way to get ``cached_func.callback``, without type-hint warnings. 
+ + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.callback # type: ignore + + +def clear_cached_cache(cached_func: object) -> None: + """ + A way to call ``cached_func.cache_clear()``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.cache_clear() # type: ignore diff --git a/docs/docs/api/impls.md b/docs/docs/api/impls.md new file mode 100644 index 0000000..09fe2a8 --- /dev/null +++ b/docs/docs/api/impls.md @@ -0,0 +1,153 @@ + +::: cachebox._core.BaseCacheImpl + options: + members: + - __init__ + - maxsize + - getsizeof + - current_size + - remaining_size + - capacity + - __len__ + - __contains__ + - contains + - is_empty + - is_full + - insert + - __setitem__ + - update + - get + - __getitem__ + - setdefault + - pop + - __delitem__ + - popitem + - drain + - shrink_to_fit + - clear + - __eq__ + - __ne__ + - items + - values + - keys + - __iter__ + - copy + - __repr__ + +::: cachebox._core.Cache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + +::: cachebox._core.FIFOCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - first + - last + +::: cachebox._core.RRCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + +::: cachebox._core.LRUCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - peek + - least_recently_used + - most_recently_used + +::: 
cachebox._core.LFUCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - items_with_frequency + - peek + - least_frequently_used + +::: cachebox._cachebox.TTLCache + options: + members: + - __init__ + - sweep_interval + - stop_sweeper + - global_ttl + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - first + - last + - expire + - get_with_expire + - pop_with_expire + - popitem_with_expire + - items_with_expire + +::: cachebox._cachebox.VTTLCache + options: + members: + - __init__ + - sweep_interval + - stop_sweeper + - insert + - update + - setdefault + - popitem + - items + - values + - keys + - first + - last + - expire + - get_with_expire + - pop_with_expire + - popitem_with_expire + - items_with_expire diff --git a/docs/docs/api/index.md b/docs/docs/api/index.md new file mode 100644 index 0000000..8b264bd --- /dev/null +++ b/docs/docs/api/index.md @@ -0,0 +1,4 @@ +You can see + +- Core API reference [here](./impls.md) +- Utilities API reference [here](./utils.md) diff --git a/docs/docs/api/utils.md b/docs/docs/api/utils.md new file mode 100644 index 0000000..d2df650 --- /dev/null +++ b/docs/docs/api/utils.md @@ -0,0 +1,22 @@ + +::: cachebox.utils.postprocess_copy_mutables +::: cachebox.utils.postprocess_copy +::: cachebox.utils.postprocess_deepcopy_mutables +::: cachebox.utils.postprocess_deepcopy + +::: cachebox.utils.make_key +::: cachebox.utils.make_hash_key +::: cachebox.utils.make_typed_key + +::: cachebox.utils.Frozen + +::: cachebox.utils.CacheInfo +::: cachebox.utils.EVENT_MISS +::: cachebox.utils.EVENT_HIT + +::: cachebox.utils.cached +::: cachebox.utils.is_cached +::: cachebox.utils.get_cached_cache +::: cachebox.utils.get_cached_cache_info +::: cachebox.utils.get_cached_callback +::: cachebox.utils.clear_cached_cache diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md new file mode 100644 index 0000000..c236054 
--- /dev/null +++ b/docs/docs/getting-started.md @@ -0,0 +1,322 @@ +# Getting Started + +This guide walks you through the most common cachebox patterns. +All cache classes behave like Python dictionaries unless noted otherwise. + +## Using the `@cached` Decorator +The simplest way to cache a function's return value: + +```python hl_lines="3" +import cachebox + +@cachebox.cached(cachebox.FIFOCache(maxsize=128)) +def factorial(number: int) -> int: + fact = 1 + for num in range(2, number + 1): + fact *= num + return fact + +assert factorial(5) == 120 +``` + +The first parameter `cache`, you can specify the cache instance it should use for caching. + +```python hl_lines="4" +import cachebox + +@cachebox.cached( + cachebox.LRUCache(maxsize=128), +) +def factorial(number: int) -> int: + fact = 1 + for num in range(2, number + 1): + fact *= num + return fact + +assert factorial(5) == 120 +``` + +### Async Functions + +Coroutines are supported out of the box: + +```python +import cachebox + +@cachebox.cached(cachebox.LRUCache(maxsize=128)) +async def make_request(method: str, url: str) -> dict: + response = await client.request(method, url) + return response.json() +``` + +### Using a Custom Key Maker +There are 3 ready-to-use key maker functions, and by default the `@cached` decorator uses the simplest one of them. + +You can use ready-to-use functions, or create a custom one. 
+ +=== "Standard way" + + ```python hl_lines="3 4 8" + import cachebox + + def path_key(request): + return request.path + + @cachebox.cached( + cachebox.LRUCache(128), + key_maker=path_key, + ) + async def request_handler(request): + return Response("hello") + ``` + +=== "Using `lambda`" + + ```python hl_lines="5" + import cachebox + + @cachebox.cached( + cachebox.LRUCache(128), + key_maker=lambda request: request.path, + ) + async def request_handler(request): + return Response("hello") + ``` + +Ready to use key makers are: + +- [make_key function](api/utils.md#cachebox.utils.make_key) +- [make_typed_key function](api/utils.md#cachebox.utils.make_typed_key) +- [make_hash_key function](api/utils.md#cachebox.utils.make_hash_key) + + +### Callbacks on Cache Events +The `@cached` decorator supports a callback on every hit/miss, using the `callback` parameter. + +```python hl_lines="3 4 5 6 7 11" +import cachebox + +def on_cache_event(event: int, key, value): + if event == cachebox.EVENT_MISS: + print(f"MISS key={key}") + elif event == cachebox.EVENT_HIT: + print(f"HIT key={key}") + +@cachebox.cached( + cachebox.LRUCache(0), + callback=on_cache_event, +) +def add(a, b): + return a + b + +add(1, 2) # MISS key=(1, 2) +add(1, 2) # HIT key=(1, 2) +``` + +!!! tip + + `callback`s can be a coroutine in async contexts. + + +### Setting a Postprocessor +The `@cached` decorator also supports postprocessors, using the `postprocess` parameter. +It can be used as a transformer that is applied before returning a result to the caller. + +There are 4 ready-to-use postprocess functions, and by default the `@cached` decorator uses +[`postprocess_copy_mutables` function](api/utils.md#cachebox.utils.postprocess_copy_mutables). 
+ +```python hl_lines="3 4 5 9" +import cachebox + +def postprocess(result): + print(f"RESULT: {result}") + return result + +@cachebox.cached( + cachebox.LRUCache(0), + postprocess=postprocess, +) +def add(a, b): + return a + b + +add(1, 2) # RESULT: 3 +``` + +Ready to use postprocessors: + +- [postprocess_copy function](api/utils.md#cachebox.utils.postprocess_copy) +- [postprocess_copy_mutables function](api/utils.md#cachebox.utils.postprocess_copy_mutables) +- [postprocess_deepcopy function](api/utils.md#cachebox.utils.postprocess_deepcopy) +- [postprocess_deepcopy_mutables function](api/utils.md#cachebox.utils.postprocess_deepcopy_mutables) + +### Bypass the Cache for a Call +Sometimes you need to execute the wrapped function without reading from or writing to the cache. +Pass `cachebox__ignore=True` when calling the function: + +```python +import cachebox + +@cachebox.cached(cachebox.LRUCache(128)) +def add(a, b): + print("computing...") + return a + b + +add(1, 2) # computing... +add(1, 2) # returned from cache + +add(1, 2, cachebox__ignore=True) +# computing... +``` + +This affects only the current call. Future calls continue to use the cache normally. + +### Caching Methods + +For instance methods, each object often needs its own cache. The cache can be stored on the instance and provided dynamically using a callable. 
+ +```python hl_lines="6 8" +import cachebox + +class MyService: + def __init__(self, multiplier: int): + self.multiplier = multiplier + self._cache = cachebox.TTLCache(20, 10) + + @cachebox.cached(lambda self: self._cache) + def compute(self, char: str): + return char * self.multiplier + +svc = MyService(5) + +assert svc.compute("a") == "aaaaa" +assert svc.compute("a") == "aaaaa" # cached +``` + +Using a cache stored on the instance ensures that each object maintains its own cached values: + +```python +svc1 = MyService(2) +svc2 = MyService(5) + +assert svc1.compute("x") == "xx" +assert svc2.compute("x") == "xxxxx" +``` + +Because each instance has a separate cache, entries created by `svc1` are not visible to `svc2`. + +### Caching `@staticmethod`s +`@staticmethod`s behave like normal functions attached to a class. Since they do not receive `self` or `cls`, you can provide a cache instance directly. + +```python +import cachebox + +class TextUtils: + @staticmethod + @cachebox.cached(cachebox.LRUCache(128)) + def normalize(text: str) -> str: + print("normalizing...") + return text.strip().lower() + +TextUtils.normalize(" Hello ") +TextUtils.normalize(" Hello ") # cached +``` + +The cache is shared by all callers because the method does not belong to a specific instance. + +### Caching `@classmethod`s +`@classmethod`s receive the class (`cls`) as their first argument. +The cache can be shared across the class or selected dynamically based on the class. + +```python +import cachebox + +class UserRepository: + _cache = cachebox.LRUCache(128) + + @classmethod + @cachebox.cached(lambda cls: cls._cache) + def get_user(cls, user_id: int): + print("loading user...") + return {"id": user_id} + +UserRepository.get_user(1) +UserRepository.get_user(1) # cached +``` + +This pattern is useful when the cache should be associated with the class itself rather than with +individual instances. +Class methods can also be used with inheritance. 
Each subclass may provide its own cache: + +```python +import cachebox + +class BaseRepository: + _cache = cachebox.LRUCache(128) + + @classmethod + @cachebox.cached(lambda cls: cls._cache) + def get_item(cls, item_id): + return f"{cls.__name__}:{item_id}" + +class ProductRepository(BaseRepository): + _cache = cachebox.LRUCache(128) + +class OrderRepository(BaseRepository): + _cache = cachebox.LRUCache(128) +``` + +In this example, each repository class maintains an independent cache while reusing +the same cached method implementation. + +## Using the Cache Implementations +You can use all cache implementations without the `@cached` decorator. +You only need to import the classes you want and can work with them like regular dictionaries +(except for [`VTTLCache`](api/impls.md#cachebox._cachebox.VTTLCache), which has some differences). + +```python +from cachebox import FIFOCache + +cache = FIFOCache(maxsize=128) +cache["key"] = "value" +assert cache["key"] == "value" +assert cache.get("missing", "default") == "default" +``` + +You can see examples of each cache implementation in [API Reference](api/impls.md). These examples also appear in their docstrings. + +## Immutable (Frozen) Cache + +Wrap any cache with `Frozen` to prevent further writes: + +```python +from cachebox import Frozen, LRUCache + +cache = LRUCache(10, {1: "a", 2: "b"}) +frozen = Frozen(cache, ignore=False) + +frozen[3] = "c" # TypeError: This cache is frozen. +``` + +## Saving a Cache to Disk + +Use Python's `pickle` module: + +```python +import cachebox, pickle + +cache = cachebox.LRUCache(100, {i: i for i in range(50)}) + +with open("cache.pkl", "wb") as f: + pickle.dump(cache, f) + +with open("cache.pkl", "rb") as f: + loaded = pickle.load(f) + +assert cache == loaded +``` + +## Next Steps + +- Browse the full [API Reference](api/index.md) for every class and method. +- Check [Tips & Notes](tips.md) for copying caches and advanced patterns. 
+- Read the [Migration Guide](migration.md) if upgrading from v5. diff --git a/docs/docs/index.md b/docs/docs/index.md new file mode 100644 index 0000000..6f0840a --- /dev/null +++ b/docs/docs/index.md @@ -0,0 +1,76 @@ +--- +title: Cachebox +description: The fastest caching Python library written in Rust +--- + +
+

Cachebox

+ The fastest caching Python library written in Rust +
+ +--- + +Cachebox lets you perform powerful caching operations in Python as fast as possible. +It can make your application significantly faster and is an excellent choice for complex, +high-scale applications. + +## Key Features + +
+ +- :rocket: **Extremely Fast** + + 10–50x faster than other caching libraries - [see benchmarks](https://github.com/awolverp/cachebox-benchmark). + +- :bar_chart: **Low Memory Usage** + + Only ~50% of the memory consumed by a standard Python dictionary. + +- :thread: **Thread-Safe** + + All cache operations are fully thread-safe via internal locking. + +- :package: **Zero Dependencies** + + Written entirely in Rust - no Python dependencies to install. + +- :fire: **Full-Featured** + + 7 caching algorithms, TTL support, decorators, callbacks, and more. + +- :handshake: **Compatible** + + Works with Python 3.10+ on both CPython and PyPy. + +
+ +## When Should I Use Caching? +- **Frequent Data Access**: If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. + +- **Expensive Operations**: If you have operations that are computationally expensive, caching can help reduce the number of times these operations need to be performed. + +- **High Traffic Scenarios**: If your application handles high traffic, caching can help reduce the load on your server by reducing the number of requests that need to be processed. + +- **Web Page Rendering**: If you are rendering web pages, caching can help reduce the time it takes to generate the page by caching the results of expensive rendering operations. Caching HTML pages can speed up the delivery of static content. + +- **Rate Limiting**: If you have a rate limiting system in place, caching can help reduce the number of requests that need to be processed by the rate limiter. Also, caching can help you to manage rate limits imposed by third-party APIs by reducing the number of requests sent. + +- **Machine Learning Models**: If your application frequently makes predictions using the same input data, caching the results can save computation time. + + +## Quick Example + +```python +import cachebox + +@cachebox.cached(cachebox.LRUCache(maxsize=128)) +def get_user(user_id: int) -> dict: + # Expensive DB call - cached after first call + return db.query("SELECT * FROM users WHERE id = ?", user_id) + +# First call hits the database +user = get_user(42) + +# Subsequent calls are served from cache instantly +user = get_user(42) +``` diff --git a/docs/docs/installation.md b/docs/docs/installation.md new file mode 100644 index 0000000..313ec94 --- /dev/null +++ b/docs/docs/installation.md @@ -0,0 +1,32 @@ +**cachebox** is available on PyPI. You can use *pip* or *uv* to install cachebox. 
+Install cachebox using **pip** or **uv**: + +=== "Using pip" + + ```console + $ pip install -U cachebox + ``` + +=== "Using uv" + + ```console + $ uv add cachebox + ``` + +That's it - cachebox has **zero Python dependencies**. The Rust extension is distributed as a +pre-built wheel for all major platforms and Python versions. + +!!! tip "Use Virtual Environments" + + It's recommended to use virtual environments for installing and managing libraries in Python. + +!!! warning "Upgrading from v5 to v6" + Version 6 introduces several breaking changes. Please review the + [Migration Guide](migration.md) before upgrading. + +## Verifying the Installation + +```python +import cachebox +print(cachebox.__version__) +``` diff --git a/docs/docs/migration.md b/docs/docs/migration.md new file mode 100644 index 0000000..9c2f610 --- /dev/null +++ b/docs/docs/migration.md @@ -0,0 +1,140 @@ +# Migration Guide + +This page documents breaking changes between major versions. + +## v5 → v6 +These are changes that are not compatible with the previous version: + +### `copy_level` parameter has been deprecated in `@cached` +The `copy_level` parameter has been marked as deprecated and no longer has any effect. +The new `postprocess` feature gives you more control over results. + +```python +# v5 +@cachebox.cached(cachebox.RRCache(10), copy_level=2) +def add(a: int, b: int) -> dict: + return {a: b} + +# v6 +@cachebox.cached(cachebox.RRCache(10), postprocess=cachebox.postprocess_copy) +def add(a: int, b: int) -> dict: + return {a: b} +``` + +### `TTLCache.ttl` has been renamed to `TTLCache.global_ttl` +`TTLCache.ttl` has been renamed to `TTLCache.global_ttl` because it was causing developers to confuse the usage of +`TTLCache.ttl` with `VTTLCache`'s `ttl` parameter. 
+ +```python +# v5 +cache = cachebox.TTLCache(maxsize=125, ttl=10) +print(cache.ttl) + +# v6 +cache = cachebox.TTLCache(maxsize=125, global_ttl=10) +print(cache.global_ttl) +``` + +### Maxmemory limit has been removed +In version 5, we could limit the cache classes by memory using the `maxmemory` parameter. +But it caused a -75% performance regression, and that was not the library's target. Our focus is on performance & speed. +So we removed it, but added a new parameter: `getsizeof`. A callable that computes the size of a key-value pair. +Now you can use this to implement weighted caching - for example, sizing entries by memory footprint or byte length. +This could cover `maxmemory`, while keeping performance on top. + +```python +# v5 +cache = cachebox.LRUCache(maxsize=125, maxmemory=1000) + +# v6 +import sys + +def getsizeof(key, val): + return sys.getsizeof(key) + sys.getsizeof(val) + +cache = cachebox.LRUCache(maxsize=1000, getsizeof=getsizeof) +``` + +Due to this breaking change, we also removed the `memory` property from cache classes, and +added new methods: `current_size` and `remaining_size`. + +```python +# v5 +print(cache.memory) + +# v6 +print(cache.current_size()) +print(cache.remaining_size()) +``` + +### `cachedmethod` has been removed +`cachedmethod` was deprecated in v5.1.0 and has been fully removed in v6. Use `cached` with a `lambda self:` cache accessor instead: + +```python +# v5 +@cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) +def my_method(self, name: str): ... + +# v6 +@cachebox.cached(lambda self: self._cache) +def my_method(self, name: str): ... 
+``` + +## v4 → v5 +These are changes that are not compatible with the previous version: + +### `CacheInfo.cachememory` has been renamed to `CacheInfo.memory` +```python +info = func.cache_info() + +# v4 +print(info.cachememory) + +# v5 +print(info.memory) +``` + +### `__eq__` errors are no longer silently swallowed +In v4, errors raised inside a custom `__eq__` method were caught and converted to a `KeyError`. +In v5, they propagate normally. + +```python +class A: + def __hash__(self): return 1 + def __eq__(self, other): raise NotImplementedError + +cache = cachebox.FIFOCache(0, {A(): 10}) + +# v4: raises KeyError +# v5: raises NotImplementedError +cache[A()] +``` + +### Cache comparisons are no longer order-dependent +In v4, two caches with the same keys/values in a different insertion order were considered unequal. +In v5, cache equality follows standard dictionary semantics. + +```python +c1 = cachebox.FIFOCache(10) +c2 = cachebox.FIFOCache(10) + +c1.insert(1, 'a'); c1.insert(2, 'b') +c2.insert(2, 'b'); c2.insert(1, 'a') + +# v4: False (order-dependent) +# v5: True (dict-like) +print(c1 == c2) +``` + +### `cachedmethod` deprecated +`cachedmethod` is deprecated since v5.1.0. Use `cached` with a `lambda self:` cache accessor: + +```python +# Before (v4) +@cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) +def my_method(self, name: str): ... + +# After (v5.1.0+) +@cachebox.cached(lambda self: self._cache) +def my_method(self, name: str): ... 
+``` diff --git a/docs/docs/tips.md b/docs/docs/tips.md new file mode 100644 index 0000000..44dfcd3 --- /dev/null +++ b/docs/docs/tips.md @@ -0,0 +1,238 @@ +# Tips & Notes + +## Saving a Cache to a File + +Cachebox does not include built-in persistence, but all cache classes support Python's +`pickle` module: + +```python +import cachebox, pickle + +cache = cachebox.LRUCache(100, {i: i for i in range(78)}) + +# Save +with open("cache.pkl", "wb") as f: + pickle.dump(cache, f) + +# Load +with open("cache.pkl", "rb") as f: + loaded = pickle.load(f) + +assert cache == loaded +assert cache.capacity() == loaded.capacity() +``` + +!!! note + + Don't set `lambda` as `getsizeof` for caches when you want to pickle them. + +## Copying a Cache +All cache classes support Python's `copy` module, both shallow-copy and deep-copy: + +```python +import cachebox +import copy + +cache = cachebox.LRUCache(100, {i: i for i in range(10)}) + +shallow = copy.copy(cache) # shallow copy +deep = copy.deepcopy(cache) # deep copy +``` + +## Pre-allocating Capacity +If you know roughly how many items a cache will hold, set `capacity` to avoid +hash table rehashing during initial population: + +```python +cache = cachebox.LRUCache(maxsize=10_000, capacity=10_000) +``` + +## Thread Safety +All cache operations (reads, writes, eviction) are protected by internal Rust mutexes. +You do **not** need to add external synchronisation. + +## TTL and Frozen Caches +`Frozen` cannot prevent TTL expiration in `TTLCache` or `VTTLCache`. +Items will still expire naturally even when the cache is frozen. 
+ +```python +from cachebox import Frozen, TTLCache +import time + +cache = TTLCache(0, global_ttl=1, iterable={1: "a"}) +frozen = Frozen(cache) +time.sleep(1) +print(len(frozen)) # 0 — expired despite being frozen +``` + +## Attached attributes to cached functions +When you use the `@cached` decorator, if *cache* isn't a lambda/function, these attributes will be attached to +your function: + +=== "`cache` (property)" + + The cache class we're using for caching results. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + assert type(add.cache) is cachebox.LFUCache + ``` + + !!! tip + You can use [get_cached_cache function](api/utils.md#cachebox.utils.get_cached_cache) to prevent lint + & IDE warnings. + + ```python + assert type(cachebox.get_cached_cache(add)) is cachebox.LFUCache + ``` + +=== "`cache_info` (callable)" + + By calling it, you will get basic statistics. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + cache_info = add.cache_info() # CacheInfo(hits=0, misses=0, maxsize=20, current_size=0, length=0, memory=...) + ``` + + !!! tip + You can use [get_cached_cache_info function](api/utils.md#cachebox.utils.get_cached_cache_info) to prevent lint + & IDE warnings. + + ```python + cache_info = cachebox.get_cached_cache_info(add) # CacheInfo(hits=0, misses=0, maxsize=20, current_size=0, length=0, memory=...) + ``` + +=== "`cache_clear` (callable)" + + Call it if you want to clear cache and reset statistics. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + add.cache_clear() + ``` + + !!! tip + You can use [clear_cached_cache function](api/utils.md#cachebox.utils.clear_cached_cache) to prevent lint + & IDE warnings. 
+ + ```python + cachebox.clear_cached_cache(add) + ``` + +=== "`callback` (property)" + + The configured `callback`. + + ```python hl_lines="12" + import cachebox + + def callback(event, key, value): ... + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + callback=callback, + ) + def add(a: int, b: int) -> int: + return a + b + + assert add.callback is callback + ``` + + !!! tip + You can use [get_cached_callback function](api/utils.md#cachebox.utils.get_cached_callback) to prevent lint + & IDE warnings. + + ```python + assert cachebox.get_cached_callback(add) is callback + ``` + + +## TTLCache/VTTLCache background thread +By default, both `TTLCache` and `VTTLCache` use **lazy expiry**: stale entries are +only cleaned up when the cache is interacted with (e.g. on insert, lookup, or +iteration). A completely idle cache will hold expired entries in memory until +the next interaction. + +To reclaim expired entries proactively — independent of any method calls — pass a +`sweep_interval` to start a background sweeper thread: + +```python +import cachebox +from datetime import timedelta + +# Sweep every 30 seconds +ttl_cache = cachebox.TTLCache(maxsize=1000, global_ttl=60, sweep_interval=30) + +# timedelta is also accepted +vttl_cache = cachebox.VTTLCache(maxsize=1000, sweep_interval=timedelta(seconds=30)) +``` + +The thread is a **daemon thread**, meaning it will not prevent the Python process +from exiting when the main thread finishes. + +!!! note + + `sweep_interval` must be **≥ 1 second**. Smaller values raise a `ValueError`: + + ```python + cachebox.TTLCache(100, global_ttl=5, sweep_interval=0.5) + # ValueError: sweep_interval must be more than 1 seconds. 
+ ``` + +```python +cache = cachebox.TTLCache(100, global_ttl=60, sweep_interval=30) +print(cache.sweep_interval) # 30.0 + +# Without a sweeper, sweep_interval is None +cache2 = cachebox.TTLCache(100, global_ttl=60) +print(cache2.sweep_interval) # None +``` + +Call `stop_sweeper()` when you want to halt background sweeping without +destroying the cache itself. This is useful when you need to pause periodic +eviction or cleanly shut down the thread before the cache goes out of scope: + +```python +cache = cachebox.TTLCache(100, global_ttl=60, sweep_interval=10) + +# ... later, during shutdown ... +cache.stop_sweeper() +``` + +!!! note + + The sweeper thread is also stopped automatically when the cache is garbage + collected (via `__del__`), so manual cleanup is only necessary when explicit + lifecycle control is required. + +Use a **sweeper** when: +- The cache may be idle for long periods but memory should still be reclaimed. +- You need to bound the window in which stale data could be observed (e.g. via `items()` or `__iter__`). +- You are using `VTTLCache` with short, heterogeneous TTLs and want predictable cleanup. + +Stick with **lazy expiry** when: +- The cache sees regular traffic and on-access cleanup is sufficient. +- You want to avoid any background thread overhead. +- Memory pressure from temporarily lingering stale entries is acceptable. 
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 0000000..2e4f795 --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,95 @@ +site_name: Cachebox +site_description: The fastest caching Python library written in Rust +site_url: https://awolverp.github.io/cachebox +repo_url: https://github.com/awolverp/cachebox +repo_name: awolverp/cachebox +edit_uri: edit/main/docs/ + +theme: + name: material + palette: + - scheme: default + primary: deep orange + accent: orange + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: deep orange + accent: orange + toggle: + icon: material/brightness-4 + name: Switch to light mode + + features: + - table + - navigation.instant + - navigation.instant.progress + - navigation.tabs + - navigation.prune + - toc.integrate + - search + - search.suggest + - search.share + - projects + - optimize + - content.code.copy + - content.code.select + - content.code.annotate + - navigation.footer + + icon: + repo: fontawesome/brands/github + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: [cachebox] + options: + docstring_style: google + docstring_section_style: list + signature_crossrefs: true + inherited_members: true + parameter_headings: true + type_parameter_headings: true + show_root_heading: true + show_root_full_path: false + show_symbol_type_heading: true + show_symbol_type_toc: true + merge_init_into_class: true + show_signature_annotations: true + show_signature_type_parameters: true + show_bases: false + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.tabbed: + alternate_style: true + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - tables + - attr_list + - md_in_html + - toc: + permalink: 
true + +nav: + - Home: index.md + - Installation: installation.md + - Getting Started: getting-started.md + - Tips & Notes: tips.md + - Migration Guide: migration.md + - API Reference: + - API Reference: api/index.md + - Classes: api/impls.md + - Utilities: api/utils.md diff --git a/pyproject.toml b/pyproject.toml index d2b779e..0b3db93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -43,11 +42,7 @@ Homepage = 'https://github.com/awolverp/cachebox' [project.optional-dependencies] -[tool.pytest.ini_options] -asyncio_default_fixture_loop_scope = "function" - [tool.maturin] -python-source = "python" features = ["pyo3/extension-module"] module-name = "cachebox._core" diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py deleted file mode 100644 index 3438d0c..0000000 --- a/python/cachebox/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -from ._core import ( - __author__ as __author__, - __version__ as __version__, -) -from ._cachebox import ( - BaseCacheImpl as BaseCacheImpl, - Cache as Cache, - FIFOCache as FIFOCache, - RRCache as RRCache, - LRUCache as LRUCache, - LFUCache as LFUCache, - TTLCache as TTLCache, - VTTLCache as VTTLCache, - IteratorView as IteratorView, -) -from .utils import ( - Frozen as Frozen, - cached as cached, - cachedmethod as cachedmethod, - make_key as make_key, - make_hash_key as make_hash_key, - make_typed_key as make_typed_key, - EVENT_HIT as EVENT_HIT, - EVENT_MISS as EVENT_MISS, - is_cached as is_cached, -) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py deleted file mode 100644 index fb47917..0000000 --- a/python/cachebox/_cachebox.py +++ /dev/null @@ 
-1,2198 +0,0 @@ -import copy as _std_copy -import typing -from datetime import datetime, timedelta - -from . import _core -from ._core import BaseCacheImpl - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - - -def _items_to_str(items: typing.Iterable[typing.Any], length) -> str: - if length <= 50: - return "{" + ", ".join(f"{k!r}: {v!r}" for k, v in items) + "}" - - c = 0 - left = [] - - while c < length: - k, v = next(items) # type: ignore[call-overload] - - if c <= 50: - left.append(f"{k!r}: {v!r}") - - else: - break - - c += 1 - - return "{%s, ... %d more ...}" % (", ".join(left), length - c) - - -class IteratorView(typing.Generic[VT]): - __slots__ = ("iterator", "func") - - def __init__(self, iterator, func: typing.Callable[[tuple], typing.Any]): - self.iterator = iterator - self.func = func - - def __iter__(self): - self.iterator = self.iterator.__iter__() - return self - - def __next__(self) -> VT: - return self.func(self.iterator.__next__()) - - -class Cache(BaseCacheImpl[KT, VT]): - """ - A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. - - Provides a flexible key-value storage mechanism with: - - Configurable maximum size (zero means unlimited) - - Lower memory usage compared to standard dict - - Thread-safe operations - - Useful memory management methods - - Differs from standard dict by: - - Being thread-safe - - Unordered storage - - Size limitation - - Memory efficiency - - Additional cache management methods - - Supports initialization with optional initial data and capacity, - and provides dictionary-like access with additional cache-specific operations. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[dict, typing.Iterable[tuple], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new Cache instance. - - Args: - maxsize (int): Maximum number of elements the cache can hold. 
Zero means unlimited. - iterable (Union[Cache, dict, tuple, Generator, None], optional): Initial data to populate the cache. Defaults to None. - capacity (int, optional): Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy, it works same as `maxsize` if objects do not support `__sizeof__` - method. - - Creates a new cache with specified size constraints and optional initial data. The cache can be pre-sized - to improve performance when the number of expected elements is known in advance. - """ - self._raw = _core.Cache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. 
The key is not updated, though; - - Note: raises `OverflowError` if the cache reached the maxsize limit, - because this class does not have any algorithm. - """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. Return the value for key if key is - in the cache, else `default`. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.NoReturn: # pragma: no cover - raise NotImplementedError() - - def drain(self, n: int) -> typing.NoReturn: # pragma: no cover - raise NotImplementedError() - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - - Note: raises `OverflowError` if the cache reached the maxsize limit. 
- """ - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, Cache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, Cache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. 
- """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def copy(self) -> "Cache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "Cache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "Cache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class FIFOCache(BaseCacheImpl[KT, VT]): - """ - A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. - - This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. - Supports various operations like insertion, retrieval, deletion, and iteration. - - Attributes: - maxsize: The maximum number of items the cache can hold. - capacity: The initial capacity of the cache before resizing. - - Key features: - - Deterministic item eviction order (oldest items removed first) - - Efficient key-value storage and retrieval - - Supports dictionary-like operations - - Allows optional initial data population - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new FIFOCache instance. - - Args: - maxsize: The maximum number of items the cache can hold. - iterable: Optional initial data to populate the cache. Can be another FIFOCache, - a dictionary, tuple, generator, or None. 
- capacity: Optional initial capacity of the cache before resizing. Defaults to 0. - maxmemory: Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - When maxmemory is set, updating an existing key can evict the updated key - if it is the oldest entry. - """ - self._raw = _core.FIFOCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. 
- """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Removes the element that has been in the cache the longest.""" - try: - return self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, FIFOCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, FIFOCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. 
- """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def first(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.get_index(n) - - def last(self) -> typing.Optional[KT]: - """ - Returns the last key in cache. Equals to `self.first(-1)`. - """ - return self._raw.get_index(len(self._raw) - 1) - - def copy(self) -> "FIFOCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "FIFOCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "FIFOCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class RRCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe cache implementation with Random Replacement (RR) policy. 
- - This cache randomly selects and removes elements when the cache reaches its maximum size, - ensuring a simple and efficient caching mechanism with configurable capacity. - - Supports operations like insertion, retrieval, deletion, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new RRCache instance. - - Args: - maxsize (int): Maximum size of the cache. A value of zero means unlimited capacity. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. Defaults to None. - capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - When maxmemory is set, updates can evict any key, including the updated key. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Note: - - The cache size limit is immutable after initialization. - - If an iterable is provided, the cache will be populated using the update method. 
- """ - self._raw = _core.RRCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. - """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. 
- default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Randomly selects and removes a (key, value) pair from the cache.""" - try: - return self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def random_key(self) -> KT: - """ - Randomly selects and returns a key from the cache. - Raises `KeyError` If the cache is empty. 
- """ - try: - return self._raw.random_key() - except _core.CoreKeyError: - raise KeyError() from None - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, RRCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, RRCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. 
- """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def copy(self) -> "RRCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "RRCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "RRCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class LRUCache(BaseCacheImpl[KT, VT]): - """ - Thread-safe Least Recently Used (LRU) cache implementation. - - Provides a cache that automatically removes the least recently used items when - the cache reaches its maximum size. Supports various operations like insertion, - retrieval, and management of cached items with configurable maximum size and - initial capacity. - - Key features: - - Configurable maximum cache size - - Optional initial capacity allocation - - Thread-safe operations - - Efficient key-value pair management - - Supports initialization from dictionaries or iterables - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new LRU Cache instance. - - Args: - maxsize (int): Maximum size of the cache. Zero indicates unlimited size. - iterable (dict | Iterable[tuple], optional): Initial data to populate the cache. - capacity (int, optional): Pre-allocated capacity for the cache to minimize reallocations. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. 
- On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Notes: - - The cache size is immutable after initialization. - - If an iterable is provided, it will be used to populate the cache. - """ - self._raw = _core.LRUCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Searches for a key-value in the cache and returns it (without moving the key to recently used). - """ - try: - return self._raw.peek(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the least recently used item from the cache and returns it as a (key, value) tuple. - Raises KeyError if the cache is empty. 
- """ - try: - return self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, LRUCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, LRUCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. 
- - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def least_recently_used(self) -> typing.Optional[KT]: - """ - Returns the key in the cache that has not been accessed in the longest time. - """ - return self._raw.least_recently_used() - - def most_recently_used(self) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed in the shortest time. - """ - return self._raw.most_recently_used() - - def copy(self) -> "LRUCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "LRUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "LRUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class LFUCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe Least Frequently Used (LFU) cache implementation. - - This cache removes elements that have been accessed the least number of times, - regardless of their access time. It provides methods for inserting, retrieving, - and managing cache entries with configurable maximum size and initial capacity. 
- - Key features: - - Thread-safe cache with LFU eviction policy - - Configurable maximum size and initial capacity - - Supports initialization from dictionaries or iterables - - Provides methods for key-value management similar to dict - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new Least Frequently Used (LFU) cache. - - Args: - maxsize (int): Maximum size of the cache. A value of zero means unlimited size. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. - capacity (int, optional): Initial hash table capacity to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - The cache uses a thread-safe LFU eviction policy, removing least frequently accessed items when the cache reaches its maximum size. 
- """ - self._raw = _core.LFUCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. - """ - return self._raw.insert(key, value) - - def peek( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Union[VT, DT]: # pragma: no cover - """ - Searches for a key-value in the cache and returns it (without moving the key to recently used). 
- """ - try: - return self._raw.peek(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes and returns the least frequently used (LFU) item from the cache. 
- """ - try: - return self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, LFUCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, LFUCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. 
- """ - return IteratorView(self._raw.items(), lambda x: (x[0], x[1])) - - def items_with_frequency(self) -> IteratorView[typing.Tuple[KT, VT, int]]: - """ - Returns an iterable view - containing tuples of `(key, value, frequency)` - of the cache's items along with their access frequency. - - Notes: - - The returned iterator should not be used to modify the cache. - - Frequency represents how many times the item has been accessed. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed the least, regardless of time. - - If n is given, returns the nth least frequently used key. - - Notes: - - This method may re-sort the cache which can cause iterators to be stopped. - - Do not use this method while using iterators. 
- """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.least_frequently_used(n) - - def copy(self) -> "LFUCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "LFUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "LFUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - # NOTE: we cannot use self._raw.items() here because iterables a tuples of (key, value, frequency) - _items_to_str(self.items(), len(self._raw)), - ) - - -class TTLCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. - - This cache automatically removes elements that have expired based on their time-to-live setting. - Supports various operations like insertion, retrieval, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - ttl: typing.Union[float, timedelta], - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new TTL cache instance. - - Args: - maxsize: Maximum number of elements the cache can hold. - ttl: Time-to-live for cache entries, either as seconds or a timedelta. - iterable: Optional initial items to populate the cache, can be a dict or iterable of tuples. - capacity: Optional initial capacity for the underlying cache storage. Defaults to 0. - maxmemory: Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. 
In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Raises: - ValueError: If the time-to-live (ttl) is not a positive number. - """ - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be a positive number and non-zero") - - self._raw = _core.TTLCache(maxsize, ttl, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - @property - def ttl(self) -> float: - return self._raw.ttl() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Retrieves the value and expiration duration for a given key from the cache. - - Returns a tuple containing the value associated with the key and its duration. - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.get(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
- """ - try: - return self._raw.remove(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Removes the specified key from the cache and returns its value and expiration duration. - - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to remove from the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.remove(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Removes the element that has been in the cache the longest.""" - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack2() - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Removes and returns the element that has been in the cache the longest, along with its key and expiration duration. - - If the cache is empty, raises a KeyError. 
- - Returns: - A tuple of (key, value, duration), where: - - key is the key of the removed item - - value is the value of the removed item - - duration is the time-to-live for the removed item - """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack3() - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, TTLCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, TTLCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. 
- """ - self._raw.clear(reuse) - - def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: - """ - Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack3()) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack2()) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.key()) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.value()) - - def first(self, n: int = 0) -> typing.Optional[KT]: # pragma: no cover - """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.get_index(n) - - def last(self) -> typing.Optional[KT]: - """ - Returns the last key in cache. Equals to `self.first(-1)`. - """ - return self._raw.get_index(len(self._raw) - 1) - - def expire(self) -> None: # pragma: no cover - """ - Manually removes expired key-value pairs from memory and releases their memory. - - Notes: - - This operation is typically automatic and does not require manual invocation. 
- """ - self._raw.expire() - - def copy(self) -> "TTLCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "TTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "TTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d, ttl=%f](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - self._raw.ttl(), - _items_to_str(self.items(), len(self._raw)), - ) - - -class VTTLCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. - - This cache allows storing key-value pairs with optional expiration times. When an item expires, - it is automatically removed from the cache. The cache supports a maximum size and provides - various methods for inserting, retrieving, and managing cached items. - - Key features: - - Per-key time-to-live (TTL) support - - Configurable maximum cache size - - Thread-safe operations - - Automatic expiration of items - - Supports dictionary-like operations such as get, insert, update, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - ttl: typing.Union[float, timedelta, datetime, None] = None, # This is not a global TTL! - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new VTTLCache instance. - - Args: - maxsize (int): Maximum size of the cache. Zero indicates unlimited size. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. 
- ttl (float or timedelta or datetime, optional): Time-to-live duration for `iterable` items. - capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Raises: - ValueError: If provided TTL is zero or negative. - """ - self._raw = _core.VTTLCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable, ttl) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert( - self, - key: KT, - value: VT, - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> typing.Optional[VT]: - """ - Insert a key-value pair into the cache with an optional time-to-live (TTL). - Returns the previous value associated with the key, if it existed. - - Args: - key (KT): The key to insert. - value (VT): The value to associate with the key. - ttl (float or timedelta or datetime, optional): Time-to-live duration for the item. - If a timedelta or datetime is provided, it will be converted to seconds. 
- - Raises: - ValueError: If the provided TTL is zero or negative. - """ - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - return self._raw.insert(key, value, ttl) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Retrieves the value and expiration duration for a given key from the cache. - - Returns a tuple containing the value associated with the key and its duration. - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). 
- """ - try: - pair = self._raw.get(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Removes the specified key from the cache and returns its value and expiration duration. - - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to remove from the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.remove(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def setdefault( - self, - key: KT, - default: typing.Optional[DT] = None, - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> typing.Union[VT, DT]: - """ - Inserts a key-value pair into the cache with an optional time-to-live (TTL). - - If the key is not in the cache, it will be inserted with the default value. - If the key already exists, its current value is returned. - - Args: - key: The key to insert or retrieve from the cache. - default: The value to insert if the key is not present. Defaults to None. - ttl: Optional time-to-live for the key. Can be a float (seconds), timedelta, or datetime. - If not specified, the key will not expire. 
- - Returns: - The value associated with the key, either existing or the default value. - - Raises: - ValueError: If the provided TTL is not a positive value. - """ - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - return self._raw.setdefault(key, default, ttl) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes and returns the key-value pair that is closest to expiration. - - Returns: - A tuple containing the key and value of the removed item. - - Raises: - KeyError: If the cache is empty. - """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - else: - return val.pack2() - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Removes and returns the key-value pair that is closest to expiration, along with its expiration duration. - - Returns: - A tuple containing the key, value, and expiration duration of the removed item. - - Raises: - KeyError: If the cache is empty. 
- """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack3() - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update( - self, - iterable: typing.Union[dict, typing.Iterable[tuple]], - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - self._raw.update(iterable, ttl) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value, None) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, VTTLCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, VTTLCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. 
- """ - self._raw.clear(reuse) - - def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: - """ - Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack3()) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack2()) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.key()) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.value()) - - def expire(self) -> None: # pragma: no cover - """ - Manually removes expired key-value pairs from memory and releases their memory. - - Notes: - - This operation is typically automatic and does not require manual invocation. 
- """ - self._raw.expire() - - def copy(self) -> "VTTLCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "VTTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "VTTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self.items(), len(self._raw)), - ) diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi deleted file mode 100644 index b246b0e..0000000 --- a/python/cachebox/_core.pyi +++ /dev/null @@ -1,83 +0,0 @@ -import typing - -__version__: str -__author__: str - -class CoreKeyError(Exception): - """ - An exception when a key is not found in a cache. - This exception is internal to the library core and won't affect you. - """ - - ... - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - -class BaseCacheImpl(typing.Generic[KT, VT]): - """ - Base implementation for cache classes in the cachebox library. - - This abstract base class defines the generic structure for cache implementations, - supporting different key and value types through generic type parameters. - Serves as a foundation for specific cache variants like Cache and FIFOCache. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - maxmemory: int = ..., - ) -> None: ... - @staticmethod - def __class_getitem__(*args: typing.Any) -> None: ... - @property - def maxsize(self) -> int: ... - @property - def maxmemory(self) -> int: ... - def __len__(self) -> int: ... 
- def __sizeof__(self) -> int: ... - def __bool__(self) -> bool: ... - def __contains__(self, key: KT) -> bool: ... - def __setitem__(self, key: KT, value: VT) -> None: ... - def __getitem__(self, key: KT) -> VT: ... - def __delitem__(self, key: KT) -> None: ... - def __str__(self) -> str: ... - def __iter__(self) -> typing.Iterator[KT]: ... - def __eq__(self, other: typing.Any) -> bool: ... - def __ne__(self, other: typing.Any) -> bool: ... - def capacity(self) -> int: ... - def memory(self) -> int: ... - def is_full(self) -> bool: ... - def is_empty(self) -> bool: ... - def insert( - self, key: KT, value: VT, *args: typing.Any, **kwargs: typing.Any - ) -> typing.Optional[VT]: ... - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: ... - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: ... - def setdefault( - self, - key: KT, - default: typing.Optional[DT] = None, - *args: typing.Any, - **kwargs: typing.Any, - ) -> typing.Optional[VT | DT]: ... - def popitem(self) -> typing.Tuple[KT, VT]: ... - def drain(self, n: int) -> int: ... - def clear(self, *, reuse: bool = False) -> None: ... - def shrink_to_fit(self) -> None: ... - def update( - self, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], - *args: typing.Any, - **kwargs: typing.Any, - ) -> None: ... - def keys(self) -> typing.Iterable[KT]: ... - def values(self) -> typing.Iterable[VT]: ... - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... - def __copy__(self) -> "BaseCacheImpl[KT, VT]": ... - def __deepcopy__(self, memo: typing.Dict[str, object]) -> "BaseCacheImpl[KT, VT]": ... - def copy(self) -> "BaseCacheImpl[KT, VT]": ... 
diff --git a/python/cachebox/py.typed b/python/cachebox/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py deleted file mode 100644 index b520d28..0000000 --- a/python/cachebox/utils.py +++ /dev/null @@ -1,599 +0,0 @@ -import _thread -import asyncio -import functools -import inspect -import typing -from collections import defaultdict, namedtuple - -from ._cachebox import BaseCacheImpl, FIFOCache - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") -FT = typing.TypeVar("FT", bound=typing.Callable[..., typing.Any]) - - -class Frozen(BaseCacheImpl[KT, VT]): # pragma: no cover - """ - A wrapper class that prevents modifications to an underlying cache implementation. - - This class provides a read-only view of a cache, optionally allowing silent - suppression of modification attempts instead of raising exceptions. - """ - - __slots__ = ("__cache", "ignore") - - def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: - """ - Initialize a frozen cache wrapper. 
- - :param cls: The underlying cache implementation to be frozen - :type cls: BaseCacheImpl[KT, VT] - :param ignore: If True, silently ignores modification attempts; if False, raises TypeError when modification is attempted - :type ignore: bool, optional - """ - assert isinstance(cls, BaseCacheImpl) - assert type(cls) is not Frozen - - self.__cache = cls - self.ignore = ignore - - @property - def cache(self) -> BaseCacheImpl[KT, VT]: - return self.__cache - - @property - def maxsize(self) -> int: - return self.__cache.maxsize - - @property - def maxmemory(self) -> int: - return self.__cache.maxmemory - - def __len__(self) -> int: - return len(self.__cache) - - def __sizeof__(self) -> int: - return self.__cache.__sizeof__() - - def __bool__(self) -> bool: - return bool(self.__cache) - - def __contains__(self, key: KT) -> bool: - return key in self.__cache - - def __setitem__(self, key: KT, value: VT) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def __getitem__(self, key: KT) -> VT: - return self.__cache[key] - - def __delitem__(self, key: KT) -> None: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> typing.Iterator[KT]: - return iter(self.__cache) - - def __richcmp__(self, other: typing.Any, op: int) -> bool: - return self.__cache.__richcmp__(other, op) - - def capacity(self) -> int: - return self.__cache.capacity() - - def memory(self) -> int: - return self.__cache.memory() - - def is_full(self) -> bool: - return self.__cache.is_full() - - def is_empty(self) -> bool: - return self.__cache.is_empty() - - def insert(self, key: KT, value: VT, *args, **kwargs) -> typing.Optional[VT]: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - return self.__cache.get(key, default) - - def pop(self, key: KT, default: 
typing.Optional[DT] = None) -> typing.Union[VT, DT]: - if self.ignore: - return None # type: ignore[return-value] - - raise TypeError("This cache is frozen.") - - def setdefault( - self, key: KT, default: typing.Optional[DT] = None, *args, **kwargs - ) -> typing.Optional[typing.Union[VT, DT]]: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def popitem(self) -> typing.Tuple[KT, VT]: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def drain(self, n: int) -> int: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def clear(self, *, reuse: bool = False) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def shrink_to_fit(self) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def update( - self, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], - *args, - **kwargs, - ) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def keys(self) -> typing.Iterable[KT]: - return self.__cache.keys() - - def values(self) -> typing.Iterable[VT]: - return self.__cache.values() - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - return self.__cache.items() - - -class _LockWithCounter: - """ - A lock with a counter to track the number of waiters. - - This class provides a lock mechanism that supports both synchronous and asynchronous contexts, - with the ability to track the number of threads or coroutines waiting to acquire the lock. 
- """ - - __slots__ = ("lock", "waiters") - - def __init__(self, is_async: bool = False): - self.lock = _thread.allocate_lock() if not is_async else asyncio.Lock() - self.waiters = 0 - - async def __aenter__(self) -> None: - self.waiters += 1 - await self.lock.acquire() # type: ignore[misc] - - async def __aexit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - def __enter__(self) -> None: - self.waiters += 1 - self.lock.acquire() - - def __exit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - -def _copy_if_need(obj: VT, tocopy=(dict, list, set), level: int = 1) -> VT: - from copy import copy - - if level == 0: - return obj - - if level == 2: - return copy(obj) - - return copy(obj) if (type(obj) in tocopy) else obj - - -def make_key(args: tuple, kwds: dict, fasttype=(int, str)): - """ - Create a hashable key from function arguments for caching purposes. - - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - fasttype (tuple, optional): Types that can be directly used as keys. Defaults to (int, str). - - Returns: - A hashable key representing the function arguments, optimized for simple single-argument cases. - """ - key = args - if kwds: - key += (object,) - for item in kwds.items(): - key += item - - if fasttype and len(key) == 1 and type(key[0]) in fasttype: - return key[0] - - return key - - -def make_hash_key(args: tuple, kwds: dict): - """ - Create a hashable hash key from function arguments for caching purposes. - - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - - Returns: - int: A hash value representing the function arguments. - """ - return hash(make_key(args, kwds)) - - -def make_typed_key(args: tuple, kwds: dict): - """ - Create a hashable key from function arguments that includes type information. 
- - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - - Returns: - A hashable key representing the function arguments, including the types of the arguments. - """ - key = make_key(args, kwds, fasttype=()) - - key += tuple(type(v) for v in args) - if kwds: - key += tuple(type(v) for v in kwds.values()) - - return key - - -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "memory"]) -EVENT_MISS = 1 -EVENT_HIT = 2 - - -def _cached_wrapper( - func, - cache: typing.Union[BaseCacheImpl, typing.Callable], - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -): - is_method = cache_is_function = inspect.isfunction(cache) - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks: defaultdict[typing.Hashable, _LockWithCounter] = defaultdict(_LockWithCounter) - exceptions: typing.Dict[typing.Hashable, BaseException] = {} - - def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return func(*args, **kwds) - - _cache = cache(args[0]) if cache_is_function else cache - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = _cache[key] - except KeyError: - pass - else: - # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - callback(EVENT_HIT, key, result) - - return _copy_if_need(result, level=copy_level) - - with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = _cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: 
- result = func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - _cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - callback(event, key, result) - - return _copy_if_need(result, level=copy_level) - - if not cache_is_function: - _wrapped.cache = cache - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.memory() - ) - - _wrapped.callback = callback - - if not cache_is_function: - - def cache_clear() -> None: - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def _async_cached_wrapper( - func, - cache: typing.Union[BaseCacheImpl, typing.Callable], - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -): - is_method = cache_is_function = inspect.isfunction(cache) - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks: defaultdict[typing.Hashable, _LockWithCounter] = defaultdict( - lambda: _LockWithCounter(True) - ) - exceptions: typing.Dict[typing.Hashable, BaseException] = {} - - async def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return await func(*args, **kwds) - - _cache = cache(args[0]) if cache_is_function else cache - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = _cache[key] - except KeyError: - pass - else: - # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - awaitable = callback(EVENT_HIT, key, result) - if inspect.isawaitable(awaitable): - 
await awaitable - - return _copy_if_need(result, level=copy_level) - - async with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = _cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: - result = await func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - _cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - awaitable = callback(event, key, result) - if inspect.isawaitable(awaitable): - await awaitable - - return _copy_if_need(result, level=copy_level) - - if not cache_is_function: - _wrapped.cache = cache - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.memory() - ) - - _wrapped.callback = callback - - if not cache_is_function: - - def cache_clear() -> None: - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def cached( - cache: typing.Union[BaseCacheImpl, dict, typing.Callable[..., BaseCacheImpl], None], - key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, -) -> typing.Callable[[FT], FT]: - """ - Decorator to create a memoized cache for function results. - - Wraps a function to automatically cache and retrieve its results based on input parameters. - - Args: - cache (BaseCacheImpl, dict, callable): Cache implementation to store results. Defaults to FIFOCache. - Can be a function that got `self` and should return cache. - key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. 
- clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. - callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. - copy_level (int, optional): Level of result copying. Defaults to 1. - - Returns: - Callable: Decorated function with caching capabilities. - - Example for functions:: - - @cachebox.cached(cachebox.LRUCache(128)) - def sum_as_string(a, b): - return str(a+b) - - assert sum_as_string(1, 2) == "3" - - assert len(sum_as_string.cache) == 1 - sum_as_string.cache_clear() - assert len(sum_as_string.cache) == 0 - - Example for methods:: - - class A: - def __init__(self, num): - self.num = num - self._cache = cachebox.FIFOCache(0) - - @cachebox.cached(lambda self: self._cache) - def method(self, n): - return self.num * n - - instance = A(10) - assert A.method(2) == 20 - """ - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl) and not inspect.isfunction(cache): - raise TypeError("we expected cachebox caches or function, got %r" % (cache,)) - - def decorator(func: FT) -> FT: - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - - return functools.update_wrapper(wrapper, func) # type: ignore[return-value] - - return decorator - - -def cachedmethod( - cache: typing.Union[BaseCacheImpl, dict, None], - key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, -) -> typing.Callable[[FT], FT]: - """ - **This function is deperecated due to issue [#35](https://github.com/awolverp/cachebox/issues/35)**. - Use `cached` method instead. 
- - Decorator to create a method-specific memoized cache for function results. - - Similar to `cached()`, but ignores `self` parameter when generating cache keys. - - Args: - cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. - key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. - clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. - callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. - copy_level (int, optional): Level of result copying. Defaults to 1. - - Returns: - Callable: Decorated method with method-specific caching capabilities. - """ - import warnings - - warnings.warn( - "cachedmethod is deprecated, use cached instead. see issue https://github.com/awolverp/cachebox/issues/35", - DeprecationWarning, - stacklevel=2, - ) - - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl): - raise TypeError("we expected cachebox caches, got %r" % (cache,)) - - def decorator(func: FT) -> FT: - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - - return functools.update_wrapper(wrapper, func) # type: ignore[return-value] - - return decorator - - -def is_cached(func: object) -> bool: - """ - Check if a function/method cached by cachebox or not - """ - return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) diff --git a/python/tests/conftest.py b/python/tests/conftest.py deleted file mode 100644 index f46c034..0000000 --- a/python/tests/conftest.py +++ /dev/null @@ -1,30 +0,0 @@ -import cachebox -import pytest -import typing - - -@pytest.fixture( - scope="function", - params=[ - 
cachebox.Cache, - cachebox.FIFOCache, - cachebox.LFUCache, - cachebox.LRUCache, - cachebox.TTLCache, - cachebox.RRCache, - cachebox.VTTLCache, - ], -) -def random_cache_impl(request): - typ: typing.Type[cachebox.BaseCacheImpl] = request.param - - def inner(maxsize, iterable=None): - if typ is cachebox.TTLCache: - return typ(maxsize, ttl=10, iterable=iterable) - - if typ is cachebox.VTTLCache: - return typ(maxsize, ttl=10, iterable=iterable) - - return typ(maxsize, iterable=iterable) - - return inner diff --git a/python/tests/mixin.py b/python/tests/mixin.py deleted file mode 100644 index 6afb29b..0000000 --- a/python/tests/mixin.py +++ /dev/null @@ -1,570 +0,0 @@ -import dataclasses -import sys -import typing - -import pytest -from cachebox import BaseCacheImpl, TTLCache - - -@dataclasses.dataclass -class EQ: - def __init__(self, val: int) -> None: - self.val = val - - def __eq__(self, other: "EQ") -> bool: - return self.val == other.val - - def __hash__(self) -> int: - return self.val - - -@dataclasses.dataclass -class NoEQ: - def __init__(self, val: int) -> None: - self.val = val - - def __hash__(self) -> int: - return self.val - - -@dataclasses.dataclass -class Sized: - size: int - key: int - - def __sizeof__(self) -> int: - return self.size - - def __hash__(self) -> int: - return self.key - - def __eq__(self, other: object) -> bool: - if not isinstance(other, Sized): - return False - return self.key == other.key - - -class SizeError: - def __sizeof__(self) -> int: - raise ValueError("boom") - - -def getsizeof(obj, use_sys=True): # pragma: no cover - try: - if use_sys: - return sys.getsizeof(obj) - else: - return obj.__sizeof__() - except TypeError: # PyPy doesn't implement getsizeof or __sizeof__ - return len(obj) - - -class _TestMixin: # pragma: no cover - CACHE: typing.Type[BaseCacheImpl] - - KWARGS: dict = {} - NO_POLICY: bool = False - - def test__new__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=8) - assert cache.maxsize == 10 - assert 20 > 
cache.capacity() >= 8, "capacity: {}".format(cache.capacity()) - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(20, **self.KWARGS, capacity=0) - assert cache.maxsize == 20 - assert 2 >= cache.capacity() >= 0 # This is depends on platform - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(20, **self.KWARGS, capacity=100) - assert cache.maxsize == 20 - assert 30 > cache.capacity() >= 20 - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(0, **self.KWARGS, capacity=8) - assert cache.maxsize == sys.maxsize - assert 20 > cache.capacity() >= 8 - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(10, **self.KWARGS, capacity=8, maxmemory=30) - assert cache.maxsize == 10 - assert 20 > cache.capacity() >= 8 - assert cache.maxmemory == 30 - - def test_overflow(self): - if not self.NO_POLICY: - return - - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - for i in range(10): - cache[i] = i - - with pytest.raises(OverflowError): - cache["new-key"] = "new-value" - - def test_maxmemory_config(self): - cache = self.CACHE(10, **self.KWARGS, maxmemory=128) - assert cache.maxmemory == 128 - assert cache.memory() == 0 - - def test_maxmemory_enforced(self): - cache = self.CACHE(0, **self.KWARGS, maxmemory=100) - - k1 = Sized(10, 1) - v1 = Sized(80, 101) - cache[k1] = v1 - - k2 = Sized(10, 2) - v2 = Sized(80, 102) - - if self.NO_POLICY: - with pytest.raises(OverflowError): - cache[k2] = v2 - assert k1 in cache - else: - cache[k2] = v2 - assert k2 in cache - assert cache.memory() <= cache.maxmemory - - def test_maxmemory_enforced_base_types(self): - size_of_int = sys.getsizeof(1, 1) - - cache = self.CACHE(0, **self.KWARGS, maxmemory=size_of_int * 10) - - for i in range(5): - cache[i] = i - - if self.NO_POLICY: - with pytest.raises(OverflowError): - cache[10] = 10 - - assert 1 in cache - else: - cache[10] = 10 - assert 10 in cache - assert cache.memory() <= cache.maxmemory - - def test_update_overflow_preserves_entry(self): - cache = 
self.CACHE(0, **self.KWARGS, maxmemory=60) - - key = Sized(10, 1) - value = Sized(10, 101) - cache[key] = value - - too_big = Sized(100, 102) - with pytest.raises(OverflowError): - cache[key] = too_big - - assert cache[key].key == 101 - assert cache.memory() <= cache.maxmemory - - def test_update_sizeof_error_preserves_entry(self): - cache = self.CACHE(0, **self.KWARGS, maxmemory=60) - - key = Sized(10, 1) - value = Sized(10, 101) - cache[key] = value - - with pytest.raises(ValueError): - cache[key] = SizeError() - - assert cache[key].key == 101 - - def test___len__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - assert len(cache) == 0 - assert cache.is_empty() ^ bool(cache) - - cache[0] = 0 - assert len(cache) == 1 - - cache[1] = 1 - cache[2] = 2 - cache[3] = 3 - assert len(cache) == 4 - - cache[0] = 10 - cache[1] = 5 - assert len(cache) == 4 - - for i in range(1000, 1000 + (10 - len(cache))): - cache[i] = i - - assert len(cache) == 10 - assert cache.is_full() - - def test___contains__(self): - cache = self.CACHE(1, **self.KWARGS, capacity=1) - - assert 1 not in cache - cache[1] = 1 - assert 1 in cache - - def test___setitem__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - with pytest.raises(KeyError): - cache[1] - - cache[1] = 1 - cache[1] - cache[0] = 0 - cache[0] - cache[2] = 2 - cache[3] = 3 - - with pytest.raises(KeyError): - cache[4] - - del cache[1] - del cache[2] - del cache[3] - - with pytest.raises(KeyError): - del cache["error"] - - cache[0] - - with pytest.raises(KeyError): - cache[2] - - def test___repr__(self): - cache = self.CACHE(1000, **self.KWARGS, capacity=2) - assert repr(cache).startswith(self.CACHE.__module__ + "." 
+ self.CACHE.__name__) - - cache.update((i, i) for i in range(1000)) - assert str(cache) == repr(cache) - - def test_insert(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - assert cache.insert(1, 1) is None - assert cache.insert(1, 1) == 1 - assert cache.insert(1, 10) == 1 - assert cache.insert(1, 2) == 10 - - cache[5] = 5 - - assert cache.insert(5, "value") == 5 - assert cache.insert(5, 5) == "value" - - del cache[5] - - assert cache.insert(5, 5) is None - - def test_get(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i - - assert cache.get(0, None) == 0 - assert cache.get(1, None) == 1 - assert cache.get("no-exists") is None - assert cache.get("no-exists", None) is None - assert cache.get("no-exists", 111) == 111 - - def test_pop(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i * 2 - - assert cache.pop(1, None) == 2 - assert cache.get(1, None) is None - assert cache.pop(2, None) == 4 - assert cache.get(2, None) is None - - assert cache.pop(10, None) is None - assert cache.pop(10, 2) == 2 - - def test_setdefault(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.setdefault("name", "nick") - obj["age"] = 18 - assert 18 == obj.setdefault("age", 1000) - assert 18 == obj["age"] - assert "nick" == obj["name"] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.setdefault("newkey", 0) - - def test_clear(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=True) - assert 0 == len(obj) - try: - assert getsizeof(obj, False) >= cap - except AssertionError as e: - # if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=False) - assert 0 == len(obj) - # this is not stable and - # may increases the capacity! 
- try: - assert cap != getsizeof(obj, False) - except AssertionError as e: - # if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - def test_update(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update([(1, "a"), (2, "b")]) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.update([(3, "a"), (4, "b")]) - else: - obj.update([(3, "a"), (4, "b")]) - - kw = self.KWARGS.copy() - kw["iterable"] = {1: 1, 2: 2} - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - kw["iterable"] = [(1, "a"), (2, "b")] - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - def test_eq_implemetation(self): - # see https://github.com/awolverp/cachebox/issues/5 - - size = 1000 - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - cache.insert(NoEQ(val=i), i) - cache.get(NoEQ(val=i)) - - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - cache.insert(EQ(val=i), i) - cache.get(EQ(val=i)) - - def test_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == sorted(obj.keys()) - assert v == sorted(obj.values()) - assert list(zip(k, v)) == sorted(obj.items()) - - with pytest.raises(RuntimeError): - for i in obj: - del obj[i] - - for i in range(100): - obj[i] = i * 2 - - for i in range(50): - del obj[i] - - p = iter(obj) - next(p) - - obj.shrink_to_fit() - - with pytest.raises(RuntimeError): - next(p) - - obj = self.CACHE(0, **self.KWARGS) - obj.update({i: i for i in range(20)}) - - for key, value in obj.items(): 
- assert obj[key] == value - - try: - for key, value in obj.items(): - obj[key] = value * 2 - except RuntimeError: - if not isinstance(obj, TTLCache): - raise - - with pytest.raises(RuntimeError): - for key, value in obj.items(): - obj[str(key)] = value - - def test___eq__(self): - cache = self.CACHE(100, **self.KWARGS, capacity=100) - - with pytest.raises(TypeError): - cache > cache - - with pytest.raises(TypeError): - cache < cache - - with pytest.raises(TypeError): - cache >= cache - - with pytest.raises(TypeError): - cache <= cache - - assert cache == cache - assert not cache != cache - - for i in range(90): - cache[i] = i - - assert cache == cache - assert not cache != cache - - c2 = self.CACHE(100, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert cache == c2 - assert not c2 != cache - - c2 = self.CACHE(1000, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert not cache == c2 - assert c2 != cache - - def _test_pickle(self, check_order: typing.Callable): - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[0] - for _ in range(9): - c1[1] - for _ in range(8): - c1[2] - for _ in range(7): - c1[3] - for _ in range(6): - c1[4] - for _ in range(5): - c1[5] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[8] - for _ in range(1): - c1[9] - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2, f"{c1} - {c2}" - assert c1.capacity() == c2.capacity() - check_order(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[1] - for _ in range(9): - c1[2] - for _ in range(8): - c1[0] - for _ in range(7): - c1[3] - for _ in range(6): - c1[5] - for _ 
in range(5): - c1[4] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[9] - for _ in range(1): - c1[8] - - pickle.dump(c1, fd) - fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - check_order(c1, c2) - - def test_copy(self): - import copy - - # shallow copy - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c1.insert("dict", {}) - c2 = c1.copy() - - assert c2 == c1 - c2["dict"][1] = 1 - - assert c1["dict"][1] == 1 - - c2.insert(1, 1) - assert 1 not in c1 - - # deepcopy - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c1.insert("dict", {}) - c2 = copy.deepcopy(c1) - - assert c2 == c1 - c2["dict"][1] = 1 - - assert 1 not in c1["dict"] - - c2.insert(1, 1) - assert 1 not in c1 - - def test_cache_type(self): - class AType: - pass - - cache = self.CACHE(maxsize=0, **self.KWARGS) - cache[AType] = AType - assert cache[AType] is AType diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py deleted file mode 100644 index 7ec2ea7..0000000 --- a/python/tests/test_caches.py +++ /dev/null @@ -1,612 +0,0 @@ -import time -from datetime import timedelta - -import pytest -from cachebox import ( - Cache, - FIFOCache, - LFUCache, - LRUCache, - RRCache, - TTLCache, - VTTLCache, -) - -from .mixin import Sized, _TestMixin - - -class TestCache(_TestMixin): - CACHE = Cache - NO_POLICY = True - - def test_pickle(self): - self._test_pickle(lambda c1, c2: None) - - -class TestFIFOCache(_TestMixin): - CACHE = FIFOCache - - def test_policy(self): - cache = FIFOCache(5) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in 
cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del cache[4] - - assert cache.popitem() == (10, 10) - - def test_update_can_evict_self_on_maxmemory(self): - cache = FIFOCache(0, maxmemory=50) - - k1 = Sized(10, 1) - v1 = Sized(10, 101) - k2 = Sized(10, 2) - v2 = Sized(10, 102) - - cache[k1] = v1 - cache[k2] = v2 - - cache[k1] = Sized(40, 103) - - assert k1 not in cache - assert k2 in cache - assert cache.memory() <= cache.maxmemory - - def test_ordered_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - assert obj.first(-1) == obj.last() - assert obj.first(-10000) is None - - -class TestRRCache(_TestMixin): - CACHE = RRCache - - def test_popitem(self): - obj = RRCache(3) - with pytest.raises(KeyError): - obj.popitem() - with pytest.raises(KeyError): - obj.random_key() - - obj[1] = 1 - assert obj.random_key() == 1 - assert obj.popitem() == (1, 1) - - def test_pickle(self): - self._test_pickle(lambda c1, c2: None) - - -class TestLRUCache(_TestMixin): - CACHE = LRUCache - - def test_policy(self): - obj = self.CACHE(3) - - obj[1] = 1 - obj[2] = 2 - obj[3] = 3 - - assert (1, 1) == obj.popitem() - - obj[1] = 1 - obj[2] - - assert (3, 3) == obj.popitem() - - obj[4] = 4 - assert 1 == obj.get(1) - - obj[5] = 5 - assert 2 not in obj - - def test_ordered_iterators(self): - obj = self.CACHE(20, **self.KWARGS, capacity=20) - - for i in range(6): - obj[i] = i * 2 - 
- obj[1] - obj[5] - obj[3] = 7 - - k = [0, 2, 4, 1, 5, 3] - v = [0, 4, 8, 2, 10, 7] - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_recently_used_funcs(self): - obj = LRUCache(10) - - for i in range(6): - obj[i] = i * 2 - - obj[1] - obj[5] - obj[3] = 7 - obj.peek(4) - - assert obj.peek(6) is None - - assert obj.most_recently_used() == 3 - assert obj.least_recently_used() == 0 - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestLFUCache(_TestMixin): - CACHE = LFUCache - - def test_policy(self): - obj = self.CACHE(5, {i: i for i in range(5)}) - - for i in range(5): - obj[i] = i - - for i in range(10): - assert 0 == obj[0] - for i in range(7): - assert 1 == obj[1] - for i in range(3): - assert 2 == obj[2] - for i in range(4): - assert 3 == obj[3] - for i in range(6): - assert 4 == obj[4] - - assert (2, 2) == obj.popitem() - assert (3, 3) == obj.popitem() - - for i in range(10): - assert 4 == obj.get(4) - - assert (1, 1) == obj.popitem() - - assert 2 == len(obj) - obj.clear() - - for i in range(5): - obj[i] = i - - assert [0, 1, 2, 3, 4] == list(obj.keys()) - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): - obj[3] += 1 - for i in range(6): - obj[4] += 1 - - obj[5] = 4 - assert [5, 3, 4, 1, 0] == list(obj.keys()) - - def test_items_with_frequency(self): - # no need to test completely items_with_frequency - # because it's tested in test_iterators - obj = LFUCache(10, {1: 2, 3: 4}) - for key, val, freq in obj.items_with_frequency(): - assert key in obj - assert val == obj[key] - assert isinstance(freq, int) - - def test_least_frequently_used(self): - obj = LFUCache(10) - - for i in range(5): - obj[i] = i * 2 - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): 
- obj[3] += 1 - for i in range(6): - obj[4] += 1 - - assert obj.least_frequently_used() == 2 - assert obj.least_frequently_used(1) == 3 - assert obj.least_frequently_used(4) == 0 - assert obj.least_frequently_used(5) is None - assert obj.least_frequently_used(5) is None - assert obj.least_frequently_used(-len(obj)) == obj.least_frequently_used() - assert obj.least_frequently_used(-1000) is None - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestTTLCache(_TestMixin): - CACHE = TTLCache - KWARGS = {"ttl": 10} - - def test__new__(self): - super().test__new__() - - cache = TTLCache(0, timedelta(minutes=2, seconds=20)) - assert cache.ttl == (2 * 60) + 20 - - with pytest.raises(ValueError): - TTLCache(0, -10) - - def test_policy(self): - obj = self.CACHE(2, 0.5) - assert obj.ttl == 0.5 - - obj.insert(0, 1) - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[0] - - obj = self.CACHE(2, 20) - - obj.insert(0, 0) - obj.insert(1, 1) - obj.insert(2, 2) - - assert 0 not in obj - assert (1, 1) == obj.popitem() - - def test_update_with_ttl(self): - obj = self.CACHE(2, 0.5) - - # obj.update({1: 1, 2: 2, 3: 3}) - obj.update((i + 1, i + 1) for i in range(3)) - - with pytest.raises(KeyError): - obj[1] - - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - obj[3] - - def test_policy_ttl_no_care(self): - cache = TTLCache(5, 10) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del 
cache[4] - - assert cache.popitem() == (10, 10) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - - def test_get_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - obj.insert(2, 2) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - obj.popitem_with_expire() - - def test_items_with_expire(self): - # no need to test completely items_with_expire - # because it's tested in test_iterators - obj = TTLCache(10, 3, {1: 2, 3: 4}) - for key, val, ttl in obj.items_with_expire(): - assert key in obj - assert val == obj[key] - assert isinstance(ttl, float) - - 
-class TestVTTLCache(_TestMixin): - CACHE = VTTLCache - - def test_policy(self): - obj = VTTLCache(2) - - obj.insert(0, 1, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[0] - - obj.insert("name", "nick", 0.3) - obj.insert("age", 18, None) - time.sleep(0.301) - - with pytest.raises(KeyError): - obj["name"] - - del obj["age"] - - obj.insert(0, 0, 70) - obj.insert(1, 1, 60) - obj.insert(2, 2, 90) - - assert 1 not in obj - assert (0, 0) == obj.popitem() - - def test_update_with_ttl(self): - obj = VTTLCache(3) - - obj.update({1: 1, 2: 2, 3: 3}, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[1] - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - obj[3] - - def test_get_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - obj.insert(2, 2, 6) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 6 > dur > 5, "6 > dur > 5 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - 
obj.popitem_with_expire() - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - pickle.dump(c1, fd) - fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 0.5) - - time.sleep(0.51) - - c2 = pickle.loads(pickle.dumps(c1)) - - assert len(c2) == len(c1) - assert abs(c2.capacity() - c1.capacity()) < 2 - inner(c1, c2) - - def test_items_with_expire(self): - # no need to test completely items_with_expire - # because it's tested in test_iterators - obj = VTTLCache(10, {1: 2, 3: 4}, ttl=10) - for key, val, ttl in obj.items_with_expire(): - assert key in obj - assert val == obj[key] - assert isinstance(ttl, float) diff --git a/python/tests/test_concurrency.py b/python/tests/test_concurrency.py deleted file mode 100644 index 2935ee1..0000000 --- a/python/tests/test_concurrency.py +++ /dev/null @@ -1,108 +0,0 @@ -from cachebox import cached, LRUCache -from concurrent import futures -import asyncio -import pytest -import time - - -def test_threading_return(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - return "Hello" - - with futures.ThreadPoolExecutor(max_workers=10) as executor: - future_list = [executor.submit(func) for _ in range(10)] - for future in 
futures.as_completed(future_list): - assert future.result() == "Hello" - - assert calls == 1 - - -def test_threading_exc(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - raise RuntimeError - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 1 - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 2 - - -@pytest.mark.asyncio -async def test_asyncio_return(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - return "Hello" - - await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - ) - - assert calls == 1 - - -@pytest.mark.asyncio -async def test_asyncio_exc(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - raise RuntimeError - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 1 - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 2 diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..5e89a3b --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +maturin +pytest +hypothesis +pytest-asyncio +mkdocs-material +mkdocstrings[python] diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..63df91e --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ 
+imports_granularity = "Item" diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs deleted file mode 100644 index 4b52a6b..0000000 --- a/src/bridge/cache.rs +++ /dev/null @@ -1,315 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct Cache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct cache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex< - hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, usize)>, - >, -} - -#[pyo3::pymethods] -impl Cache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::nopolicy::NoPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> usize { - self.raw.lock().observed.get() as usize - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = cache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - unsafe { - let state = { - let mp = pyo3::ffi::PyDict_New(); - - if mp.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for bucket in lock.iter() { - let (key, val, _) = bucket.as_ref(); - // SAFETY: we don't need to check error because we sure about key that is hashable. - pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => mp, - 2 => capacity, - 3 => maxmemory, - )? 
- }; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().iter() { - let (key, value, _) = unsafe { value.as_ref() }; - visit.call(&key.obj)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl cache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { x.as_ref() }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs deleted file mode 100644 index b09cc3c..0000000 --- a/src/bridge/fifocache.rs +++ /dev/null @@ -1,349 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct FIFOCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct fifocache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl FIFOCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> 
pyo3::PyResult { - let raw = crate::policies::fifo::FIFOPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity().0 - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let capacity = lock.capacity(); - - capacity.0 * size_of::() - + capacity.1 - * (size_of::() - + size_of::() - + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem(py)? 
{ - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = fifocache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option> { - let lock = self.raw.lock(); - - lock.get_index(index) - .map(|(key, _, _)| key.obj.clone_ref(py)) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for (hk, val, _) in lock.entries_iter() { - let tp = tuple!( - py, - 2, - 0 => hk.obj.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity 
= pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().entries_iter() { - visit.call(&value.0.obj)?; - visit.call(&value.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl fifocache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { x.as_ref() }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs deleted file mode 100644 index f480f19..0000000 --- a/src/bridge/lfucache.rs +++ /dev/null @@ -1,377 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct LFUCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct lfucache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - 
-#[pyo3::pymethods] -impl LFUCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::lfu::LFUPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - #[pyo3(signature=(key, value, freq=0usize))] - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value, freq)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(py, &key)? { - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, value, _, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some((key, val, _, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - #[pyo3(signature=(key, default, freq=0usize))] - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let node = entry.into_value(); - Ok(unsafe { node.as_ref().1.clone_ref(py) }) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py), freq)?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = lfucache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - pub fn least_frequently_used( - &self, - py: pyo3::Python<'_>, - n: usize, - ) -> Option> { - let mut lock = self.raw.lock(); - lock.least_frequently_used(n) - .map(|x| unsafe { x.as_ref().0.obj.clone_ref(py) }) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let mut lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in lock.iter() { - let node = 
&(*ptr.as_ptr()); - - let frequency = pyo3::ffi::PyLong_FromSize_t(node.2); - if frequency.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.0.obj.clone_ref(py).into_ptr(), - 1 => node.1.clone_ref(py).into_ptr(), - 2 => frequency, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.0.obj)?; - visit.call(&value.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl lfucache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, freq, _) = unsafe { x.as_ref() }; - - let freq = unsafe { pyo3::ffi::PyLong_FromSize_t(*freq) }; - - tuple!( - slf.py(), - 3, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - 2 => freq, - ) - } 
else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs deleted file mode 100644 index 761edad..0000000 --- a/src/bridge/lrucache.rs +++ /dev/null @@ -1,363 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct LRUCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct lrucache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl LRUCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::lru::LRUPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = lrucache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn least_recently_used(&self, py: pyo3::Python<'_>) -> Option> { - let lock = self.raw.lock(); - lock.least_recently_used().map(|x| x.0.obj.clone_ref(py)) - } - - fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option> { - let lock = self.raw.lock(); - lock.most_recently_used().map(|x| x.0.obj.clone_ref(py)) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for node in lock.iter() { - 
let (hk, val, _) = &(*node.as_ptr()).element; - - let tp = tuple!( - py, - 2, - 0 => hk.obj.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.element.0.obj)?; - visit.call(&value.element.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl lrucache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { &x.as_ref().element }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs deleted file mode 100644 index 40a7c3b..0000000 --- a/src/bridge/mod.rs +++ /dev/null @@ -1,111 +0,0 @@ -use 
pyo3::create_exception; -use pyo3::types::PyTypeMethods; - -create_exception!(cachebox._core, CoreKeyError, pyo3::exceptions::PyException); - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen, subclass))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type, subclass) -)] -pub struct BaseCacheImpl {} - -#[pyo3::pymethods] -impl BaseCacheImpl { - #[new] - #[pyo3(signature = (*args, **kwargs))] - #[classmethod] - #[allow(unused_variables)] - pub fn __new__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: &pyo3::Bound<'_, pyo3::PyAny>, - kwargs: Option<&pyo3::Bound<'_, pyo3::PyAny>>, - ) -> pyo3::PyResult { - let size = unsafe { pyo3::ffi::PyTuple_Size(cls.mro().as_ptr()) }; - - // This means BaseCacheImpl is used as subclass - // So we shouldn't raise NotImplementedError - if size > 2 { - Ok(Self {}) - } else { - Err(pyo3::PyErr::new::("do not call this constructor, you can subclass this implementation or use other classes.")) - } - } - - #[allow(unused_variables)] - #[classmethod] - pub fn __class_getitem__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: pyo3::Py, - ) -> pyo3::Py { - cls.clone().into() - } -} - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct TTLPair { - key: pyo3::Py, - value: pyo3::Py, - duration: std::time::Duration, -} - -impl TTLPair { - fn clone_from_pair(py: pyo3::Python<'_>, pair: &crate::common::TimeToLivePair) -> Self { - TTLPair { - key: pair.key.obj.clone_ref(py), - value: pair.value.clone_ref(py), - duration: pair.duration().unwrap_or_default(), - } - } -} - -impl From for TTLPair { - fn from(value: crate::common::TimeToLivePair) -> Self { - let duration = value.duration().unwrap_or_default(); - - TTLPair { - key: value.key.obj, - value: value.value, - duration, - } - } -} - -#[pyo3::pymethods] -impl TTLPair { - fn key(slf: 
pyo3::PyRef<'_, Self>) -> pyo3::Py { - slf.key.clone_ref(slf.py()) - } - - fn value(slf: pyo3::PyRef<'_, Self>) -> pyo3::Py { - slf.value.clone_ref(slf.py()) - } - - fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { - slf.duration.as_secs_f64() - } - - fn pack2(slf: pyo3::PyRef<'_, Self>) -> (pyo3::Py, pyo3::Py) { - (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py())) - } - - fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::Py, pyo3::Py, f64) { - ( - slf.key.clone_ref(slf.py()), - slf.value.clone_ref(slf.py()), - slf.duration.as_secs_f64(), - ) - } -} - -pub mod cache; -pub mod fifocache; -pub mod lfucache; -pub mod lrucache; -pub mod rrcache; -pub mod ttlcache; -pub mod vttlcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs deleted file mode 100644 index a3777c8..0000000 --- a/src/bridge/rrcache.rs +++ /dev/null @@ -1,297 +0,0 @@ -use super::cache::cache_items; -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct RRCache { - raw: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl RRCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::random::RandomPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> usize { - self.raw.lock().observed.get() as usize - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn 
__sizeof__(&self) -> usize { - let lock = self.raw.lock(); - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem()? 
{ - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = cache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn random_key(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - match lock.random_key() { - Some(x) => Ok(x.obj.clone_ref(py)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - unsafe { - let state = { - let mp = pyo3::ffi::PyDict_New(); - - if mp.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for bucket in lock.iter() { - let (key, val, _) = bucket.as_ref(); - // SAFETY: we don't need to check error because we sure about key that is hashable. 
- pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => mp, - 2 => capacity, - 3 => maxmemory, - )? - }; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().iter() { - let (key, value, _) = unsafe { value.as_ref() }; - visit.call(&key.obj)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs deleted file mode 100644 index 979205f..0000000 --- a/src/bridge/ttlcache.rs +++ /dev/null @@ -1,375 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct TTLCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core"))] -#[cfg_attr(not(Py_3_9), pyo3::pyclass(module = "cachebox._core", immutable_type))] -pub struct ttlcache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, - pub now: std::time::SystemTime, -} - -#[pyo3::pymethods] -impl TTLCache { - #[new] - #[pyo3(signature=(maxsize, ttl, *, capacity=0, maxmemory=0))] - fn __new__( - maxsize: usize, - ttl: f64, - capacity: usize, - maxmemory: usize, - ) -> pyo3::PyResult { - let raw 
= crate::policies::ttl::TTLPolicy::new(maxsize, capacity, ttl, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn ttl(&self) -> f64 { - self.raw.lock().ttl().as_secs_f64() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity().0 - } - - fn __len__(&self) -> usize { - self.raw.lock().real_len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let capacity = lock.capacity(); - - capacity.0 * size_of::() - + capacity.1 - * (size_of::() - + size_of::() - + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.remove(); - Ok(super::TTLPair::from(val)) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - - match lock.popitem(py)? 
{ - Some(val) => Ok(super::TTLPair::from(val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, py: pyo3::Python<'_>, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(py); - } - } - - fn shrink_to_fit(&self, py: pyo3::Python<'_>) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(py); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.into_value(); - Ok(val.value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(slf.py()); - - let result = ttlcache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - now: std::time::SystemTime::now(), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option> { - let lock = self.raw.lock(); - lock.get_index(index).map(|pair| pair.key.obj.clone_ref(py)) - } - - fn expire(&self, py: pyo3::Python<'_>) { - let mut lock = self.raw.lock(); - lock.expire(py); - lock.shrink_to_fit(py); - } - - fn __getnewargs__(&self) -> (usize, f64) { - (0, 0.0f64) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for element in lock.entries_iter() { - let tp = tuple!( - py, - 3, - 0 => element.key.obj.clone_ref(py).as_ptr(), - 1 => element.value.clone_ref(py).as_ptr(), - 2 => pyo3::ffi::PyFloat_FromDouble( - 
element.expire_at.unwrap_unchecked() - .duration_since(std::time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() - ), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); - let ttl = pyo3::ffi::PyFloat_FromDouble(lock.ttl().as_secs_f64()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 5, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => ttl, - 4 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().entries_iter() { - visit.call(&value.key.obj)?; - visit.call(&value.value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl ttlcache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - let mut element: std::ptr::NonNull; - loop { - element = { - if let Some(x) = iter.next() { - x - } else { - return Err(pyo3::PyErr::new::(())); - } - }; - - if unsafe { !element.as_ref().is_expired(slf.now) } { - break; - } - } - - Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { - element.as_ref() - })) - } -} diff --git a/src/bridge/vttlcache.rs b/src/bridge/vttlcache.rs deleted file mode 100644 index e815358..0000000 --- 
a/src/bridge/vttlcache.rs +++ /dev/null @@ -1,373 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct VTTLCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct vttlcache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, - pub now: std::time::SystemTime, -} - -#[pyo3::pymethods] -impl VTTLCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::vttl::VTTLPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().real_len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - #[pyo3(signature=(key, value, ttl=None))] - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value, ttl)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value, ttl)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - #[pyo3(signature=(iterable, ttl=None))] - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable, ttl) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let mut t1 = slf.raw.lock(); - let mut t2 = other.raw.lock(); - t1.equal(slf.py(), &mut t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let mut t1 = slf.raw.lock(); - let mut t2 = other.raw.lock(); - t1.equal(slf.py(), &mut t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: 
pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.remove(); - Ok(super::TTLPair::from(val)) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some(val) => Ok(super::TTLPair::from(val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - #[pyo3(signature=(key, default, ttl=None))] - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => unsafe { - let val = entry.into_value(); - Ok(val.as_ref().value.clone_ref(py)) - }, - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py), ttl)?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = vttlcache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - now: std::time::SystemTime::now(), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn expire(&self) { - let mut lock = self.raw.lock(); - lock.expire(); - lock.shrink_to_fit(); - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let mut lock = self.raw.lock(); - lock.expire(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in lock.iter() { - let node = ptr.as_ref(); - - let ttlobject = pyo3::ffi::PyLong_FromDouble(node.expire_at.map_or(0.0, |x| { - x.duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs_f64() - })); - - if ttlobject.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.key.obj.clone_ref(py).as_ptr(), - 1 => node.value.clone_ref(py).as_ptr(), - 2 => ttlobject, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? 
- }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.key.obj)?; - visit.call(&value.value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl vttlcache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - let mut element: std::ptr::NonNull; - loop { - element = { - if let Some(x) = iter.next() { - x - } else { - return Err(pyo3::PyErr::new::(())); - } - }; - - if unsafe { !element.as_ref().is_expired(slf.now) } { - break; - } - } - - Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { - element.as_ref() - })) - } -} diff --git a/src/common.rs b/src/common.rs deleted file mode 100644 index 1065fff..0000000 --- a/src/common.rs +++ /dev/null @@ -1,594 +0,0 @@ -use pyo3::types::PyAnyMethods; - -macro_rules! non_zero_or { - ($num:expr, $_else:expr) => { - unsafe { core::num::NonZeroUsize::new_unchecked(if $num == 0 { $_else } else { $num }) } - }; -} - -macro_rules! new_table { - ($capacity:expr) => {{ - if $capacity > 0 { - hashbrown::raw::RawTable::try_with_capacity($capacity) - .map_err(|_| pyo3::PyErr::new::(())) - } else { - Ok(hashbrown::raw::RawTable::new()) - } - }}; -} - -macro_rules! 
tuple { - ( - $py:expr, - $len:expr, - $($index:expr => $value:expr,)+ - ) => {{ - #[allow(unused_unsafe)] - let tuple = unsafe { pyo3::ffi::PyTuple_New($len) }; - if tuple.is_null() { - Err(pyo3::PyErr::fetch($py)) - } else { - #[allow(unused_unsafe)] - unsafe { - $( - pyo3::ffi::PyTuple_SetItem(tuple, $index, $value); - )+ - } - - Ok(tuple) - } - }}; - - (check $tuple:expr, size=$size:expr) => {{ - #[allow(unused_unsafe)] - if unsafe { pyo3::ffi::PyTuple_CheckExact($tuple) } == 0 { - Err( - pyo3::PyErr::new::("expected tuple, but got another type") - ) - } else if unsafe {pyo3::ffi::PyTuple_Size($tuple)} != $size { - Err( - pyo3::PyErr::new::("tuple size is invalid") - ) - } else { - Ok(()) - } - }} -} - -macro_rules! extract_pickle_tuple { - ($py:expr, $state:expr => list) => {{ - if pyo3::ffi::PyTuple_CheckExact($state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size($state); - if size != 3 && size != 4 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - - if pyo3::ffi::PyList_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - // Tuple returns borrowed reference - pyo3::Py::::from_borrowed_ptr($py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let maxmemory = if size == 4 { - let obj = pyo3::ffi::PyTuple_GetItem($state, 3); - let result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - result - } else { - 0 - }; - - (maxsize, iterable, capacity, 
maxmemory) - }}; - - ($py:expr, $state:expr => dict) => {{ - if pyo3::ffi::PyTuple_CheckExact($state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size($state); - if size != 3 && size != 4 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - - if pyo3::ffi::PyDict_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - // Tuple returns borrowed reference - pyo3::Py::::from_borrowed_ptr($py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let maxmemory = if size == 4 { - let obj = pyo3::ffi::PyTuple_GetItem($state, 3); - let result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - result - } else { - 0 - }; - - (maxsize, iterable, capacity, maxmemory) - }}; -} - -#[inline] -#[cfg(not(PyPy))] -pub fn pyobject_size(py: pyo3::Python<'_>, obj: &pyo3::Py) -> pyo3::PyResult { - static SIZEOF_METHOD_NAME: &'static std::ffi::CStr = c"__sizeof__"; - - // PyPy does not support __sizeof__ or sys.getsizeof - let sizeof_method = obj.bind(py).getattr(SIZEOF_METHOD_NAME)?; - - unsafe { - if pyo3::ffi::PyType_Check(obj.as_ptr()) == 1 { - sizeof_method.call1((obj,))?.extract::() - } else { - sizeof_method.call0()?.extract::() - } - } -} - -#[inline] -#[cfg(PyPy)] -pub fn pyobject_size(py: pyo3::Python<'_>, obj: &pyo3::Py) -> pyo3::PyResult { - static SIZEOF_METHOD_NAME: &'static std::ffi::CStr = c"__sizeof__"; - - // PyPy does not support __sizeof__ or sys.getsizeof - let 
sizeof_method = obj.bind(py).getattr_opt(SIZEOF_METHOD_NAME)?; - - match sizeof_method { - Some(sizeof_method) => unsafe { - if pyo3::ffi::PyType_Check(obj.as_ptr()) == 1 { - sizeof_method.call1((obj,))?.extract::() - } else { - sizeof_method.call0()?.extract::() - } - }, - None => Ok(1), - } -} - -#[inline] -pub fn entry_size( - py: pyo3::Python<'_>, - key: &PreHashObject, - value: &pyo3::Py, -) -> pyo3::PyResult { - let key_size = pyobject_size(py, &key.obj)?; - let value_size = pyobject_size(py, value)?; - - Ok(key_size.saturating_add(value_size)) -} - -#[inline] -pub fn pyobject_equal( - py: pyo3::Python<'_>, - arg1: *mut pyo3::ffi::PyObject, - arg2: *mut pyo3::ffi::PyObject, -) -> pyo3::PyResult { - unsafe { - if std::ptr::eq(arg1, arg2) { - return Ok(true); - } - - let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); - - if boolean < 0 { - Err(pyo3::PyErr::take(py).unwrap_unchecked()) - } else { - Ok(boolean == 1) - } - } -} - -/// Converts an isize value to a u64 value, mapping negative values to the upper half of the u64 range. -/// -/// This function ensures a bijective mapping between isize and u64, preserving the order of values -/// by offsetting negative values to the upper range of u64. -#[inline(always)] -fn convert_isize_to_u64(v: &isize) -> u64 { - const OFFSET: u64 = 0x8000000000000000; // 1 << 63 - - if *v >= 0 { - *v as u64 - } else { - (-(*v + 1)) as u64 + OFFSET - } -} - -/// Precomputed Hash PyObject -/// -/// A precomputed hash is a cryptographic hash value that's calculated in advance -/// and stored for later use, rather than being computed on demand when needed. -pub struct PreHashObject { - pub obj: pyo3::Py, - pub hash: u64, -} - -/// A view into a single entry in a table, which may either be absent or occupied. -/// -/// This is common in policies and will be used by `entry(...)` methods of them. 
-pub enum Entry { - Occupied(O), - Absent(V), -} - -/// Observe caches' changes -#[derive(Debug)] -pub struct Observed(u16); - -/// Checks the [`Observed`] on iterators -#[derive(Debug)] -pub struct ObservedIterator { - pub ptr: core::ptr::NonNull, - pub statepoint: u16, -} - -pub struct NoLifetimeSliceIter { - pub pointer: std::ptr::NonNull, - pub index: usize, - pub len: usize, -} - -/// A pair representing a key-value entry with a time-to-live (TTL) expiration. -pub struct TimeToLivePair { - pub key: PreHashObject, - pub value: pyo3::Py, - pub expire_at: Option, - pub size: usize, -} - -/// Represents the possible situations when a key is absent in VTTL or TTL policy's data structure. -/// -/// This enum helps track different scenarios during key insertion. -pub enum AbsentSituation { - /// A valid insertion slot is available - Slot(hashbrown::raw::InsertSlot), - - /// An expired entry's bucket is found - Expired(hashbrown::raw::Bucket), - - /// No suitable slot or expired entry is found - None, -} - -impl PreHashObject { - /// Creates a new [`PreHashObject`] - #[inline] - pub fn new(obj: pyo3::Py, hash: u64) -> Self { - Self { obj, hash } - } - - /// Calculates the hash of `object` and creates a new [`PreHashObject`] - #[inline] - pub fn from_pyobject( - py: pyo3::Python<'_>, - object: pyo3::Py, - ) -> pyo3::PyResult { - unsafe { - let py_hash = pyo3::ffi::PyObject_Hash(object.as_ptr()); - - if py_hash == -1 { - // SAFETY: - // PyObject_Hash never returns -1 on success. 
- return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - Ok(Self::new(object, convert_isize_to_u64(&py_hash))) - } - } - - /// Check equality of two objects by using [`pyo3::ffi::PyObject_RichCompareBool`] - #[inline] - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - pyobject_equal(py, self.obj.as_ptr(), other.obj.as_ptr()) - } -} - -impl std::fmt::Debug for PreHashObject { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "PreHashObject({})", self.hash) - } -} - -/// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] -pub trait TryFindMethods { - /// Searches for an element in the table. - fn try_find( - &self, - hash: u64, - compare: impl FnMut(&T) -> Result, - ) -> Result>, E>; - - fn try_find_or_find_insert_slot( - &mut self, - hash: u64, - compare: impl FnMut(&T) -> Result, - hasher: impl Fn(&T) -> u64, - ) -> Result, hashbrown::raw::InsertSlot>, E>; -} - -impl TryFindMethods for hashbrown::raw::RawTable { - #[inline] - fn try_find( - &self, - hash: u64, - mut compare: impl FnMut(&T) -> Result, - ) -> Result>, E> { - let mut error = None; - - let found = self.find(hash, |item| { - match compare(item) { - Ok(boolean) => boolean, - Err(e) => { - error = Some(e); - true // To break checking - } - } - }); - - if let Some(error) = error { - Err(error) - } else { - Ok(found) - } - } - - #[inline] - fn try_find_or_find_insert_slot( - &mut self, - hash: u64, - mut compare: impl FnMut(&T) -> Result, - hasher: impl Fn(&T) -> u64, - ) -> Result, hashbrown::raw::InsertSlot>, E> { - let mut error = None; - - let found = self.find_or_find_insert_slot( - hash, - |item| { - match compare(item) { - Ok(boolean) => boolean, - Err(e) => { - error = Some(e); - true // To break checking - } - } - }, - hasher, - ); - - if let Some(error) = error { - Err(error) - } else { - Ok(found) - } - } -} - -impl Observed { - #[cold] - pub fn new() -> Self { - Self(0) - } - - 
#[inline(always)] - pub fn change(&mut self) { - if self.0 == u16::MAX { - self.0 = 0; - } else { - self.0 = unsafe { self.0.unchecked_add(1) }; - } - } - - pub fn get(&self) -> u16 { - self.0 - } -} - -#[inline] -unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo3::PyResult { - unsafe fn inner( - py: pyo3::Python<'_>, - ptr: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - cfg_if::cfg_if! { - if #[cfg(all(Py_3_9, not(any(Py_LIMITED_API, PyPy, GraalPy))))] { - use pyo3::IntoPyObject; - - let m_name: pyo3::Bound<'_, pyo3::types::PyString> = "_state".into_pyobject(py)?; - Ok(pyo3::ffi::PyObject_CallMethodNoArgs(ptr, m_name.as_ptr())) - } else { - let state_fn = - pyo3::ffi::PyObject_GetAttrString(ptr, pyo3::ffi::c_str!("_state").as_ptr()); - - if state_fn.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let empty_args = pyo3::ffi::PyTuple_New(0); - let result = pyo3::ffi::PyObject_Call(state_fn, empty_args, std::ptr::null_mut()); - pyo3::ffi::Py_XDECREF(empty_args); - pyo3::ffi::Py_XDECREF(state_fn); - - Ok(result) - } - } - } - - let result = inner(py, ptr)?; - - if result.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let c = pyo3::ffi::PyLong_AsSize_t(result); - pyo3::ffi::Py_XDECREF(result); - - Ok(c as u16) -} - -impl ObservedIterator { - pub fn new(ptr: *mut pyo3::ffi::PyObject, state: u16) -> Self { - unsafe { - pyo3::ffi::Py_XINCREF(ptr); - } - - Self { - ptr: unsafe { core::ptr::NonNull::new(ptr).unwrap_unchecked() }, - statepoint: state, - } - } - - #[inline] - pub fn proceed(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<()> { - let state = unsafe { _get_state(py, self.ptr.as_ptr())? 
}; - - if state != self.statepoint { - return Err(pyo3::PyErr::new::( - "cache changed during iteration", - )); - } - - Ok(()) - } -} - -impl Drop for ObservedIterator { - fn drop(&mut self) { - unsafe { - pyo3::ffi::Py_XDECREF(self.ptr.as_ptr()); - } - } -} - -unsafe impl Send for ObservedIterator {} -unsafe impl Sync for ObservedIterator {} - -impl NoLifetimeSliceIter { - pub fn new(slice: &[T]) -> Self { - let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); - - Self { - pointer, - index: 0, - len: slice.len(), - } - } -} - -impl Iterator for NoLifetimeSliceIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.index >= self.len { - None - } else { - let value = unsafe { self.pointer.add(self.index) }; - self.index += 1; - Some(value) - } - } -} - -impl TimeToLivePair { - #[inline] - pub fn new( - key: PreHashObject, - value: pyo3::Py, - expire_at: Option, - size: usize, - ) -> Self { - Self { - key, - value, - expire_at, - size, - } - } - - pub fn duration(&self) -> Option { - self.expire_at.map(|x| { - x.duration_since(std::time::SystemTime::now()) - .unwrap_or_default() - }) - } - - #[inline] - pub fn is_expired(&self, now: std::time::SystemTime) -> bool { - match self.expire_at { - Some(x) => x < now, - None => false, - } - } -} - -pub type Mutex = parking_lot::Mutex; diff --git a/src/hashbrown/alloc.rs b/src/hashbrown/alloc.rs new file mode 100644 index 0000000..89feb2b --- /dev/null +++ b/src/hashbrown/alloc.rs @@ -0,0 +1,13 @@ +use core::ptr::NonNull; + +#[cfg(test)] +pub(crate) use std::alloc::AllocError; +use std::alloc::Layout; +pub(crate) use std::alloc::{Allocator, Global}; + +pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + match alloc.allocate(layout) { + Ok(ptr) => Ok(ptr), + Err(_) => Err(()), + } +} diff --git a/src/hashbrown/control/bitmask.rs b/src/hashbrown/control/bitmask.rs new file mode 100644 index 0000000..8370515 --- /dev/null +++ 
b/src/hashbrown/control/bitmask.rs @@ -0,0 +1,103 @@ +use super::group::{BITMASK_ITER_MASK, BITMASK_STRIDE, BitMaskWord, NonZeroBitMaskWord}; + +/// A bit mask which contains the result of a `Match` operation on a `Group` and +/// allows iterating through them. +/// +/// The bit mask is arranged so that low-order bits represent lower memory +/// addresses for group match results. +/// +/// For implementation reasons, the bits in the set may be sparsely packed with +/// groups of 8 bits representing one element. If any of these bits are non-zero +/// then this element is considered to true in the mask. If this is the +/// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be +/// performed on counts/indices to normalize this difference. `BITMASK_MASK` is +/// similarly a mask of all the actually-used bits. +/// +/// To iterate over a bit mask, it must be converted to a form where only 1 bit +/// is set per element. This is done by applying `BITMASK_ITER_MASK` on the +/// mask bits. +#[derive(Copy, Clone)] +pub(crate) struct BitMask(pub(crate) BitMaskWord); + +#[expect(clippy::use_self)] +impl BitMask { + /// Returns a new `BitMask` with the lowest bit removed. + #[inline] + #[must_use] + fn remove_lowest_bit(self) -> Self { + BitMask(self.0 & (self.0 - 1)) + } + + /// Returns whether the `BitMask` has at least one set bit. + #[inline] + pub(crate) fn any_bit_set(self) -> bool { + self.0 != 0 + } + + /// Returns the first set bit in the `BitMask`, if there is one. + #[inline] + pub(crate) fn lowest_set_bit(self) -> Option { + NonZeroBitMaskWord::new(self.0).map(Self::nonzero_trailing_zeros) + } + + /// Returns the number of trailing zeroes in the `BitMask`. + #[inline] + pub(crate) fn trailing_zeros(self) -> usize { + // ARM doesn't have a trailing_zeroes instruction, and instead uses + // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM + // versions (pre-ARMv7) don't have RBIT and need to emulate it + // instead. 
Since we only have 1 bit set in each byte on ARM, we can + // use swap_bytes (REV) + leading_zeroes instead. + if cfg!(target_arch = "arm") && BITMASK_STRIDE.is_multiple_of(8) { + self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE + } else { + self.0.trailing_zeros() as usize / BITMASK_STRIDE + } + } + + /// Same as above but takes a `NonZeroBitMaskWord`. + #[inline] + fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { + if cfg!(target_arch = "arm") && BITMASK_STRIDE.is_multiple_of(8) { + // SAFETY: A byte-swapped non-zero value is still non-zero. + let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) }; + swapped.leading_zeros() as usize / BITMASK_STRIDE + } else { + nonzero.trailing_zeros() as usize / BITMASK_STRIDE + } + } + + /// Returns the number of leading zeroes in the `BitMask`. + #[inline] + pub(crate) fn leading_zeros(self) -> usize { + self.0.leading_zeros() as usize / BITMASK_STRIDE + } +} + +impl IntoIterator for BitMask { + type Item = usize; + type IntoIter = BitMaskIter; + + #[inline] + fn into_iter(self) -> BitMaskIter { + // A BitMask only requires each element (group of bits) to be non-zero. + // However for iteration we need each element to only contain 1 bit. + BitMaskIter(BitMask(self.0 & BITMASK_ITER_MASK)) + } +} + +/// Iterator over the contents of a `BitMask`, returning the indices of set +/// bits. +#[derive(Clone)] +pub(crate) struct BitMaskIter(pub(crate) BitMask); + +impl Iterator for BitMaskIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + let bit = self.0.lowest_set_bit()?; + self.0 = self.0.remove_lowest_bit(); + Some(bit) + } +} diff --git a/src/hashbrown/control/group/generic.rs b/src/hashbrown/control/group/generic.rs new file mode 100644 index 0000000..09d5cd8 --- /dev/null +++ b/src/hashbrown/control/group/generic.rs @@ -0,0 +1,152 @@ +use super::super::{BitMask, Tag}; +use core::{mem, ptr}; + +// Use the native word size as the group size. 
Using a 64-bit group size on +// a 32-bit architecture will just end up being more expensive because +// shifts and multiplies will need to be emulated. + +cfg_if! { + if #[cfg(any( + target_pointer_width = "64", + target_arch = "aarch64", + target_arch = "x86_64", + target_arch = "wasm32", + ))] { + type GroupWord = u64; + type NonZeroGroupWord = core::num::NonZeroU64; + } else { + type GroupWord = u32; + type NonZeroGroupWord = core::num::NonZeroU32; + } +} + +pub(crate) type BitMaskWord = GroupWord; +pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord; +pub(crate) const BITMASK_STRIDE: usize = 8; +// We only care about the highest bit of each tag for the mask. +const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; + +/// Helper function to replicate a tag across a `GroupWord`. +#[inline] +fn repeat(tag: Tag) -> GroupWord { + GroupWord::from_ne_bytes([tag.0; Group::WIDTH]) +} + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a word-sized integer. +#[derive(Copy, Clone)] +pub(crate) struct Group(GroupWord); + +// We perform all operations in the native endianness, and convert to +// little-endian just before creating a BitMask. The can potentially +// enable the compiler to eliminate unnecessary byte swaps if we are +// only checking whether a BitMask is empty. +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. 
+ #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(ptr::read_unaligned(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(ptr::read(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + ptr::write(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which *may* + /// have the given value. + /// + /// This function may return a false positive in certain cases where + /// the tag in the group differs from the searched value only in its + /// lowest bit. This is fine because: + /// - This never happens for `EMPTY` and `DELETED`, only full entries. + /// - The check for key equality will catch these. + /// - This only happens if there is at least 1 true match. + /// - The chance of this happening is very low (< 1% chance per tag). 
+ #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + // This algorithm is derived from + // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord + let cmp = self.0 ^ repeat(tag); + BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + // If the high bit is set, then the tag must be either: + // 1111_1111 (EMPTY) or 1000_0000 (DELETED). + // So we can just check if the top two bits are 1 by ANDing them. + BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask((self.0 & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are full. 
+ #[inline] + pub(crate) fn match_full(self) -> BitMask { + BitMask(self.match_empty_or_deleted().0 ^ BITMASK_MASK) + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let full = 1000_0000 (true) or 0000_0000 (false) + // !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry) + // !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry) + let full = !self.0 & repeat(Tag::DELETED); + Group(!full + (full >> 7)) + } +} diff --git a/src/hashbrown/control/group/lsx.rs b/src/hashbrown/control/group/lsx.rs new file mode 100644 index 0000000..7da098a --- /dev/null +++ b/src/hashbrown/control/group/lsx.rs @@ -0,0 +1,124 @@ +use super::super::{BitMask, Tag}; +use core::mem; +use core::num::NonZeroU16; + +use core::arch::loongarch64::*; + +pub(crate) type BitMaskWord = u16; +pub(crate) type NonZeroBitMaskWord = NonZeroU16; +pub(crate) const BITMASK_STRIDE: usize = 1; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a 128-bit LSX value. +#[derive(Copy, Clone)] +pub(crate) struct Group(m128i); + +// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. 
+ #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(lsx_vld::<0>(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(lsx_vld::<0>(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + lsx_vst::<0>(self.0, ptr.cast()); + } + } + + /// Returns a `BitMask` indicating all tags in the group which have + /// the given value. + #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + unsafe { + let cmp = lsx_vseq_b(self.0, lsx_vreplgr2vr_b(tag.0 as i32)); + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(cmp)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + unsafe { + let cmp = lsx_vseqi_b::<{ Tag::EMPTY.0 as i8 as i32 }>(self.0); + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(cmp)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. 
+ #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + unsafe { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(self.0)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are full. + #[inline] + pub(crate) fn match_full(&self) -> BitMask { + unsafe { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskgez_b(self.0)) as u16) + } + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + unsafe { + let special = lsx_vslti_b::<0>(self.0); + Group(lsx_vori_b::<{ Tag::DELETED.0 as u32 }>(special)) + } + } +} diff --git a/src/hashbrown/control/group/mod.rs b/src/hashbrown/control/group/mod.rs new file mode 100644 index 0000000..8975f94 --- /dev/null +++ b/src/hashbrown/control/group/mod.rs @@ -0,0 +1,47 @@ +// TESTING NOTE: +// +// Because this module uses `cfg(..)` to select an implementation, it will not +// be linted without being run on targets that actually load each of these +// modules. Be sure to edit `ci/tools.sh` to add in the necessary cfgs if you +// change these, so that your implementation gets properly linted. + +cfg_if::cfg_if! { + // Use the SSE2 implementation if possible: it allows us to scan 16 buckets + // at once instead of 8. We don't bother with AVX since it would require + // runtime dispatch and wouldn't gain us much anyways: the probability of + // finding a match drops off drastically after the first few buckets. 
+ // + // I attempted an implementation on ARM using NEON instructions, but it + // turns out that most NEON instructions have multi-cycle latency, which in + // the end outweighs any gains over the generic implementation. + if #[cfg(all( + target_feature = "sse2", + any(target_arch = "x86", target_arch = "x86_64"), + not(miri), + ))] { + mod sse2; + use sse2 as imp; + } else if #[cfg(all( + target_arch = "aarch64", + target_feature = "neon", + // NEON intrinsics are currently broken on big-endian targets. + // See https://github.com/rust-lang/stdarch/issues/1484. + target_endian = "little", + not(miri), + ))] { + mod neon; + use neon as imp; + } else if #[cfg(all( + target_arch = "loongarch64", + target_feature = "lsx", + not(miri), + ))] { + mod lsx; + use lsx as imp; + } else { + mod generic; + use generic as imp; + } +} +pub(crate) use self::imp::Group; +pub(super) use self::imp::{BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_STRIDE}; diff --git a/src/hashbrown/control/group/neon.rs b/src/hashbrown/control/group/neon.rs new file mode 100644 index 0000000..c64b891 --- /dev/null +++ b/src/hashbrown/control/group/neon.rs @@ -0,0 +1,119 @@ +use super::super::{BitMask, Tag}; +use core::arch::aarch64 as neon; +use core::mem; +use core::num::NonZeroU64; + +pub(crate) type BitMaskWord = u64; +pub(crate) type NonZeroBitMaskWord = NonZeroU64; +pub(crate) const BITMASK_STRIDE: usize = 8; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080; + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a 64-bit NEON value. +#[derive(Copy, Clone)] +pub(crate) struct Group(neon::uint8x8_t); + +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. 
+ /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(neon::vld1_u8(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(neon::vld1_u8(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + neon::vst1_u8(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which *may* + /// have the given value. + #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + unsafe { + let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(tag.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + self.match_tag(Tag::EMPTY) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. 
+ #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + unsafe { + let cmp = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are full. + #[inline] + pub(crate) fn match_full(self) -> BitMask { + unsafe { + let cmp = neon::vcgez_s8(neon::vreinterpret_s8_u8(self.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + unsafe { + let special = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0)); + Group(neon::vorr_u8(special, neon::vdup_n_u8(0x80))) + } + } +} diff --git a/src/hashbrown/control/group/sse2.rs b/src/hashbrown/control/group/sse2.rs new file mode 100644 index 0000000..2b12c01 --- /dev/null +++ b/src/hashbrown/control/group/sse2.rs @@ -0,0 +1,143 @@ +use super::super::{BitMask, Tag}; +use core::mem; +use core::num::NonZeroU16; + +#[cfg(target_arch = "x86")] +use core::arch::x86; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64 as x86; + +pub(crate) type BitMaskWord = u16; +pub(crate) type NonZeroBitMaskWord = NonZeroU16; +pub(crate) const BITMASK_STRIDE: usize = 1; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a 128-bit SSE value. 
+#[derive(Copy, Clone)] +pub(crate) struct Group(x86::__m128i); + +// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(x86::_mm_loadu_si128(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(x86::_mm_load_si128(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + x86::_mm_store_si128(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which have + /// the given value. 
+ #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + #[expect( + clippy::cast_possible_wrap, // tag.0: Tag as i8 + // tag: i32 as u16 + // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the + // upper 16-bits of the i32 are zeroed: + clippy::cast_sign_loss, + clippy::cast_possible_truncation + )] + unsafe { + let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8)); + BitMask(x86::_mm_movemask_epi8(cmp) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + self.match_tag(Tag::EMPTY) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + #[expect( + // tag: i32 as u16 + // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the + // upper 16-bits of the i32 are zeroed: + clippy::cast_sign_loss, + clippy::cast_possible_truncation + )] + unsafe { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask(x86::_mm_movemask_epi8(self.0) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are full. 
+ #[inline] + pub(crate) fn match_full(&self) -> BitMask { + BitMask(!self.match_empty_or_deleted().0) + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + #[expect( + clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8 + )] + unsafe { + let zero = x86::_mm_setzero_si128(); + let special = x86::_mm_cmpgt_epi8(zero, self.0); + Group(x86::_mm_or_si128( + special, + x86::_mm_set1_epi8(Tag::DELETED.0 as i8), + )) + } + } +} diff --git a/src/hashbrown/control/mod.rs b/src/hashbrown/control/mod.rs new file mode 100644 index 0000000..62ef8bf --- /dev/null +++ b/src/hashbrown/control/mod.rs @@ -0,0 +1,10 @@ +mod bitmask; +mod group; +mod tag; + +use self::bitmask::BitMask; +pub(crate) use self::{ + bitmask::BitMaskIter, + group::Group, + tag::{Tag, TagSliceExt}, +}; diff --git a/src/hashbrown/control/tag.rs b/src/hashbrown/control/tag.rs new file mode 100644 index 0000000..486bbba --- /dev/null +++ b/src/hashbrown/control/tag.rs @@ -0,0 +1,82 @@ +use core::{fmt, mem}; + +/// Single tag in a control group. +#[derive(Copy, Clone, PartialEq, Eq)] +#[repr(transparent)] +pub(crate) struct Tag(pub(super) u8); +impl Tag { + /// Control tag value for an empty bucket. + pub(crate) const EMPTY: Tag = Tag(0b1111_1111); + + /// Control tag value for a deleted bucket. + pub(crate) const DELETED: Tag = Tag(0b1000_0000); + + /// Checks whether a control tag represents a full bucket (top bit is clear). 
+ #[inline] + pub(crate) const fn is_full(self) -> bool { + self.0 & 0x80 == 0 + } + + /// Checks whether a control tag represents a special value (top bit is set). + #[inline] + pub(crate) const fn is_special(self) -> bool { + self.0 & 0x80 != 0 + } + + /// Checks whether a special control value is EMPTY (just check 1 bit). + #[inline] + pub(crate) const fn special_is_empty(self) -> bool { + debug_assert!(self.is_special()); + self.0 & 0x01 != 0 + } + + /// Creates a control tag representing a full bucket with the given hash. + #[inline] + pub(crate) const fn full(hash: u64) -> Tag { + // Constant for function that grabs the top 7 bits of the hash. + const MIN_HASH_LEN: usize = if mem::size_of::() < mem::size_of::() { + mem::size_of::() + } else { + mem::size_of::() + }; + + // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit + // value, some hash functions (such as FxHash) produce a usize result + // instead, which means that the top 32 bits are 0 on 32-bit platforms. + // So we use MIN_HASH_LEN constant to handle this. + let top7 = hash >> (MIN_HASH_LEN * 8 - 7); + Tag((top7 & 0x7f) as u8) // truncation + } +} +impl fmt::Debug for Tag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_special() { + if self.special_is_empty() { + f.pad("EMPTY") + } else { + f.pad("DELETED") + } + } else { + f.debug_tuple("full").field(&(self.0 & 0x7F)).finish() + } + } +} + +/// Extension trait for slices of tags. +pub(crate) trait TagSliceExt { + /// Fills the control with the given tag. + fn fill_tag(&mut self, tag: Tag); + + /// Clears out the control. + #[inline] + fn fill_empty(&mut self) { + self.fill_tag(Tag::EMPTY); + } +} +impl TagSliceExt for [mem::MaybeUninit] { + #[inline] + fn fill_tag(&mut self, tag: Tag) { + // SAFETY: We have access to the entire slice, so, we can write to the entire slice. 
+ unsafe { self.as_mut_ptr().write_bytes(tag.0, self.len()) } + } +} diff --git a/src/hashbrown/mod.rs b/src/hashbrown/mod.rs new file mode 100644 index 0000000..f45d3fb --- /dev/null +++ b/src/hashbrown/mod.rs @@ -0,0 +1,37 @@ +#![allow(dead_code)] + +pub mod alloc; +pub mod control; +pub mod raw; +pub mod scopeguard; +pub mod util; + +/// The error type for `try_reserve` methods. +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum TryReserveError { + /// Error due to the computed capacity exceeding the collection's maximum + /// (usually `isize::MAX` bytes). + CapacityOverflow, + + /// The memory allocator returned an error + AllocError { + /// The layout of the allocation request that failed. + layout: std::alloc::Layout, + }, +} + +// matches stdalloc::collections::TryReserveError +impl core::fmt::Display for TryReserveError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("memory allocation failed")?; + let reason = match self { + TryReserveError::CapacityOverflow => { + " because the computed capacity exceeded the collection's maximum" + } + TryReserveError::AllocError { .. 
} => " because the memory allocator returned an error", + }; + f.write_str(reason) + } +} + +impl core::error::Error for TryReserveError {} diff --git a/src/hashbrown/raw.rs b/src/hashbrown/raw.rs new file mode 100644 index 0000000..f6b2e69 --- /dev/null +++ b/src/hashbrown/raw.rs @@ -0,0 +1,4575 @@ +use super::control::BitMaskIter; +use super::control::Group; +use super::control::Tag; +use super::control::TagSliceExt; +use super::scopeguard::guard; +use super::scopeguard::ScopeGuard; +use super::util::invalid_mut; +use super::util::likely; +use super::util::unlikely; +use super::TryReserveError; +use core::array; +use core::iter::FusedIterator; +use core::marker::PhantomData; +use core::mem; +use core::ptr; +use core::ptr::NonNull; +use core::slice; +use std::alloc::handle_alloc_error; +use std::alloc::Layout; + +use super::alloc::do_alloc; +#[cfg(test)] +use super::alloc::AllocError; +use super::alloc::Allocator; +use super::alloc::Global; + +#[inline] +unsafe fn offset_from(to: *const T, from: *const T) -> usize { + unsafe { to.offset_from(from) as usize } +} + +/// Whether memory allocation errors should return an error or abort. +#[derive(Copy, Clone)] +enum Fallibility { + Fallible, + Infallible, +} + +impl Fallibility { + /// Error to return on capacity overflow. + #[cfg_attr(feature = "inline-more", inline)] + fn capacity_overflow(self) -> TryReserveError { + match self { + Fallibility::Fallible => TryReserveError::CapacityOverflow, + Fallibility::Infallible => panic!("Hash table capacity overflow"), + } + } + + /// Error to return on allocation error. 
+    #[cfg_attr(feature = "inline-more", inline)]
+    fn alloc_err(self, layout: Layout) -> TryReserveError {
+        match self {
+            Fallibility::Fallible => TryReserveError::AllocError { layout },
+            Fallibility::Infallible => handle_alloc_error(layout),
+        }
+    }
+}
+
+/// Compile-time properties of a type, exposed as associated constants on
+/// every sized `T` through the blanket impl below.
+trait SizedTypeProperties: Sized {
+    /// `true` if the type has a size of zero bytes.
+    const IS_ZERO_SIZED: bool = mem::size_of::<Self>() == 0;
+    /// `true` if `mem::needs_drop` reports that the type may need dropping.
+    const NEEDS_DROP: bool = mem::needs_drop::<Self>();
+}
+
+impl<T> SizedTypeProperties for T {}
+
+/// Primary hash function, used to select the initial bucket to probe from.
+#[inline]
+#[expect(clippy::cast_possible_truncation)]
+fn h1(hash: u64) -> usize {
+    // On 32-bit platforms we simply ignore the higher hash bits.
+    hash as usize
+}
+
+/// Probe sequence based on triangular numbers, which is guaranteed (since our
+/// table size is a power of two) to visit every group of elements exactly once.
+///
+/// A triangular probe has us jump by 1 more group every time. So first we
+/// jump by 1 group (meaning we just continue our linear scan), then 2 groups
+/// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on.
+///
+/// Proof that the probe will visit every group in the table:
+/// <https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/>
+#[derive(Clone)]
+struct ProbeSeq {
+    pos: usize,
+    stride: usize,
+}
+
+impl ProbeSeq {
+    /// Advances to the next probe position: the stride grows by one group
+    /// width, is added to the position, and the position is wrapped with
+    /// `bucket_mask`.
+    #[inline]
+    fn move_next(&mut self, bucket_mask: usize) {
+        // We should have found an empty bucket by now and ended the probe.
+        debug_assert!(
+            self.stride <= bucket_mask,
+            "Went past end of probe sequence"
+        );
+
+        self.stride += Group::WIDTH;
+        self.pos += self.stride;
+        self.pos &= bucket_mask;
+    }
+}
+
+/// Returns the number of buckets needed to hold the given number of items,
+/// taking the maximum load factor into account.
+///
+/// Returns `None` if an overflow occurs.
+///
+/// This ensures that `buckets * table_layout.size >= table_layout.ctrl_align`.
+// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258 +#[cfg_attr(target_os = "emscripten", inline(never))] +#[cfg_attr(not(target_os = "emscripten"), inline)] +fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option { + debug_assert_ne!(cap, 0); + + // For small tables we require at least 1 empty bucket so that lookups are + // guaranteed to terminate if an element doesn't exist in the table. + if cap < 15 { + // Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a + // platform with Group::WIDTH of 16 (like x86_64 with SSE2). For small + // bucket sizes, this ends up wasting quite a few bytes just to pad to + // the relatively larger ctrl_align: + // + // | capacity | buckets | bytes allocated | bytes per item | + // | -------- | ------- | --------------- | -------------- | + // | 3 | 4 | 36 | (Yikes!) 12.0 | + // | 7 | 8 | 40 | (Poor) 5.7 | + // | 14 | 16 | 48 | 3.4 | + // | 28 | 32 | 80 | 3.3 | + // + // In general, buckets * table_layout.size >= table_layout.ctrl_align + // must be true to avoid these edges. This is implemented by adjusting + // the minimum capacity upwards for small items. This code only needs + // to handle ctrl_align which are less than or equal to Group::WIDTH, + // because valid layout sizes are always a multiple of the alignment, + // so anything with alignment over the Group::WIDTH won't hit this edge + // case. + + // This is brittle, e.g. if we ever add 32 byte groups, it will select + // 3 regardless of the table_layout.size. + let min_cap = match (Group::WIDTH, table_layout.size) { + (16, 0..=1) => 14, + (16, 2..=3) | (8, 0..=1) => 7, + _ => 3, + }; + let cap = min_cap.max(cap); + // We don't bother with a table size of 2 buckets since that can only + // hold a single element. Instead, we skip directly to a 4 bucket table + // which can hold 3 elements. 
+ let buckets = if cap < 4 { + 4 + } else if cap < 8 { + 8 + } else { + 16 + }; + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + return Some(buckets); + } + + // Otherwise require 1/8 buckets to be empty (87.5% load) + // + // Be careful when modifying this, calculate_layout relies on the + // overflow check here. + let adjusted_cap = cap.checked_mul(8)? / 7; + + // Any overflows will have been caught by the checked_mul. Also, any + // rounding errors from the division above will be cleaned up by + // next_power_of_two (which can't overflow because of the previous division). + let buckets = adjusted_cap.next_power_of_two(); + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + Some(buckets) +} + +// `maximum_buckets_in` relies on the property that for non-ZST `T`, any +// chosen `buckets` will satisfy `buckets * table_layout.size >= +// table_layout.ctrl_align`, so `calculate_layout_for` does not need to add +// extra padding beyond `table_layout.size * buckets`. If small-table bucket +// selection or growth policy changes, revisit `maximum_buckets_in`. +#[inline] +fn ensure_bucket_bytes_at_least_ctrl_align(table_layout: TableLayout, buckets: usize) { + if table_layout.size != 0 { + let prod = table_layout.size.saturating_mul(buckets); + debug_assert!(prod >= table_layout.ctrl_align); + } +} + +/// Returns the maximum effective capacity for the given bucket mask, taking +/// the maximum load factor into account. +#[inline] +fn bucket_mask_to_capacity(bucket_mask: usize) -> usize { + if bucket_mask < 8 { + // For tables with 1/2/4/8 buckets, we always reserve one empty slot. + // Keep in mind that the bucket mask is one less than the bucket count. + bucket_mask + } else { + // For larger tables we reserve 12.5% of the slots as empty. 
+        ((bucket_mask + 1) / 8) * 7
+    }
+}
+
+/// Helper which allows the max calculation for `ctrl_align` to be statically computed for each `T`
+/// while keeping the rest of `calculate_layout_for` independent of `T`
+#[derive(Copy, Clone)]
+struct TableLayout {
+    size: usize,
+    ctrl_align: usize,
+}
+
+impl TableLayout {
+    /// Captures the size of `T` and the control-byte alignment, which is the
+    /// larger of `T`'s alignment and `Group::WIDTH`.
+    #[inline]
+    const fn new<T>() -> Self {
+        let layout = Layout::new::<T>();
+        Self {
+            size: layout.size(),
+            ctrl_align: if layout.align() > Group::WIDTH {
+                layout.align()
+            } else {
+                Group::WIDTH
+            },
+        }
+    }
+
+    /// Computes the allocation layout for a table with `buckets` buckets.
+    ///
+    /// Returns the layout together with the byte offset of the control bytes
+    /// (the data size rounded up to `ctrl_align`), or `None` if a size
+    /// computation overflows or the total length is too close to
+    /// `isize::MAX`.
+    #[inline]
+    fn calculate_layout_for(self, buckets: usize) -> Option<(Layout, usize)> {
+        debug_assert!(buckets.is_power_of_two());
+
+        let TableLayout { size, ctrl_align } = self;
+        // Manual layout calculation since Layout methods are not yet stable.
+        let ctrl_offset =
+            size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1);
+        let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?;
+
+        // We need an additional check to ensure that the allocation doesn't
+        // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295).
+        if len > isize::MAX as usize - (ctrl_align - 1) {
+            return None;
+        }
+
+        Some((
+            unsafe { Layout::from_size_align_unchecked(len, ctrl_align) },
+            ctrl_offset,
+        ))
+    }
+}
+
+/// A reference to a hash table bucket containing a `T`.
+///
+/// This is usually just a pointer to the element itself. However if the element
+/// is a ZST, then we instead track the index of the element in the table so
+/// that `erase` works properly.
+pub struct Bucket<T> {
+    // This actually points one element past the stored value rather than at
+    // the value itself: that is needed to maintain pointer arithmetic
+    // invariants, which a direct pointer to the element would complicate.
+    // Using `NonNull` for variance and niche layout
+    ptr: NonNull<T>,
+}
+
+// This Send impl is needed for rayon support. This is safe since Bucket is
+// never exposed in a public API.
+unsafe impl Send for Bucket {} + +impl Clone for Bucket { + #[inline] + fn clone(&self) -> Self { + Self { ptr: self.ptr } + } +} + +impl Bucket { + /// Creates a [`Bucket`] that contain pointer to the data. + /// The pointer calculation is performed by calculating the + /// offset from given `base` pointer (convenience for + /// `base.as_ptr().sub(index)`). + /// + /// `index` is in units of `T`; e.g., an `index` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. + /// + /// If the `T` is a ZST, then we instead track the index of the element + /// in the table so that `erase` works properly (return + /// `NonNull::new_unchecked((index + 1) as *mut T)`) + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and the safety + /// rules of [`NonNull::new_unchecked`] function. + /// + /// Thus, in order to uphold the safety contracts for the [`<*mut T>::sub`] method + /// and [`NonNull::new_unchecked`] function, as well as for the correct + /// logic of the work of this crate, the following rules are necessary and + /// sufficient: + /// + /// * the `base` pointer must not be `dangling` and must points to the + /// end of the first `value element` from the `data part` of the table, i.e. + /// must be the pointer that returned by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `index` must not be greater than `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the + /// `index` must not be greater than `RawTableInner.bucket_mask`, i.e. 
+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + #[inline] + unsafe fn from_base_index(base: NonNull, index: usize) -> Self { + // If mem::size_of::() != 0 then return a pointer to an `element` in + // the data part of the table (we start counting from "0", so that + // in the expression T[last], the "last" index actually one less than the + // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"): + // + // `from_base_index(base, 1).as_ptr()` returns a pointer that + // points here in the data part of the table + // (to the start of T1) + // | + // | `base: NonNull` must point here + // | (to the end of T0 or to the start of C0) + // v v + // [Padding], Tlast, ..., |T1|, T0, |C0, C1, ..., Clast + // ^ + // `from_base_index(base, 1)` returns a pointer + // that points here in the data part of the table + // (to the end of T1) + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes + // or metadata for data. + let ptr = if T::IS_ZERO_SIZED { + // won't overflow because index must be less than length (bucket_mask) + // and bucket_mask is guaranteed to be less than `isize::MAX` + // (see TableLayout::calculate_layout_for method) + invalid_mut(index + 1) + } else { + unsafe { base.as_ptr().sub(index) } + }; + Self { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + } + } + + /// Calculates the index of a [`Bucket`] as distance between two pointers + /// (convenience for `base.as_ptr().offset_from(self.ptr.as_ptr()) as usize`). + /// The returned value is in units of T: the distance in bytes divided by + /// [`core::mem::size_of::()`]. + /// + /// If the `T` is a ZST, then we return the index of the element in + /// the table so that `erase` works properly (return `self.ptr.as_ptr() as usize - 1`). + /// + /// This function is the inverse of [`from_base_index`]. 
+ /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*const T>::offset_from`] method of `*const T`. + /// + /// Thus, in order to uphold the safety contracts for [`<*const T>::offset_from`] + /// method, as well as for the correct logic of the work of this crate, the + /// following rules are necessary and sufficient: + /// + /// * `base` contained pointer must not be `dangling` and must point to the + /// end of the first `element` from the `data part` of the table, i.e. + /// must be a pointer that returns by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `self` also must not contain dangling pointer; + /// + /// * both `self` and `base` must be created from the same [`RawTable`] + /// (or [`RawTableInner`]). + /// + /// If `mem::size_of::() == 0`, this function is always safe. + #[inline] + unsafe fn to_base_index(&self, base: NonNull) -> usize { + // If mem::size_of::() != 0 then return an index under which we used to store the + // `element` in the data part of the table (we start counting from "0", so + // that in the expression T[last], the "last" index actually is one less than the + // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"). 
+ // For example for 5th element in table calculation is performed like this: + // + // mem::size_of::() + // | + // | `self = from_base_index(base, 5)` that returns pointer + // | that points here in the data part of the table + // | (to the end of T5) + // | | `base: NonNull` must point here + // v | (to the end of T0 or to the start of C0) + // /???\ v v + // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast + // \__________ __________/ + // \/ + // `bucket.to_base_index(base)` = 5 + // (base.as_ptr() as usize - self.ptr.as_ptr() as usize) / mem::size_of::() + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes or metadata for data. + if T::IS_ZERO_SIZED { + // this can not be UB + self.ptr.as_ptr() as usize - 1 + } else { + unsafe { offset_from(base.as_ptr(), self.ptr.as_ptr()) } + } + } + + /// Acquires the underlying raw pointer `*mut T` to `data`. + /// + /// # Note + /// + /// If `T` is not [`Copy`], do not use `*mut T` methods that can cause calling the + /// destructor of `T` (for example the [`<*mut T>::drop_in_place`] method), because + /// for properly dropping the data we also need to clear `data` control bytes. If we + /// drop data, but do not clear `data control byte` it leads to double drop when + /// [`RawTable`] goes out of scope. + /// + /// If you modify an already initialized `value`, so [`Hash`] and [`Eq`] on the new + /// `T` value and its borrowed form *must* match those for the old `T` value, as the map + /// will not re-evaluate where the new value should go, meaning the value may become + /// "lost" if their location does not reflect their state. 
+ #[inline] + pub fn as_ptr(&self) -> *mut T { + if T::IS_ZERO_SIZED { + // Just return an arbitrary ZST pointer which is properly aligned + // invalid pointer is good enough for ZST + invalid_mut(mem::align_of::()) + } else { + unsafe { self.ptr.as_ptr().sub(1) } + } + } + + /// Acquires the underlying non-null pointer `*mut T` to `data`. + #[inline] + fn as_non_null(&self) -> NonNull { + // SAFETY: `self.ptr` is already a `NonNull` + unsafe { NonNull::new_unchecked(self.as_ptr()) } + } + + /// Create a new [`Bucket`] that is offset from the `self` by the given + /// `offset`. The pointer calculation is performed by calculating the + /// offset from `self` pointer (convenience for `self.ptr.as_ptr().sub(offset)`). + /// This function is used for iterators. + /// + /// `offset` is in units of `T`; e.g., a `offset` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and safety + /// rules of [`NonNull::new_unchecked`] function. + /// + /// Thus, in order to uphold the safety contracts for [`<*mut T>::sub`] method + /// and [`NonNull::new_unchecked`] function, as well as for the correct + /// logic of the work of this crate, the following rules are necessary and + /// sufficient: + /// + /// * `self` contained pointer must not be `dangling`; + /// + /// * `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`, + /// i.e. `(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other + /// words, `self.to_base_index() + offset + 1` must be no greater than the number returned + /// by the function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the + /// `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`, + /// i.e. 
`(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other words, + /// `self.to_base_index() + offset + 1` must be no greater than the number returned by the + /// function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + #[inline] + unsafe fn next_n(&self, offset: usize) -> Self { + let ptr = if T::IS_ZERO_SIZED { + // invalid pointer is good enough for ZST + invalid_mut(self.ptr.as_ptr() as usize + offset) + } else { + unsafe { self.ptr.as_ptr().sub(offset) } + }; + Self { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + } + } + + /// Executes the destructor (if any) of the pointed-to `data`. + /// + /// # Safety + /// + /// See [`ptr::drop_in_place`] for safety concerns. + /// + /// You should use [`RawTable::erase`] instead of this function, + /// or be careful with calling this function directly, because for + /// properly dropping the data we need also clear `data` control bytes. + /// If we drop data, but do not erase `data control byte` it leads to + /// double drop when [`RawTable`] goes out of scope. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn drop(&self) { + unsafe { + self.as_ptr().drop_in_place(); + } + } + + /// Reads the `value` from `self` without moving it. This leaves the + /// memory in `self` unchanged. + /// + /// # Safety + /// + /// See [`ptr::read`] for safety concerns. + /// + /// You should use [`RawTable::remove`] instead of this function, + /// or be careful with calling this function directly, because compiler + /// calls its destructor when the read `value` goes out of scope. It + /// can cause double dropping when [`RawTable`] goes out of scope, + /// because of not erased `data control byte`. + #[inline] + pub unsafe fn read(&self) -> T { + unsafe { self.as_ptr().read() } + } + + /// Overwrites a memory location with the given `value` without reading + /// or dropping the old value (like [`ptr::write`] function). 
+ /// + /// # Safety + /// + /// See [`ptr::write`] for safety concerns. + /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. + #[inline] + pub unsafe fn write(&self, val: T) { + unsafe { + self.as_ptr().write(val); + } + } + + /// Returns a shared immutable reference to the `value`. + /// + /// # Safety + /// + /// See [`NonNull::as_ref`] for safety concerns. + #[inline] + pub unsafe fn as_ref<'a>(&self) -> &'a T { + unsafe { &*self.as_ptr() } + } + + /// Returns a unique mutable reference to the `value`. + /// + /// # Safety + /// + /// See [`NonNull::as_mut`] for safety concerns. + /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. + #[inline] + pub unsafe fn as_mut<'a>(&self) -> &'a mut T { + unsafe { &mut *self.as_ptr() } + } +} + +/// A raw hash table with an unsafe API. +pub struct RawTable { + table: RawTableInner, + alloc: A, + // Tell dropck that we own instances of T. + marker: PhantomData, +} + +/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless +/// of how many different key-value types are used. +struct RawTableInner { + // Mask to get an index from a hash value. The value is one less than the + // number of buckets in the table. + bucket_mask: usize, + + // [Padding], T_n, ..., T1, T0, C0, C1, ... 
+ // ^ points here + ctrl: NonNull, + + // Number of elements that can be inserted before we need to grow the table + growth_left: usize, + + // Number of elements in the table, only really used by len() + items: usize, +} + +impl RawTable { + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never written to + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + pub const fn new() -> Self { + Self { + table: RawTableInner::NEW, + alloc: Global, + marker: PhantomData, + } + } + + /// Allocates a new hash table with at least enough capacity for inserting + /// the given number of elements without reallocating. + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } +} + +impl RawTable { + const TABLE_LAYOUT: TableLayout = TableLayout::new::(); + + /// Creates a new empty hash table without allocating any memory, using the + /// given allocator. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never written to + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + pub const fn new_in(alloc: A) -> Self { + Self { + table: RawTableInner::NEW, + alloc, + marker: PhantomData, + } + } + + /// Allocates a new hash table with the given number of buckets. + /// + /// The control bytes are left uninitialized. 
+ #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new_uninitialized( + alloc: A, + buckets: usize, + fallibility: Fallibility, + ) -> Result { + debug_assert!(buckets.is_power_of_two()); + + Ok(Self { + table: unsafe { + RawTableInner::new_uninitialized(&alloc, Self::TABLE_LAYOUT, buckets, fallibility) + }?, + alloc, + marker: PhantomData, + }) + } + + /// Allocates a new hash table using the given allocator, with at least enough capacity for + /// inserting the given number of elements without reallocating. + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self { + table: RawTableInner::with_capacity(&alloc, Self::TABLE_LAYOUT, capacity), + alloc, + marker: PhantomData, + } + } + + /// Returns a reference to the underlying allocator. + #[inline] + pub fn allocator(&self) -> &A { + &self.alloc + } + + /// Returns pointer to one past last `data` element in the table as viewed from + /// the start point of the allocation. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub fn data_end(&self) -> NonNull { + // `self.table.ctrl.cast()` returns pointer that + // points here (to the end of `T0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTable::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. 
`h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + self.table.ctrl.cast() + } + + /// Returns pointer to start of data table. + #[inline] + pub unsafe fn data_start(&self) -> NonNull { + unsafe { NonNull::new_unchecked(self.data_end().as_ptr().wrapping_sub(self.num_buckets())) } + } + + /// Returns the total amount of memory allocated internally by the hash + /// table, in bytes. + /// + /// The returned number is informational only. It is intended to be + /// primarily used for memory profiling. + #[inline] + pub fn allocation_size(&self) -> usize { + // SAFETY: We use the same `table_layout` that was used to allocate + // this table. + unsafe { self.table.allocation_size_or_zero(Self::TABLE_LAYOUT) } + } + + /// Returns the index of a bucket from a `Bucket`. + #[inline] + pub unsafe fn bucket_index(&self, bucket: &Bucket) -> usize { + unsafe { bucket.to_base_index(self.data_end()) } + } + + /// Returns a pointer to an element in the table. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the caller of this function must observe the + /// following safety rules: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTable::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::num_buckets`] function, i.e. 
+ /// `(index + 1) <= self.num_buckets()`. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub unsafe fn bucket(&self, index: usize) -> Bucket { + // If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + // (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + // the "buckets" number of our `RawTable`, i.e. "n = RawTable::num_buckets() - 1"): + // + // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`) + // | + // | `base = self.data_end()` points here + // | (to the start of CT0 or to the end of T0) + // v v + // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + // ^ \__________ __________/ + // `table.bucket(3)` returns a pointer that points \/ + // here in the `data` part of the `RawTable` (to additional control bytes + // the end of T3) `m = Group::WIDTH - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`; + // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask` + // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.table.bucket_mask = self.num_buckets() - 1`. + debug_assert_ne!(self.table.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { Bucket::from_base_index(self.data_end(), index) } + } + + /// Erases an element from the table without dropping it. 
+ #[cfg_attr(feature = "inline-more", inline)] + unsafe fn erase_no_drop(&mut self, item: &Bucket) { + unsafe { + let index = self.bucket_index(item); + self.table.erase(index); + } + } + + /// Erases an element from the table, dropping it in place. + #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub unsafe fn erase(&mut self, item: Bucket) { + unsafe { + // Erase the element from the table first since drop might panic. + self.erase_no_drop(&item); + item.drop(); + } + } + + /// Removes an element from the table, returning it. + /// + /// This also returns an index to the newly free bucket. + #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub unsafe fn remove(&mut self, item: Bucket) -> (T, usize) { + unsafe { + self.erase_no_drop(&item); + (item.read(), self.bucket_index(&item)) + } + } + + /// Removes an element from the table, returning it. + /// + /// This also returns an index to the newly free bucket + /// and the former `Tag` for that bucket. + #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub(crate) unsafe fn remove_tagged(&mut self, item: Bucket) -> (T, usize, Tag) { + unsafe { + let index = self.bucket_index(&item); + let tag = *self.table.ctrl(index); + self.table.erase(index); + (item.read(), index, tag) + } + } + + /// Finds and removes an element from the table, returning it. + #[cfg_attr(feature = "inline-more", inline)] + pub fn remove_entry( + &mut self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { self.remove(bucket).0 })), + None => Ok(None), + } + } + + /// Marks all table buckets as empty without dropping their contents. 
+ #[cfg_attr(feature = "inline-more", inline)] + pub fn clear_no_drop(&mut self) { + self.table.clear_no_drop(); + } + + /// Removes all elements from the table without freeing the backing memory. + #[cfg_attr(feature = "inline-more", inline)] + pub fn clear(&mut self) { + if self.is_empty() { + // Special case empty table to avoid surprising O(capacity) time. + return; + } + // Ensure that the table is reset even if one of the drops panic + let mut self_ = guard(self, |self_| self_.clear_no_drop()); + unsafe { + // SAFETY: ScopeGuard sets to zero the `items` field of the table + // even in case of panic during the dropping of the elements so + // that there will be no double drop of the elements. + self_.table.drop_elements::(); + } + } + + /// Shrinks the table to fit `max(self.len(), min_size)` elements. + #[cfg_attr(feature = "inline-more", inline)] + pub fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) { + // Calculate the minimal number of elements that we need to reserve + // space for. + let min_size = usize::max(self.table.items, min_size); + if min_size == 0 { + let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. + old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + return; + } + + // Calculate the number of buckets that we need for this number of + // elements. If the calculation overflows then the requested bucket + // count must be larger than what we have right and nothing needs to be + // done. 
+ let Some(min_buckets) = capacity_to_buckets(min_size, Self::TABLE_LAYOUT) else { + return; + }; + + // If we have more buckets than we need, shrink the table. + if min_buckets < self.num_buckets() { + // Fast path if the table is empty + if self.table.items == 0 { + let new_inner = + RawTableInner::with_capacity(&self.alloc, Self::TABLE_LAYOUT, min_size); + let mut old_inner = mem::replace(&mut self.table, new_inner); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. + old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } else { + // SAFETY: + // 1. We know for sure that `min_size >= self.table.items`. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + let result = unsafe { self.resize(min_size, hasher, Fallibility::Infallible) }; + + // SAFETY: The result of calling the `resize` function cannot be an error + // because `fallibility == Fallibility::Infallible. + unsafe { result.unwrap_unchecked() }; + } + } + } + + /// Ensures that at least `additional` items can be inserted into the table + /// without reallocation. + #[cfg_attr(feature = "inline-more", inline)] + pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { + if likely(additional > self.table.growth_left) { + // SAFETY: The [`RawTableInner`] must already have properly initialized control + // bytes since we will never expose RawTable::new_uninitialized in a public API. + let result = + unsafe { self.reserve_rehash(additional, hasher, Fallibility::Infallible) }; + + // SAFETY: All allocation errors will be caught inside `RawTableInner::reserve_rehash`. 
+ unsafe { result.unwrap_unchecked() }; + } + } + + /// Tries to ensure that at least `additional` items can be inserted into + /// the table without reallocation. + #[cfg_attr(feature = "inline-more", inline)] + pub fn try_reserve( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + ) -> Result<(), TryReserveError> { + if additional > self.table.growth_left { + // SAFETY: The [`RawTableInner`] must already have properly initialized control + // bytes since we will never expose RawTable::new_uninitialized in a public API. + unsafe { self.reserve_rehash(additional, hasher, Fallibility::Fallible) } + } else { + Ok(()) + } + } + + /// Out-of-line slow path for `reserve` and `try_reserve`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes, + /// otherwise calling this function results in [`undefined behavior`] + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cold] + #[inline(never)] + unsafe fn reserve_rehash( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + fallibility: Fallibility, + ) -> Result<(), TryReserveError> { + unsafe { + // SAFETY: + // 1. We know for sure that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 2. The `drop` function is the actual drop function of the elements stored in + // the table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + self.table.reserve_rehash_inner( + &self.alloc, + additional, + &|table, index| hasher(table.bucket::(index).as_ref()), + fallibility, + Self::TABLE_LAYOUT, + if T::NEEDS_DROP { + Some(|ptr| ptr::drop_in_place(ptr.cast::())) + } else { + None + }, + ) + } + } + + /// Allocates a new table of a different size and moves the contents of the + /// current table into it. 
+ /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes, + /// otherwise calling this function results in [`undefined behavior`] + /// + /// The caller of this function must ensure that `capacity >= self.table.items` + /// otherwise: + /// + /// * If `self.table.items != 0`, calling of this function with `capacity` + /// equal to 0 (`capacity == 0`) results in [`undefined behavior`]. + /// + /// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)` + /// calling this function are never return (will loop infinitely). + /// + /// See [`RawTableInner::find_insert_index`] for more information. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn resize( + &mut self, + capacity: usize, + hasher: impl Fn(&T) -> u64, + fallibility: Fallibility, + ) -> Result<(), TryReserveError> { + // SAFETY: + // 1. The caller of this function guarantees that `capacity >= self.table.items`. + // 2. We know for sure that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + unsafe { + self.table.resize_inner( + &self.alloc, + capacity, + &|table, index| hasher(table.bucket::(index).as_ref()), + fallibility, + Self::TABLE_LAYOUT, + ) + } + } + + /// Inserts a new element into the table, and returns its raw bucket. + /// + /// This does not check if the given element already exists in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { + unsafe { + // SAFETY: + // 1. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose `RawTable::new_uninitialized` in a public API. + // + // 2. We reserve additional space (if necessary) right after calling this function. 
+ let mut index = self.table.find_insert_index(hash); + + // We can avoid growing the table once we have reached our load factor if we are replacing + // a tombstone. This works since the number of EMPTY slots does not change in this case. + // + // SAFETY: The function is guaranteed to return an index in the range `0..=self.num_buckets()`. + let old_ctrl = *self.table.ctrl(index); + if unlikely(self.table.growth_left == 0 && old_ctrl.special_is_empty()) { + self.reserve(1, hasher); + // SAFETY: We know for sure that `RawTableInner` has control bytes + // initialized and that there is extra space in the table. + index = self.table.find_insert_index(hash); + } + + self.insert_at_index(hash, index, value) + } + } + + /// Inserts a new element into the table, and returns a mutable reference to it. + /// + /// This does not check if the given element already exists in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert_entry(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> &mut T { + unsafe { self.insert(hash, value, hasher).as_mut() } + } + + /// Inserts a new element into the table, without growing the table. + /// + /// There must be enough space in the table to insert the new element. + /// + /// This does not check if the given element already exists in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { + unsafe { + let (index, old_ctrl) = self.table.prepare_insert_index(hash); + let bucket = self.table.bucket(index); + + // If we are replacing a DELETED entry then we don't need to update + // the load counter. + self.table.growth_left -= old_ctrl.special_is_empty() as usize; + + bucket.write(value); + self.table.items += 1; + bucket + } + } + + /// Temporarily removes a bucket, applying the given function to the removed + /// element and optionally put back the returned value in the same bucket. 
+ /// + /// Returns tag for bucket if the bucket is emptied out. + /// + /// This does not check if the given bucket is actually occupied. + #[cfg_attr(feature = "inline-more", inline)] + pub(crate) unsafe fn replace_bucket_with(&mut self, bucket: Bucket, f: F) -> Option + where + F: FnOnce(T) -> Option, + { + unsafe { + let index = self.bucket_index(&bucket); + let old_ctrl = *self.table.ctrl(index); + debug_assert!(self.is_bucket_full(index)); + let old_growth_left = self.table.growth_left; + let item = self.remove(bucket).0; + if let Some(new_item) = f(item) { + self.table.growth_left = old_growth_left; + self.table.set_ctrl(index, old_ctrl); + self.table.items += 1; + self.bucket(index).write(new_item); + None + } else { + Some(old_ctrl) + } + } + } + + /// Searches for an element in the table. If the element is not found, + /// returns `Err` with the position of a slot where an element with the + /// same hash could be inserted. + /// + /// This function may resize the table if additional space is required for + /// inserting an element. + #[inline] + pub fn find_or_find_insert_index( + &mut self, + hash: u64, + mut eq: impl FnMut(&T) -> Result, + hasher: impl Fn(&T) -> u64, + ) -> Result, usize>, E> { + self.reserve(1, hasher); + + unsafe { + // SAFETY: + // 1. We know for sure that there is at least one empty `bucket` in the table. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will + // never expose `RawTable::new_uninitialized` in a public API. + // 3. The `find_or_find_insert_index_inner` function returns the `index` of only the full bucket, + // which is in the range `0..self.num_buckets()` (since there is at least one empty `bucket` in + // the table), so calling `self.bucket(index)` and `Bucket::as_ref` is safe. + let indexes = self + .table + .find_or_find_insert_index_inner(hash, &mut |index| { + eq(self.bucket(index).as_ref()) + })?; + + match indexes { + // SAFETY: See explanation above. 
+ Ok(index) => Ok(Ok(self.bucket(index))), + Err(index) => Ok(Err(index)), + } + } + } + + /// Inserts a new element into the table at the given index with the given hash, + /// and returns its raw bucket. + /// + /// # Safety + /// + /// `index` must point to a slot previously returned by + /// `find_or_find_insert_index`, and no mutation of the table must have + /// occurred since that call. + #[inline] + pub unsafe fn insert_at_index(&mut self, hash: u64, index: usize, value: T) -> Bucket { + unsafe { self.insert_tagged_at_index(Tag::full(hash), index, value) } + } + + /// Inserts a new element into the table at the given index with the given tag, + /// and returns its raw bucket. + /// + /// # Safety + /// + /// `index` must point to a slot previously returned by + /// `find_or_find_insert_index`, and no mutation of the table must have + /// occurred since that call. + #[inline] + pub(crate) unsafe fn insert_tagged_at_index( + &mut self, + tag: Tag, + index: usize, + value: T, + ) -> Bucket { + unsafe { + let old_ctrl = *self.table.ctrl(index); + self.table.record_item_insert_at(index, old_ctrl, tag); + + let bucket = self.bucket(index); + bucket.write(value); + bucket + } + } + + /// Searches for an element in the table. + #[inline] + pub fn find( + &self, + hash: u64, + mut eq: impl FnMut(&T) -> Result, + ) -> Result>, E> { + unsafe { + // SAFETY: + // 1. The [`RawTableInner`] must already have properly initialized control bytes since we + // will never expose `RawTable::new_uninitialized` in a public API. + // 1. The `find_inner` function returns the `index` of only the full bucket, which is in + // the range `0..self.num_buckets()`, so calling `self.bucket(index)` and `Bucket::as_ref` + // is safe. + let result = self + .table + .find_inner(hash, &mut |index| eq(self.bucket(index).as_ref()))?; + + // Avoid `Option::map` because it bloats LLVM IR. + match result { + // SAFETY: See explanation above. 
+ Some(index) => Ok(Some(self.bucket(index))), + None => Ok(None), + } + } + } + + /// Gets a reference to an element in the table. + #[inline] + pub fn get( + &self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { bucket.as_ref() })), + None => Ok(None), + } + } + + /// Gets a mutable reference to an element in the table. + #[inline] + #[allow(clippy::mut_from_ref)] + pub fn get_mut( + &self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { bucket.as_mut() })), + None => Ok(None), + } + } + + /// Gets a reference to an element in the table at the given bucket index. + #[inline] + pub fn get_bucket(&self, index: usize) -> Option<&T> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_ref()) + } else { + None + } + } + } + + /// Gets a mutable reference to an element in the table at the given bucket index. + #[inline] + pub fn get_bucket_mut(&mut self, index: usize) -> Option<&mut T> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_mut()) + } else { + None + } + } + } + + /// Returns a pointer to an element in the table, but only after verifying that + /// the index is in-bounds and the bucket is occupied. + #[inline] + pub fn checked_bucket(&self, index: usize) -> Option> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index)) + } else { + None + } + } + } + + /// Attempts to get mutable references to `N` entries in the table at once. + /// + /// Returns an array of length `N` with the results of each query. + /// + /// At most one mutable reference will be returned to any entry. `None` will be returned if any + /// of the hashes are duplicates. 
`None` will be returned if the hash is not found. + /// + /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to + /// the `i`th key to be looked up. + pub fn get_disjoint_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> [Option<&'_ mut T>; N] { + unsafe { + let ptrs = self.get_disjoint_mut_pointers(hashes, eq); + + for (i, cur) in ptrs.iter().enumerate() { + if cur.is_some() && ptrs[..i].contains(cur) { + panic!("duplicate keys found"); + } + } + // All bucket are distinct from all previous buckets so we're clear to return the result + // of the lookup. + + ptrs.map(|ptr| ptr.map(|mut ptr| ptr.as_mut())) + } + } + + pub unsafe fn get_disjoint_unchecked_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> [Option<&'_ mut T>; N] { + let ptrs = unsafe { self.get_disjoint_mut_pointers(hashes, eq) }; + ptrs.map(|ptr| ptr.map(|mut ptr| unsafe { ptr.as_mut() })) + } + + unsafe fn get_disjoint_mut_pointers( + &mut self, + hashes: [u64; N], + mut eq: impl FnMut(usize, &T) -> bool, + ) -> [Option>; N] { + array::from_fn(|i| { + self.find(hashes[i], |k| Ok::<_, ()>(eq(i, k))) + .unwrap() + .map(|cur| cur.as_non_null()) + }) + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the table might be able to hold + /// more, but is guaranteed to be able to hold at least this many. + #[inline] + pub fn capacity(&self) -> usize { + self.table.items + self.table.growth_left + } + + /// Returns the number of elements in the table. + #[inline] + pub fn len(&self) -> usize { + self.table.items + } + + /// Returns `true` if the table contains no elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the number of buckets in the table. + #[inline] + pub fn num_buckets(&self) -> usize { + self.table.bucket_mask + 1 + } + + /// Checks whether the bucket at `index` is full. 
+ /// + /// # Safety + /// + /// The caller must ensure `index` is less than the number of buckets. + #[inline] + pub unsafe fn is_bucket_full(&self, index: usize) -> bool { + unsafe { self.table.is_bucket_full(index) } + } + + /// Returns an iterator over every element in the table. It is up to + /// the caller to ensure that the `RawTable` outlives the `RawIter`. + /// Because we cannot make the `next` method unsafe on the `RawIter` + /// struct, we have to make the `iter` method unsafe. + #[inline] + pub unsafe fn iter(&self) -> RawIter { + // SAFETY: + // 1. The caller must uphold the safety contract for `iter` method. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + unsafe { self.table.iter() } + } + + /// Returns an iterator over occupied buckets that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHash`. Because we cannot make the `next` method unsafe on the + /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash { + unsafe { RawIterHash::new(self, hash) } + } + + /// Returns an iterator over occupied bucket indices that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHashIndices`. 
Because we cannot make the `next` method unsafe on the + /// `RawIterHashIndices` struct, we have to make the `iter_hash_buckets` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn iter_hash_buckets(&self, hash: u64) -> RawIterHashIndices { + unsafe { RawIterHashIndices::new(&self.table, hash) } + } + + /// Returns an iterator over full buckets indices in the table. + /// + /// See [`RawTableInner::full_buckets_indices`] for safety conditions. + #[inline(always)] + pub unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + unsafe { self.table.full_buckets_indices() } + } + + /// Returns an iterator which removes all elements from the table without + /// freeing the memory. + #[cfg_attr(feature = "inline-more", inline)] + pub fn drain(&mut self) -> RawDrain<'_, T, A> { + unsafe { + let iter = self.iter(); + self.drain_iter_from(iter) + } + } + + /// Returns an iterator which removes all elements from the table without + /// freeing the memory. + /// + /// Iteration starts at the provided iterator's current location. + /// + /// It is up to the caller to ensure that the iterator is valid for this + /// `RawTable` and covers all items that remain in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn drain_iter_from(&mut self, iter: RawIter) -> RawDrain<'_, T, A> { + debug_assert_eq!(iter.len(), self.len()); + RawDrain { + iter, + table: mem::replace(&mut self.table, RawTableInner::NEW), + orig_table: NonNull::from(&mut self.table), + marker: PhantomData, + } + } + + /// Returns an iterator which consumes all elements from the table. + /// + /// Iteration starts at the provided iterator's current location. + /// + /// It is up to the caller to ensure that the iterator is valid for this + /// `RawTable` and covers all items that remain in the table. 
+ pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { + debug_assert_eq!(iter.len(), self.len()); + + let allocation = self.into_allocation(); + RawIntoIter { + iter, + allocation, + marker: PhantomData, + } + } + + /// Converts the table into a raw allocation. The contents of the table + /// should be dropped using a `RawIter` before freeing the allocation. + #[cfg_attr(feature = "inline-more", inline)] + pub fn into_allocation(self) -> Option<(NonNull, Layout, A)> { + let alloc = if self.table.is_empty_singleton() { + None + } else { + let (layout, ctrl_offset) = { + let option = Self::TABLE_LAYOUT.calculate_layout_for(self.table.num_buckets()); + unsafe { option.unwrap_unchecked() } + }; + Some(( + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset).cast()) }, + layout, + unsafe { ptr::read(&raw const self.alloc) }, + )) + }; + mem::forget(self); + alloc + } +} + +unsafe impl Send for RawTable +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawTable +where + T: Sync, + A: Sync, +{ +} + +impl RawTableInner { + const NEW: Self = RawTableInner::new(); + + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never accessed + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + const fn new() -> Self { + Self { + // Be careful to cast the entire slice to a raw pointer. + ctrl: unsafe { + NonNull::new_unchecked(Group::static_empty().as_ptr().cast_mut().cast()) + }, + bucket_mask: 0, + items: 0, + growth_left: 0, + } + } +} + +/// Find the previous power of 2. If it's already a power of 2, it's unchanged. +/// Passing zero is undefined behavior. 
+pub fn prev_pow2(z: usize) -> usize { + let shift = mem::size_of::() * 8 - 1; + 1 << (shift - (z.leading_zeros() as usize)) +} + +/// Finds the largest number of buckets that can fit in `allocation_size` +/// provided the given TableLayout. +/// +/// This relies on some invariants of `capacity_to_buckets`, so only feed in +/// an `allocation_size` calculated from `capacity_to_buckets`. +fn maximum_buckets_in( + allocation_size: usize, + table_layout: TableLayout, + group_width: usize, +) -> usize { + // Given an equation like: + // z >= x * y + x + g + // x can be maximized by doing: + // x = (z - g) / (y + 1) + // If you squint: + // x is the number of buckets + // y is the table_layout.size + // z is the size of the allocation + // g is the group width + // But this is ignoring the padding needed for ctrl_align. + // If we remember these restrictions: + // x is always a power of 2 + // Layout size for T must always be a multiple of T + // Then the alignment can be ignored if we add the constraint: + // x * y >= table_layout.ctrl_align + // This is taken care of by `capacity_to_buckets`. + // It may be helpful to understand this if you remember that: + // ctrl_offset = align(x * y, ctrl_align) + let x = (allocation_size - group_width) / (table_layout.size + 1); + prev_pow2(x) +} + +impl RawTableInner { + /// Allocates a new [`RawTableInner`] with the given number of buckets. + /// The control bytes and buckets are left uninitialized. + /// + /// # Safety + /// + /// The caller of this function must ensure that the `buckets` is power of two + /// and also initialize all control bytes of the length `self.bucket_mask + 1 + + /// Group::WIDTH` with the [`Tag::EMPTY`] bytes. + /// + /// See also [`Allocator`] API for other safety concerns. 
+ /// + /// [`Allocator`]: stdalloc::alloc::Allocator + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new_uninitialized( + alloc: &A, + table_layout: TableLayout, + mut buckets: usize, + fallibility: Fallibility, + ) -> Result + where + A: Allocator, + { + debug_assert!(buckets.is_power_of_two()); + + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let Some((layout, mut ctrl_offset)) = table_layout.calculate_layout_for(buckets) else { + return Err(fallibility.capacity_overflow()); + }; + + let ptr: NonNull = match do_alloc(alloc, layout) { + Ok(block) => { + // The allocator can't return a value smaller than was + // requested, so this can be != instead of >=. + if block.len() != layout.size() { + // Utilize over-sized allocations. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. + let (oversized_layout, oversized_ctrl_offset) = { + let option = table_layout.calculate_layout_for(x); + unsafe { option.unwrap_unchecked() } + }; + debug_assert!(oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + } + + block.cast() + } + Err(_) => return Err(fallibility.alloc_err(layout)), + }; + + // SAFETY: null pointer will be caught in above check + let ctrl = unsafe { NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)) }; + Ok(Self { + ctrl, + bucket_mask: buckets - 1, + items: 0, + growth_left: bucket_mask_to_capacity(buckets - 1), + }) + } + + /// Attempts to allocate a new [`RawTableInner`] with at least enough + /// capacity for inserting the given number of elements without reallocating. + /// + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes. 
+ #[inline] + fn fallible_with_capacity( + alloc: &A, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result + where + A: Allocator, + { + if capacity == 0 { + Ok(Self::NEW) + } else { + // SAFETY: We checked that we could successfully allocate the new table, and then + // initialized all control bytes with the constant `Tag::EMPTY` byte. + unsafe { + let buckets = capacity_to_buckets(capacity, table_layout) + .ok_or_else(|| fallibility.capacity_overflow())?; + + let mut result = + Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. + result.ctrl_slice().fill_empty(); + + Ok(result) + } + } + } + + /// Allocates a new [`RawTableInner`] with at least enough capacity for inserting + /// the given number of elements without reallocating. + /// + /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`] the program + /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to + /// handle memory allocation failure. + /// + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes. + /// + /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity + /// [`abort`]: stdalloc::abort::handle_alloc_error + fn with_capacity(alloc: &A, table_layout: TableLayout, capacity: usize) -> Self + where + A: Allocator, + { + let result = + Self::fallible_with_capacity(alloc, table_layout, capacity, Fallibility::Infallible); + + // SAFETY: All allocation errors will be caught inside `RawTableInner::new_uninitialized`. + unsafe { result.unwrap_unchecked() } + } + + /// Fixes up an insertion index returned by the [`RawTableInner::find_insert_index_in_group`] method. 
+ /// + /// In tables smaller than the group width (`self.num_buckets() < Group::WIDTH`), trailing control + /// bytes outside the range of the table are filled with [`Tag::EMPTY`] entries. These will unfortunately + /// trigger a match of [`RawTableInner::find_insert_index_in_group`] function. This is because + /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking + /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. + /// We detect this situation here and perform a second scan starting at the beginning of the table. + /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the + /// trailing control bytes (containing [`Tag::EMPTY`] bytes). + /// + /// If this function is called correctly, it is guaranteed to return an index of an empty or + /// deleted bucket in the range `0..self.num_buckets()` (see `Warning` and `Safety`). + /// + /// # Warning + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise if the table is less than + /// the group width (`self.num_buckets() < Group::WIDTH`) this function returns an index outside of the + /// table indices range `0..self.num_buckets()` (`0..=self.bucket_mask`). Attempt to write data at that + /// index will cause immediate [`undefined behavior`]. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::ctrl`] method. + /// Thus, in order to uphold those safety contracts, as well as for the correct logic of the work + /// of this crate, the following rules are necessary and sufficient: + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise calling this + /// function results in [`undefined behavior`]. 
+ /// + /// * This function must only be used on insertion indices found by [`RawTableInner::find_insert_index_in_group`] + /// (after the `find_insert_index_in_group` function, but before insertion into the table). + /// + /// * The `index` must not be greater than the `self.bucket_mask`, i.e. `(index + 1) <= self.num_buckets()` + /// (this one is provided by the [`RawTableInner::find_insert_index_in_group`] function). + /// + /// Calling this function with an index not provided by [`RawTableInner::find_insert_index_in_group`] + /// may result in [`undefined behavior`] even if the index satisfies the safety rules of the + /// [`RawTableInner::ctrl`] function (`index < self.bucket_mask + 1 + Group::WIDTH`). + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn fix_insert_index(&self, mut index: usize) -> usize { + // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`. + if unlikely(unsafe { self.is_bucket_full(index) }) { + debug_assert!(self.bucket_mask < Group::WIDTH); + // SAFETY: + // + // * Since the caller of this function ensures that the control bytes are properly + // initialized and `ptr = self.ctrl(0)` points to the start of the array of control + // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH` + // and points to the properly initialized control bytes (see also + // `TableLayout::calculate_layout_for` and `ptr::read`); + // + // * Because the caller of this function ensures that the index was provided by the + // `self.find_insert_index_in_group()` function, so for tables larger than the + // group width (self.num_buckets() >= Group::WIDTH), we will never end up in the given + // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_index_in_group` + // cannot return a full bucket index.
For tables smaller than the group width, calling + // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside + // the range of the table are filled with EMPTY bytes (and we know for sure that there + // is at least one FULL bucket), so this second scan either finds an empty slot (due to + // the load factor) or hits the trailing control bytes (containing EMPTY). + index = unsafe { + Group::load_aligned(self.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit() + .unwrap_unchecked() + }; + } + index + } + + /// Finds the position to insert something in a group. + /// + /// **This may have false positives and must be fixed up with `fix_insert_index` + /// before it's used.** + /// + /// The function is guaranteed to return the index of an empty or deleted [`Bucket`] + /// in the range `0..self.num_buckets()` (`0..=self.bucket_mask`). + #[inline] + fn find_insert_index_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option { + let bit = group.match_empty_or_deleted().lowest_set_bit(); + + if likely(bit.is_some()) { + // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask) + } else { + None + } + } + + /// Searches for an element in the table, or a potential slot where that element could + /// be inserted (an empty or deleted [`Bucket`] index). + /// + /// This uses dynamic dispatch to reduce the amount of code generated, but that is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. 
+ /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise, if the + /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, this function + /// will never return (will go into an infinite loop) for tables larger than the group + /// width, or return an index outside of the table indices range if the table is less + /// than the group width. + /// + /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool` + /// function with only `FULL` buckets' indices and return the `index` of the found + /// element (as `Ok(index)`). If the element is not found and there is at least 1 + /// empty or deleted [`Bucket`] in the table, the function is guaranteed to return + /// an index in the range `0..self.num_buckets()`, but in any case, if this function + /// returns `Err`, it will contain an index in the range `0..=self.num_buckets()`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// Attempt to write data at the index returned by this function when the table is less than + /// the group width and if there was not at least one empty or deleted bucket in the table + /// will cause immediate [`undefined behavior`]. This is because in this case the function + /// will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] control + /// bytes outside the table range. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn find_or_find_insert_index_inner( + &self, + hash: u64, + eq: &mut dyn FnMut(usize) -> Result, + ) -> Result, E> { + let mut insert_index = None; + + let tag_hash = Tag::full(hash); + let mut probe_seq = self.probe_seq(hash); + + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. 
+ // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask` and also because the number + // of buckets is a power of two (see `self.probe_seq` function). + // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + for bit in group.match_tag(tag_hash) { + let index = (probe_seq.pos + bit) & self.bucket_mask; + + if likely(eq(index)?) { + return Ok(Ok(index)); + } + } + + // We didn't find the element we were looking for in the group, try to get an + // insertion slot from the group if we don't have one yet. + if likely(insert_index.is_none()) { + insert_index = self.find_insert_index_in_group(&group, &probe_seq); + } + + if let Some(insert_index) = insert_index { + // Only stop the search if the group contains at least one empty element. + // Otherwise, the element that we are looking for might be in a following group. + if likely(group.match_empty().any_bit_set()) { + // We must have found an insert slot by now, since the current group contains at + // least one. For tables smaller than the group width, there will still be an + // empty element in the current (and only) group due to the load factor. + unsafe { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized.
+ // + // * We use this function with the index found by `self.find_insert_index_in_group` + return Ok(Err(self.fix_insert_index(insert_index))); + } + } + } + + probe_seq.move_next(self.bucket_mask); + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting a new + /// element and sets the hash for that slot. Returns an index of that slot and the + /// old control byte stored in the found index. + /// + /// This function does not check if the given element exists in the table. Also, + /// this function does not check if there is enough space in the table to insert + /// a new element. The caller of the function must make sure that the table has at + /// least 1 empty or deleted `bucket`, otherwise this function will never return + /// (will go into an infinite loop) for tables larger than the group width, or + /// return an index outside of the table indices range if the table is less than + /// the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return an `index` in the range `0..self.num_buckets()`, but in any case, + /// if this function returns an `index` it will be in the range `0..=self.num_buckets()`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for the + /// [`RawTableInner::set_ctrl_hash`] and [`RawTableInner::find_insert_index`] methods. + /// Thus, in order to uphold the safety contracts for those methods, as well as for + /// the correct logic of the work of this crate, you must observe the following rules + /// when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated and has properly initialized + /// control bytes otherwise calling this function results in [`undefined behavior`].
+ /// + /// * The caller of this function must ensure that the "data" parts of the table + /// will have an entry in the returned index (matching the given hash) right + /// after calling this function. + /// + /// Attempt to write data at the `index` returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] + /// control bytes outside the table range. + /// + /// The caller must independently increase the `items` field of the table, and also, + /// if the old control byte was [`Tag::EMPTY`], then decrease the table's `growth_left` + /// field, and do not change it if the old control byte was [`Tag::DELETED`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn prepare_insert_index(&mut self, hash: u64) -> (usize, Tag) { + unsafe { + // SAFETY: Caller of this function ensures that the control bytes are properly initialized. + let index: usize = self.find_insert_index(hash); + // SAFETY: + // 1. The `find_insert_index` function either returns an `index` less than or + // equal to `self.num_buckets() = self.bucket_mask + 1` of the table, or never + // returns if it cannot find an empty or deleted slot. + // 2. The caller of this function guarantees that the table has already been + // allocated + let old_ctrl = *self.ctrl(index); + self.set_ctrl_hash(index, hash); + (index, old_ctrl) + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element, returning the `index` for the new [`Bucket`].
+ /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise this function + /// will never return (will go into an infinite loop) for tables larger than the group + /// width, or return an index outside of the table indices range if the table is less + /// than the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return an index in the range `0..self.num_buckets()`, but in any case, + /// it will contain an index in the range `0..=self.num_buckets()`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// Attempt to write data at the index returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] + /// control bytes outside the table range. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn find_insert_index(&self, hash: u64) -> usize { + let mut probe_seq = self.probe_seq(hash); + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask` and also because the number + // of buckets is a power of two (see `self.probe_seq` function). 
+ // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + let index = self.find_insert_index_in_group(&group, &probe_seq); + if likely(index.is_some()) { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * We use this function with the slot / index found by `self.find_insert_index_in_group` + unsafe { + return self.fix_insert_index(index.unwrap_unchecked()); + } + } + probe_seq.move_next(self.bucket_mask); + } + } + + /// Searches for an element in a table, returning the `index` of the found element. + /// This uses dynamic dispatch to reduce the amount of code generated, but it is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty `bucket`, otherwise, if the + /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, + /// this function will also never return (will go into an infinite loop). + /// + /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool` + /// function with only `FULL` buckets' indices and return the `index` of the found + /// element as `Some(index)`, so the index will always be in the range + /// `0..self.num_buckets()`. 
+ /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn find_inner( + &self, + hash: u64, + eq: &mut dyn FnMut(usize) -> Result, + ) -> Result, E> { + let tag_hash = Tag::full(hash); + let mut probe_seq = self.probe_seq(hash); + + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask`. + // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + for bit in group.match_tag(tag_hash) { + // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index = (probe_seq.pos + bit) & self.bucket_mask; + + if likely(eq(index)?) { + return Ok(Some(index)); + } + } + + if likely(group.match_empty().any_bit_set()) { + return Ok(None); + } + + probe_seq.move_next(self.bucket_mask); + } + } + + /// Prepares for rehashing data in place (that is, without allocating new memory).
+ /// Converts all full index `control bytes` to `Tag::DELETED` and all `Tag::DELETED` control + /// bytes to `Tag::EMPTY`, i.e. performs the following conversion: + /// + /// - `Tag::EMPTY` control bytes -> `Tag::EMPTY`; + /// - `Tag::DELETED` control bytes -> `Tag::EMPTY`; + /// - `FULL` control bytes -> `Tag::DELETED`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The caller of this function must convert the `Tag::DELETED` bytes back to `FULL` + /// bytes when re-inserting them into their ideal position (which was impossible + /// to do during the first insert due to tombstones). If the caller does not do + /// this, then calling this function may result in a memory leak. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise + /// calling this function results in [`undefined behavior`]. + /// + /// Calling this function on a table that has not been allocated results in + /// [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn prepare_rehash_in_place(&mut self) { + // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY. + // This effectively frees up all buckets containing a DELETED entry. + // + // SAFETY: + // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`; + // 2.
Even if `i` will be `i == self.bucket_mask`, it is safe to call `Group::load_aligned` + // due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`; + // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated; + // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0 + // and go to the end with a step equal to `Group::WIDTH` (see TableLayout::calculate_layout_for). + unsafe { + for i in (0..self.num_buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + } + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if unlikely(self.num_buckets() < Group::WIDTH) { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe + unsafe { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.num_buckets()); + } + } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes, so copying `Group::WIDTH` bytes with offset equal + // to `self.num_buckets() == self.bucket_mask + 1` is safe + unsafe { + self.ctrl(0) + .copy_to(self.ctrl(self.num_buckets()), Group::WIDTH); + } + } + } + + /// Returns an iterator over every element in the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result + /// is [`undefined behavior`]: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `RawIter`. Because we cannot make the `next` method unsafe on + /// the `RawIter` struct, we have to make the `iter` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes.
+ /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise using the returned [`RawIter`] results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn iter(&self) -> RawIter { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.data_end()` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. `data` bucket index in the table is equal to the `ctrl` index (i.e. + // equal to zero). + // 3. We pass the exact value of buckets of the table to the function. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + unsafe { + let data = Bucket::from_base_index(self.data_end(), 0); + RawIter { + // SAFETY: See explanation above + iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.num_buckets()), + items: self.items, + } + } + } + + /// Executes the destructors (if any) of the values stored in the table. 
+ /// + /// # Note + /// + /// This function does not erase the control bytes of the table and does + /// not make any changes to the `items` or `growth_left` fields of the + /// table. If necessary, the caller of this function must manually set + /// up these table fields, for example using the [`clear_no_drop`] function. + /// + /// Be careful during calling this function, because drop function of + /// the elements can panic, and this can leave table in an inconsistent + /// state. + /// + /// # Safety + /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise calling this function may result in [`undefined behavior`]. + /// + /// If `T` is a type that should be dropped and **the table is not empty**, + /// calling this function more than once results in [`undefined behavior`]. + /// + /// If `T` is not [`Copy`], attempting to use values stored in the table after + /// calling this function may result in [`undefined behavior`]. + /// + /// It is safe to call this function on a table that has not been allocated, + /// on a table with uninitialized control bytes, and on a table with no actual + /// data but with `Full` control bytes if `self.items == 0`. + /// + /// See also [`Bucket::drop`] / [`Bucket::as_ptr`] methods, for more information + /// about of properly removing or saving `element` from / into the [`RawTable`] / + /// [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn drop_elements(&mut self) { + // Check that `self.items != 0`. Protects against the possibility + // of creating an iterator on a table with uninitialized control bytes. + if T::NEEDS_DROP && self.items != 0 { + // SAFETY: We know for sure that RawTableInner will outlive the + // returned `RawIter` iterator, and the caller of this function + // must uphold the safety contract for `drop_elements` method.
+ unsafe { + for item in self.iter::() { + // SAFETY: The caller must uphold the safety contract for + // `drop_elements` method. + item.drop(); + } + } + } + } + + /// Executes the destructors (if any) of the values stored in the table and then + /// deallocates the table. + /// + /// # Note + /// + /// Calling this function automatically makes invalid (dangling) all instances of + /// buckets ([`Bucket`]) and makes invalid (dangling) the `ctrl` field of the table. + /// + /// This function does not make any changes to the `bucket_mask`, `items` or `growth_left` + /// fields of the table. If necessary, the caller of this function must manually set + /// up these table fields. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * Calling this function more than once; + /// + /// * The type `T` must be the actual type of the elements stored in the table. + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used + /// to allocate this table. + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that + /// was used to allocate this table. + /// + /// The caller of this function should pay attention to the possibility of the + /// elements' drop function panicking, because this: + /// + /// * May leave the table in an inconsistent state; + /// + /// * Memory is never deallocated, so a memory leak may occur. + /// + /// Attempt to use the `ctrl` field of the table (dereference) after calling this + /// function results in [`undefined behavior`]. + /// + /// It is safe to call this function on a table that has not been allocated, + /// on a table with uninitialized control bytes, and on a table with no actual + /// data but with `Full` control bytes if `self.items == 0`. + /// + /// See also [`RawTableInner::drop_elements`] or [`RawTableInner::free_buckets`] + /// for more information.
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn drop_inner_table(&mut self, alloc: &A, table_layout: TableLayout) { + if !self.is_empty_singleton() { + // SAFETY: The caller must uphold the safety contract for `drop_inner_table` method. + unsafe { + self.drop_elements::(); + } + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. The caller must uphold the safety contract for `drop_inner_table` method. + unsafe { + self.free_buckets(alloc, table_layout); + } + } + } + + /// Returns a pointer to an element in the table (convenience for + /// `Bucket::from_base_index(self.data_end::(), index)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived from the + /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling + /// this function, the following safety rules must be observed: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`. + /// + /// * The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`Bucket`] may result in [`undefined behavior`]. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::num_buckets`] function, i.e. + /// `(index + 1) <= self.num_buckets()`. 
+ /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"): + /// + /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`]) + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// ^ \__________ __________/ + /// `table.bucket(3)` returns a pointer that points \/ + /// here in the `data` part of the `RawTableInner` additional control bytes + /// (to the end of T3) `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn bucket(&self, index: usize) -> Bucket { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { Bucket::from_base_index(self.data_end(), index) } + } + + /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table + /// (convenience for `self.data_end::().as_ptr().sub((index + 1) * size_of)`). 
+ /// + /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`, + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`; + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"): + /// + /// `table.bucket_ptr(3, mem::size_of::())` returns a pointer that points here in the + /// `data` part of the `RawTableInner`, i.e. to the start of T3 + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// \__________ __________/ + /// \/ + /// additional control bytes + /// `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. 
+ /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { + let base: *mut u8 = self.data_end().as_ptr(); + base.sub((index + 1) * size_of) + } + } + + /// Returns pointer to one past last `data` element in the table as viewed from + /// the start point of the allocation (convenience for `self.ctrl.cast()`). + /// + /// This function actually returns a pointer to the end of the `data element` at + /// index "0" (zero). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Note + /// + /// The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`NonNull`] may result in [`undefined behavior`]. + /// + /// ```none + /// `table.data_end::()` returns pointer that points here + /// (to the end of `T0`) + /// ∨ + /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + /// \________ ________/ + /// \/ + /// `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`. + /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + /// with loading `Group` bytes from the heap works properly, even if the result + /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + /// `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. 
+ /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + fn data_end(&self) -> NonNull { + self.ctrl.cast() + } + + /// Returns an iterator-like object for a probe sequence on the table. + /// + /// This iterator never terminates, but is guaranteed to visit each bucket + /// group exactly once. The loop using `probe_seq` must terminate upon + /// reaching a group containing an empty bucket. + #[inline] + fn probe_seq(&self, hash: u64) -> ProbeSeq { + ProbeSeq { + // This is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + pos: h1(hash) & self.bucket_mask, + stride: 0, + } + } + + #[inline] + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, new_ctrl: Tag) { + self.growth_left -= usize::from(old_ctrl.special_is_empty()); + unsafe { + self.set_ctrl(index, new_ctrl); + } + self.items += 1; + } + + #[inline] + fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { + let probe_seq_pos = self.probe_seq(hash).pos; + let probe_index = + |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + probe_index(i) == probe_index(new_i) + } + + /// Sets a control byte to the hash, and possibly also the replicated control byte at + /// the end of the array. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl`] + /// method. Thus, in order to uphold the safety contracts for the method, you must observe the + /// following rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. 
+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn set_ctrl_hash(&mut self, index: usize, hash: u64) { + unsafe { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_hash`] + self.set_ctrl(index, Tag::full(hash)); + } + } + + /// Replaces the hash in the control byte at the given index with the provided one, + /// and possibly also replicates the new control byte at the end of the array of control + /// bytes, returning the old control byte. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl_hash`] + /// and [`RawTableInner::ctrl`] methods. Thus, in order to uphold the safety contracts for both + /// methods, you must observe the following rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. 
+ /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn replace_ctrl_hash(&mut self, index: usize, hash: u64) -> Tag { + unsafe { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_hash`] + let prev_ctrl = *self.ctrl(index); + self.set_ctrl_hash(index, hash); + prev_ctrl + } + } + + /// Sets a control byte, and possibly also the replicated control byte at + /// the end of the array. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn set_ctrl(&mut self, index: usize, ctrl: Tag) { + // Replicate the first Group::WIDTH control bytes at the end of + // the array without using a branch. 
If the table is smaller than + // the group width (self.num_buckets() < Group::WIDTH), + // `index2 = Group::WIDTH + index`, otherwise `index2` is: + // + // - If index >= Group::WIDTH then index == index2. + // - Otherwise index2 == self.bucket_mask + 1 + index. + // + // The very last replicated control byte is never actually read because + // we mask the initial index for unaligned loads, but we write it + // anyways because it makes the set_ctrl implementation simpler. + // + // If there are fewer buckets than Group::WIDTH then this code will + // replicate the buckets at the end of the trailing group. For example + // with 2 buckets and a group size of 4, the control bytes will look + // like this: + // + // Real | Replicated + // --------------------------------------------- + // | [A] | [B] | [Tag::EMPTY] | [EMPTY] | [A] | [B] | + // --------------------------------------------- + + // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.num_buckets() + Group::WIDTH` + // because the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl`] + unsafe { + *self.ctrl(index) = ctrl; + *self.ctrl(index2) = ctrl; + } + } + + /// Returns a pointer to a control byte. + /// + /// # Safety + /// + /// For the allocated [`RawTableInner`], the result is [`Undefined Behavior`], + /// if the `index` is greater than the `self.bucket_mask + 1 + Group::WIDTH`. + /// In that case, calling this function with `index == self.bucket_mask + 1 + Group::WIDTH` + /// will return a pointer to the end of the allocated table and it is useless on its own. + /// + /// Calling this function with `index >= self.bucket_mask + 1 + Group::WIDTH` on a + /// table that has not been allocated results in [`Undefined Behavior`].
+ /// + /// So to satisfy both requirements you should always follow the rule that + /// `index < self.bucket_mask + 1 + Group::WIDTH` + /// + /// Calling this function on [`RawTableInner`] that are not already allocated is safe + /// for read-only purpose. + /// + /// See also [`Bucket::as_ptr()`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn ctrl(&self, index: usize) -> *mut Tag { + debug_assert!(index < self.num_ctrl_bytes()); + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::ctrl`] + unsafe { self.ctrl.as_ptr().add(index).cast() } + } + + /// Gets the slice of all control bytes, as possibly uninitialized tags. + fn ctrl_slice(&mut self) -> &mut [mem::MaybeUninit] { + // SAFETY: We have the correct number of control bytes. + unsafe { slice::from_raw_parts_mut(self.ctrl.as_ptr().cast(), self.num_ctrl_bytes()) } + } + + #[inline] + fn num_buckets(&self) -> usize { + self.bucket_mask + 1 + } + + /// Checks whether the bucket at `index` is full. + /// + /// # Safety + /// + /// The caller must ensure `index` is less than the number of buckets. + #[inline] + unsafe fn is_bucket_full(&self, index: usize) -> bool { + debug_assert!(index < self.num_buckets()); + unsafe { (*self.ctrl(index)).is_full() } + } + + #[inline] + fn num_ctrl_bytes(&self) -> usize { + self.bucket_mask + 1 + Group::WIDTH + } + + #[inline] + fn is_empty_singleton(&self) -> bool { + self.bucket_mask == 0 + } + + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating, + /// and return it inside `ScopeGuard` to protect against panic in the hash + /// function.
+ /// + /// # Note + /// + /// It is recommended (but not required): + /// + /// * That the new table's `capacity` be greater than or equal to `self.items`. + /// + /// * The `alloc` is the same [`Allocator`] as the `Allocator` used + /// to allocate this table. + /// + /// * The `table_layout` is the same [`TableLayout`] as the `TableLayout` used + /// to allocate this table. + /// + /// If `table_layout` does not match the `TableLayout` that was used to allocate + /// this table, then using `mem::swap` with the `self` and the new table returned + /// by this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + fn prepare_resize<'a, A>( + &self, + alloc: &'a A, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result, TryReserveError> + where + A: Allocator, + { + debug_assert!(self.items <= capacity); + + // Allocate and initialize the new table. + let new_table = + RawTableInner::fallible_with_capacity(alloc, table_layout, capacity, fallibility)?; + + // The hash function may panic, in which case we simply free the new + // table without dropping any elements that may have been copied into + // it. + // + // This guard is also used to free the old table on success, see + // the comment at the bottom of this function. + Ok(guard(new_table, move |self_| { + if !self_.is_empty_singleton() { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that the `alloc` and `table_layout` matches the + // [`Allocator`] and [`TableLayout`] used to allocate this table. + unsafe { self_.free_buckets(alloc, table_layout) }; + } + })) + } + + /// Reserves or rehashes to make room for `additional` more elements. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined. 
+ /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used + /// to allocate this table. + /// + /// * The `layout` must be the same [`TableLayout`] as the `TableLayout` + /// used to allocate this table. + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::inline_always)] + #[inline(always)] + unsafe fn reserve_rehash_inner( + &mut self, + alloc: &A, + additional: usize, + hasher: &dyn Fn(&mut Self, usize) -> u64, + fallibility: Fallibility, + layout: TableLayout, + drop: Option, + ) -> Result<(), TryReserveError> + where + A: Allocator, + { + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let Some(new_items) = self.items.checked_add(additional) else { + return Err(fallibility.capacity_overflow()); + }; + let full_capacity = bucket_mask_to_capacity(self.bucket_mask); + if new_items <= full_capacity / 2 { + // Rehash in-place without re-allocating if we have plenty of spare + // capacity that is locked up due to DELETED entries. + + // SAFETY: + // 1. We know for sure that `[`RawTableInner`]` has already been allocated + // (since new_items <= full_capacity / 2); + // 2. The caller ensures that `drop` function is the actual drop function of + // the elements stored in the table. + // 3. The caller ensures that `layout` matches the [`TableLayout`] that was + // used to allocate this table. + // 4. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. 
+ unsafe { + self.rehash_in_place(hasher, layout.size, drop); + } + Ok(()) + } else { + // Otherwise, conservatively resize to at least the next size up + // to avoid churning deletes into frequent rehashes. + // + // SAFETY: + // 1. We know for sure that `capacity >= self.items`. + // 2. The caller ensures that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + unsafe { + self.resize_inner( + alloc, + usize::max(new_items, full_capacity + 1), + hasher, + fallibility, + layout, + ) + } + } + } + + /// Returns an iterator over full buckets indices in the table. + /// + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `FullBucketsIndices`. Because we cannot make the `next` method + /// unsafe on the `FullBucketsIndices` struct, we have to make the + /// `full_buckets_indices` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + #[inline(always)] + unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.ctrl(0)` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. The value of `items` is equal to the amount of data (values) added + // to the table. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. 
`RawTableInner::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + unsafe { + let ctrl = NonNull::new_unchecked(self.ctrl(0).cast::()); + + FullBucketsIndices { + // Load the first group + // SAFETY: See explanation above. + current_group: Group::load_aligned(ctrl.as_ptr().cast()) + .match_full() + .into_iter(), + group_first_index: 0, + ctrl, + items: self.items, + } + } + } + + /// Allocates a new table of a different size and moves the contents of the + /// current table into it. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used + /// to allocate this table; + /// + /// * The `layout` must be the same [`TableLayout`] as the `TableLayout` + /// used to allocate this table; + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// The caller of this function must ensure that `capacity >= self.items` + /// otherwise: + /// + /// * If `self.items != 0`, calling of this function with `capacity == 0` + /// results in [`undefined behavior`]. + /// + /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and + /// `self.items > capacity_to_buckets(capacity)` calling this function + /// results in [`undefined behavior`]. + /// + /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and + /// `self.items > capacity_to_buckets(capacity)` calling this function + /// will never return (will go into an infinite loop). + /// + /// Note: It is recommended (but not required) that the new table's `capacity` + /// be greater than or equal to `self.items`. In case if `capacity <= self.items` + /// this function can never return.
See [`RawTableInner::find_insert_index`] for + /// more information. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::inline_always)] + #[inline(always)] + unsafe fn resize_inner( + &mut self, + alloc: &A, + capacity: usize, + hasher: &dyn Fn(&mut Self, usize) -> u64, + fallibility: Fallibility, + layout: TableLayout, + ) -> Result<(), TryReserveError> + where + A: Allocator, + { + // SAFETY: We know for sure that `alloc` and `layout` matches the [`Allocator`] and [`TableLayout`] + // that were used to allocate this table. + let mut new_table = self.prepare_resize(alloc, layout, capacity, fallibility)?; + + // SAFETY: We know for sure that RawTableInner will outlive the + // returned `FullBucketsIndices` iterator, and the caller of this + // function ensures that the control bytes are properly initialized. + unsafe { + for full_byte_index in self.full_buckets_indices() { + // This may panic. + let hash = hasher(self, full_byte_index); + + // SAFETY: + // We can use a simpler version of insert() here since: + // 1. There are no DELETED entries. + // 2. We know there is enough space in the table. + // 3. All elements are unique. + // 4. The caller of this function guarantees that `capacity > 0` + // so `new_table` must already have some allocated memory. + // 5. We set `growth_left` and `items` fields of the new table + // after the loop. + // 6. We insert into the table, at the returned index, the data + // matching the given hash immediately after calling this function. 
+ let (new_index, _) = new_table.prepare_insert_index(hash); + + // SAFETY: + // + // * `src` is valid for reads of `layout.size` bytes, since the + // table is alive and the `full_byte_index` is guaranteed to be + // within bounds (see `FullBucketsIndices::next_impl`); + // + // * `dst` is valid for writes of `layout.size` bytes, since the + // caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate old table and we have the `new_index` + // returned by `prepare_insert_index`. + // + // * Both `src` and `dst` are properly aligned. + // + // * Both `src` and `dst` point to different region of memory. + ptr::copy_nonoverlapping( + self.bucket_ptr(full_byte_index, layout.size), + new_table.bucket_ptr(new_index, layout.size), + layout.size, + ); + } + } + + // The hash function didn't panic, so we can safely set the + // `growth_left` and `items` fields of the new table. + new_table.growth_left -= self.items; + new_table.items = self.items; + + // We successfully copied all elements without panicking. Now replace + // self with the new table. The old table will have its memory freed but + // the items will not be dropped (since they have been moved into the + // new table). + // SAFETY: The caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate this table. + mem::swap(self, &mut new_table); + + Ok(()) + } + + /// Rehashes the contents of the table in place (i.e. without changing the + /// allocation). + /// + /// If `hasher` panics then some the table's contents may be lost. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined. 
+ /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg_attr(feature = "inline-more", expect(clippy::inline_always))] + #[cfg_attr(feature = "inline-more", inline(always))] + #[cfg_attr(not(feature = "inline-more"), inline)] + unsafe fn rehash_in_place( + &mut self, + hasher: &dyn Fn(&mut Self, usize) -> u64, + size_of: usize, + drop: Option, + ) { + // If the hash function panics then properly clean up any elements + // that we haven't rehashed yet. We unfortunately can't preserve the + // element since we lost their hash and have no way of recovering it + // without risking another panic. + unsafe { + self.prepare_rehash_in_place(); + } + + let mut guard = guard(self, move |self_| { + for i in 0..self_.num_buckets() { + unsafe { + // Any elements that haven't been rehashed yet have a + // DELETED tag. These need to be dropped and have their tag + // reset to EMPTY. + if *self_.ctrl(i) == Tag::DELETED { + self_.set_ctrl(i, Tag::EMPTY); + if let Some(drop) = drop { + drop(self_.bucket_ptr(i, size_of)); + } + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }); + + // At this point, DELETED elements are elements that we haven't + // rehashed yet. Find them and re-insert them at their ideal + // position. 
+ 'outer: for i in 0..guard.num_buckets() { + unsafe { + if *guard.ctrl(i) != Tag::DELETED { + continue; + } + } + + let i_p = unsafe { guard.bucket_ptr(i, size_of) }; + + loop { + // Hash the current item + let hash = hasher(*guard, i); + + // Search for a suitable place to put it + // + // SAFETY: Caller of this function ensures that the control bytes + // are properly initialized. + let new_i = unsafe { guard.find_insert_index(hash) }; + + // Probing works by scanning through all of the control + // bytes in groups, which may not be aligned to the group + // size. If both the new and old position fall within the + // same unaligned group, then there is no benefit in moving + // it and we can just continue to the next item. + if likely(guard.is_in_same_group(i, new_i, hash)) { + unsafe { guard.set_ctrl_hash(i, hash) }; + continue 'outer; + } + + let new_i_p = unsafe { guard.bucket_ptr(new_i, size_of) }; + + // We are moving the current item to a new position. Write + // our H2 to the control byte of the new position. + let prev_ctrl = unsafe { guard.replace_ctrl_hash(new_i, hash) }; + if prev_ctrl == Tag::EMPTY { + unsafe { guard.set_ctrl(i, Tag::EMPTY) }; + // If the target slot is empty, simply move the current + // element into the new slot and clear the old control + // byte. + unsafe { + ptr::copy_nonoverlapping(i_p, new_i_p, size_of); + } + continue 'outer; + } + + // If the target slot is occupied, swap the two elements + // and then continue processing the element that we just + // swapped into the old slot. + debug_assert_eq!(prev_ctrl, Tag::DELETED); + unsafe { + ptr::swap_nonoverlapping(i_p, new_i_p, size_of); + } + } + } + + guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items; + + mem::forget(guard); + } + + /// Deallocates the table without dropping any entries. + /// + /// # Note + /// + /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements), + /// else it can lead to leaking of memory. 
Also calling this function automatically + /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid + /// (dangling) the `ctrl` field of the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used + /// to allocate this table. + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that was used + /// to allocate this table. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: stdalloc::alloc::GlobalAlloc::dealloc + /// [`Allocator::deallocate`]: stdalloc::alloc::Allocator::deallocate + #[inline] + unsafe fn free_buckets(&mut self, alloc: &A, table_layout: TableLayout) + where + A: Allocator, + { + unsafe { + // SAFETY: The caller must uphold the safety contract for `free_buckets` + // method. + let (ptr, layout) = self.allocation_info(table_layout); + alloc.deallocate(ptr, layout); + } + } + + /// Returns a pointer to the allocated memory and the layout that was used to + /// allocate the table. + /// + /// # Safety + /// + /// Caller of this function must observe the following safety rules: + /// + /// * The [`RawTableInner`] has already been allocated, otherwise + /// calling this function results in [`undefined behavior`] + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` + /// that was used to allocate this table. Failure to comply with this condition + /// may result in [`undefined behavior`]. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. 
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: stdalloc::GlobalAlloc::dealloc + /// [`Allocator::deallocate`]: stdalloc::Allocator::deallocate + #[inline] + unsafe fn allocation_info(&self, table_layout: TableLayout) -> (NonNull, Layout) { + debug_assert!( + !self.is_empty_singleton(), + "this function can only be called on non-empty tables" + ); + + let (layout, ctrl_offset) = { + let option = table_layout.calculate_layout_for(self.num_buckets()); + unsafe { option.unwrap_unchecked() } + }; + ( + // SAFETY: The caller must uphold the safety contract for `allocation_info` method. + unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) }, + layout, + ) + } + + /// Returns the total amount of memory allocated internally by the hash + /// table, in bytes. + /// + /// The returned number is informational only. It is intended to be + /// primarily used for memory profiling. + /// + /// # Safety + /// + /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout` + /// that was used to allocate this table. Failure to comply with this condition + /// may result in [`undefined behavior`]. + /// + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn allocation_size_or_zero(&self, table_layout: TableLayout) -> usize { + if self.is_empty_singleton() { + 0 + } else { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. The caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate this table. + unsafe { self.allocation_info(table_layout).1.size() } + } + } + + /// Marks all table buckets as empty without dropping their contents. 
+ #[inline] + fn clear_no_drop(&mut self) { + if !self.is_empty_singleton() { + self.ctrl_slice().fill_empty(); + } + self.items = 0; + self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + } + + /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer + /// reported as full, decreases the `items` of the table and, if it can be done, + /// increases `self.growth_left`. + /// + /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it + /// does not make any changes to the `data` parts of the table. The caller of this + /// function must take care to properly drop the `data`, otherwise calling this + /// function may result in a memory leak. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The control byte at the given position must be full; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// Calling this function on a table with no elements is unspecified, but calling subsequent + /// functions is likely to result in [`undefined behavior`] due to subtraction overflow + /// (`self.items -= 1` overflows when `self.items == 0`). + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn erase(&mut self, index: usize) { + unsafe { + debug_assert!(self.is_bucket_full(index)); + } + + // This is the same as `index.wrapping_sub(Group::WIDTH) % self.num_buckets()` because + // the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; + // SAFETY: + // - The caller must uphold the safety contract for `erase` method; + // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask` + let (empty_before, empty_after) = unsafe { + ( + Group::load(self.ctrl(index_before)).match_empty(), + Group::load(self.ctrl(index)).match_empty(), + ) + }; + + // Inserting and searching in the map is performed by two key functions: + // + // - The `find_insert_index` function that looks up the index of any `Tag::EMPTY` or `Tag::DELETED` + // slot in a group to be able to insert. If it doesn't find an `Tag::EMPTY` or `Tag::DELETED` + // slot immediately in the first group, it jumps to the next `Group` looking for it, + // and so on until it has gone through all the groups in the control bytes. + // + // - The `find_inner` function that looks for the index of the desired element by looking + // at all the `FULL` bytes in the group. If it did not find the element right away, and + // there is no `Tag::EMPTY` byte in the group, then this means that the `find_insert_index` + // function may have found a suitable slot in the next group. Therefore, `find_inner` + // jumps further, and if it does not find the desired element and again there is no `Tag::EMPTY` + // byte, then it jumps further, and so on. The search stops only if `find_inner` function + // finds the desired element or hits an `Tag::EMPTY` slot/byte. 
+ // + // Accordingly, this leads to two consequences: + // + // - The map must have `Tag::EMPTY` slots (bytes); + // + // - You can't just mark the byte to be erased as `Tag::EMPTY`, because otherwise the `find_inner` + // function may stumble upon an `Tag::EMPTY` byte before finding the desired element and stop + // searching. + // + // Thus it is necessary to check all bytes after and before the erased element. If we are in + // a contiguous `Group` of `FULL` or `Tag::DELETED` bytes (the number of `FULL` or `Tag::DELETED` bytes + // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as + // `Tag::DELETED` in order for the `find_inner` function to go further. On the other hand, if there + // is at least one `Tag::EMPTY` slot in the `Group`, then the `find_inner` function will still stumble + // upon an `Tag::EMPTY` byte, so we can safely mark our erased byte as `Tag::EMPTY` as well. + // + // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index` + // and given all of the above, tables smaller than the group width (self.num_buckets() < Group::WIDTH) + // cannot have `Tag::DELETED` bytes. + // + // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while + // `trailing_zeros` refers to the bytes at the beginning of a group. + let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { + Tag::DELETED + } else { + self.growth_left += 1; + Tag::EMPTY + }; + // SAFETY: the caller must uphold the safety contract for `erase` method. 
+ unsafe { + self.set_ctrl(index, ctrl); + } + self.items -= 1; + } +} + +impl Clone for RawTable { + fn clone(&self) -> Self { + if self.table.is_empty_singleton() { + Self::new_in(self.alloc.clone()) + } else { + // SAFETY: This is safe as we are taking the size of an already allocated table + // and therefore capacity overflow cannot occur, `self.table.num_buckets()` is power + // of two and all allocator errors will be caught inside `RawTableInner::new_uninitialized`. + let result = unsafe { + Self::new_uninitialized( + self.alloc.clone(), + self.table.num_buckets(), + Fallibility::Infallible, + ) + }; + + // SAFETY: The result of calling the `new_uninitialized` function cannot be an error + // because `fallibility == Fallibility::Infallible. + let mut new_table = unsafe { result.unwrap_unchecked() }; + + // SAFETY: + // Cloning elements may fail (the clone function may panic). But we don't + // need to worry about uninitialized control bits, since: + // 1. The number of items (elements) in the table is zero, which means that + // the control bits will not be read by Drop function. + // 2. The `clone_from_spec` method will first copy all control bits from + // `self` (thus initializing them). But this will not affect the `Drop` + // function, since the `clone_from_spec` function sets `items` only after + // successfully cloning all elements. + unsafe { new_table.clone_from_spec(self) }; + new_table + } + } + + fn clone_from(&mut self, source: &Self) { + if source.table.is_empty_singleton() { + let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. 
+ old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } else { + unsafe { + // Make sure that if any panic occurs, we clear the table and + // leave it in an empty state. + let mut self_ = guard(self, |self_| { + self_.clear_no_drop(); + }); + + // First, drop all our elements without clearing the control + // bytes. If this panics then the scope guard will clear the + // table, leaking any elements that were not dropped yet. + // + // This leak is unavoidable: we can't try dropping more elements + // since this could lead to another panic and abort the process. + // + // SAFETY: If something goes wrong we clear our table right after + // dropping the elements, so there is no double drop, since `items` + // will be equal to zero. + self_.table.drop_elements::(); + + // If necessary, resize our table to match the source. + if self_.num_buckets() != source.num_buckets() { + let new_inner = { + let result = RawTableInner::new_uninitialized( + &self_.alloc, + Self::TABLE_LAYOUT, + source.num_buckets(), + Fallibility::Infallible, + ); + result.unwrap_unchecked() + }; + // Replace the old inner with a new uninitialized one. This is fine: if something goes + // wrong, the `ScopeGuard` will initialize all control bytes and leave an empty table. + let mut old_inner = mem::replace(&mut self_.table, new_inner); + if !old_inner.is_empty_singleton() { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that `alloc` and `table_layout` match + // the [`Allocator`] and [`TableLayout`] that were used to allocate this table. + old_inner.free_buckets(&self_.alloc, Self::TABLE_LAYOUT); + } + } + + // Cloning elements may fail (the clone function may panic), but the `ScopeGuard` + // inside the `clone_from_impl` function will take care of that, dropping all + // cloned elements if necessary. Our `ScopeGuard` will clear the table. + self_.clone_from_spec(source); + + // Disarm the scope guard if cloning was successful.
+ ScopeGuard::into_inner(self_); + } + } + } +} + +/// Specialization of `clone_from` for `Copy` types +trait RawTableClone { + unsafe fn clone_from_spec(&mut self, source: &Self); +} +impl RawTableClone for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_spec(&mut self, source: &Self) { + unsafe { + self.clone_from_impl(source); + } + } +} + +impl RawTable { + /// Common code for `clone` and `clone_from`. Assumes: + /// - `self.num_buckets() == source.num_buckets()`. + /// - Any existing elements have been dropped. + /// - The control bytes are not initialized yet. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_impl(&mut self, source: &Self) { + // Copy the control bytes unchanged. We do this in a single pass + unsafe { + source + .table + .ctrl(0) + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); + } + + // The cloning of elements may panic, in which case we need + // to make sure we drop only the elements that have been + // cloned so far. + let mut guard = guard((0, &mut *self), |(index, self_)| { + if T::NEEDS_DROP { + for i in 0..*index { + unsafe { + if self_.is_bucket_full(i) { + self_.bucket(i).drop(); + } + } + } + } + }); + + unsafe { + for from in source.iter() { + let index = source.bucket_index(&from); + let to = guard.1.bucket(index); + to.write(from.as_ref().clone()); + + // Update the index in case we need to unwind. + guard.0 = index + 1; + } + } + + // Successfully cloned all items, no need to clean up. + mem::forget(guard); + + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; + } +} + +impl Default for RawTable { + #[inline] + fn default() -> Self { + Self::new_in(Default::default()) + } +} + +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + // SAFETY: + // 1. We call the function only once; + // 2. 
We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If the drop function of any elements fails, then only a memory leak will occur, + // and we don't care because we are inside the `Drop` function of the `RawTable`, + // so there won't be any table left in an inconsistent state. + unsafe { + self.table + .drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } +} + +impl IntoIterator for RawTable { + type Item = T; + type IntoIter = RawIntoIter; + + #[cfg_attr(feature = "inline-more", inline)] + fn into_iter(self) -> RawIntoIter { + unsafe { + let iter = self.iter(); + self.into_iter_from(iter) + } + } +} + +/// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does +/// not track an item count. +pub struct RawIterRange { + // Mask of full buckets in the current group. Bits are cleared from this + // mask as each element is processed. + current_group: BitMaskIter, + + // Pointer to the buckets for the current group. + data: Bucket, + + // Pointer to the next group of control bytes, + // Must be aligned to the group size. + next_ctrl: *const u8, + + // Pointer one past the last control byte of this range. + end: *const u8, +} + +impl RawIterRange { + /// Returns a `RawIterRange` covering a subset of a table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * `ctrl` must be valid for reads, i.e. 
table outlives the `RawIterRange`; + /// + /// * `ctrl` must be properly aligned to the group size (`Group::WIDTH`); + /// + /// * `ctrl` must point to the array of properly initialized control bytes; + /// + /// * `data` must be the [`Bucket`] at the `ctrl` index in the table; + /// + /// * the value of `len` must be less than or equal to the number of table buckets, + /// and the returned value of `ctrl.as_ptr().add(len).offset_from(ctrl.as_ptr())` + /// must be positive. + /// + /// * The `ctrl.add(len)` pointer must be either in bounds or one + /// byte past the end of the same [allocated table]. + /// + /// * The `len` must be a power of two. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { + debug_assert_ne!(len, 0); + debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); + // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`] + let end = unsafe { ctrl.add(len) }; + + // Load the first group and advance ctrl to point to the next group + // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`] + let (current_group, next_ctrl) = unsafe { + ( + Group::load_aligned(ctrl.cast()).match_full(), + ctrl.add(Group::WIDTH), + ) + }; + + Self { + current_group: current_group.into_iter(), + data, + next_ctrl, + end, + } + } + + /// # Safety + /// If `DO_CHECK_PTR_RANGE` is false, caller must ensure that we never try to iterate + /// after yielding all elements. 
+ #[cfg_attr(feature = "inline-more", inline)] + unsafe fn next_impl(&mut self) -> Option> { + loop { + if let Some(index) = self.current_group.next() { + return Some(unsafe { self.data.next_n(index) }); + } + + if DO_CHECK_PTR_RANGE && self.next_ctrl >= self.end { + return None; + } + + // We might read past self.end up to the next group boundary, + // but this is fine because it only occurs on tables smaller + // than the group size where the trailing control bytes are all + // EMPTY. On larger tables self.end is guaranteed to be aligned + // to the group size (since tables are power-of-two sized). + unsafe { + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } + } + + /// Folds every element into an accumulator by applying an operation, + /// returning the final result. + /// + /// `fold_impl()` takes three arguments: the number of items remaining in + /// the iterator, an initial value, and a closure with two arguments: an + /// 'accumulator', and an element. The closure returns the value that the + /// accumulator should have for the next iteration. + /// + /// The initial value is the value the accumulator will have on the first call. + /// + /// After applying this closure to every element of the iterator, `fold_impl()` + /// returns the accumulator. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. table outlives the `RawIterRange`; + /// + /// * The provided `n` value must match the actual number of items + /// in the table. 
+ /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::while_let_on_iterator)] + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn fold_impl(mut self, mut n: usize, mut acc: B, mut f: F) -> B + where + F: FnMut(B, Bucket) -> B, + { + loop { + while let Some(index) = self.current_group.next() { + // The returned `index` will always be in the range `0..Group::WIDTH`, + // so that calling `self.data.next_n(index)` is safe (see detailed explanation below). + debug_assert!(n != 0); + let bucket = unsafe { self.data.next_n(index) }; + acc = f(acc, bucket); + n -= 1; + } + + if n == 0 { + return acc; + } + + // SAFETY: The caller of this function ensures that: + // + // 1. The provided `n` value matches the actual number of items in the table; + // 2. The table is alive and has not moved. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables no larger than the group width (self.num_buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width: the number of buckets is a + // power of two (2 ^ n), Group::WIDTH is also a power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, then `(2 ^ n) % (2 ^ k) = 0`. As we start from the + // start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.current_group` will read bytes + // from the `self.num_buckets() - Group::WIDTH` index. We know also that + // `self.current_group.next()` will always return indices within the range + // `0..Group::WIDTH`.
+ // + // Knowing all of the above and taking into account that we are synchronizing + // the `self.data` index with the index we used to read the `self.current_group`, + // the subsequent `self.data.next_n(index)` will always return a bucket with + // an index number less than `self.num_buckets()`. + // + // The last `self.next_ctrl`, whose index would be `self.num_buckets()`, will never + // actually be read, since we should have already yielded all the elements of + // the table. + unsafe { + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } + } +} + +// We make raw iterators unconditionally Send and Sync, and let the PhantomData +// in the actual iterator implementations determine the real Send/Sync bounds. +unsafe impl Send for RawIterRange {} +unsafe impl Sync for RawIterRange {} + +impl Clone for RawIterRange { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + data: self.data.clone(), + next_ctrl: self.next_ctrl, + current_group: self.current_group.clone(), + end: self.end, + } + } +} + +impl Iterator for RawIterRange { + type Item = Bucket; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option> { + unsafe { + // SAFETY: We set checker flag to true. + self.next_impl::() + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + // We don't have an item count, so just guess based on the range size. + let remaining_buckets = if self.end > self.next_ctrl { + unsafe { offset_from(self.end, self.next_ctrl) } + } else { + 0 + }; + + // Add a group width to include the group we are currently processing. + (0, Some(Group::WIDTH + remaining_buckets)) + } +} + +impl FusedIterator for RawIterRange {} + +/// Iterator which returns a raw pointer to every full bucket in the table. 
+/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket (unless `reflect_remove` is called). +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator (unless `reflect_insert` is called). +/// - The order in which the iterator yields bucket is unspecified and may +/// change in the future. +pub struct RawIter { + pub iter: RawIterRange, + items: usize, +} + +impl RawIter { + unsafe fn drop_elements(&mut self) { + unsafe { + if T::NEEDS_DROP && self.items != 0 { + for item in self { + item.drop(); + } + } + } + } +} + +impl Clone for RawIter { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + iter: self.iter.clone(), + items: self.items, + } + } +} +impl Default for RawIter { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. + unsafe { RawTableInner::NEW.iter() } + } +} + +impl Iterator for RawIter { + type Item = Bucket; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option> { + // Inner iterator iterates over buckets + // so it can do unnecessary work if we already yielded all items. + if self.items == 0 { + return None; + } + + let nxt = unsafe { + // SAFETY: We check number of items to yield using `items` field. 
+ self.iter.next_impl::() + }; + + debug_assert!(nxt.is_some()); + self.items -= 1; + + nxt + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } + + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + unsafe { self.iter.fold_impl(self.items, init, f) } + } +} + +impl ExactSizeIterator for RawIter {} +impl FusedIterator for RawIter {} + +/// Iterator which returns an index of every full bucket in the table. +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding index of that bucket. +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields indices of the buckets is unspecified +/// and may change in the future. +#[derive(Clone)] +pub struct FullBucketsIndices { + // Mask of full buckets in the current group. Bits are cleared from this + // mask as each element is processed. + current_group: BitMaskIter, + + // Initial value of the bytes' indices of the current group (relative + // to the start of the control bytes). + group_first_index: usize, + + // Pointer to the current group of control bytes, + // Must be aligned to the group size (Group::WIDTH). + ctrl: NonNull, + + // Number of elements in the table. + items: usize, +} + +impl Default for FullBucketsIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. 
+ unsafe { RawTableInner::NEW.full_buckets_indices() } + } +} + +impl FullBucketsIndices { + /// Advances the iterator and returns the next value. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. table outlives the `FullBucketsIndices`; + /// + /// * It never tries to iterate after getting all elements. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn next_impl(&mut self) -> Option { + loop { + if let Some(index) = self.current_group.next() { + // The returned `self.group_first_index + index` will always + // be in the range `0..self.num_buckets()`. See explanation below. + return Some(self.group_first_index + index); + } + + // SAFETY: The caller of this function ensures that: + // + // 1. It never tries to iterate after getting all the elements; + // 2. The table is alive and did not moved; + // 3. The first `self.ctrl` pointed to the start of the array of control bytes. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables smaller than the group width (self.num_buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width. The number of buckets is a + // power of two (2 ^ n), Group::WIDTH is also power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, than `(2 ^ n) % (2 ^ k) = 0`. 
As we start from the + // start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.ctrl` will be equal to + // the `self.num_buckets() - Group::WIDTH`, so `self.current_group.next()` + // will always contain indices within the range `0..Group::WIDTH`, + // and subsequent `self.group_first_index + index` will always return a + // number less than `self.num_buckets()`. + unsafe { + self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH)); + } + + // SAFETY: See explanation above. + unsafe { + self.current_group = Group::load_aligned(self.ctrl.as_ptr().cast()) + .match_full() + .into_iter(); + self.group_first_index += Group::WIDTH; + } + } + } +} + +impl Iterator for FullBucketsIndices { + type Item = usize; + + /// Advances the iterator and returns the next value. It is up to + /// the caller to ensure that the `RawTable` outlives the `FullBucketsIndices`, + /// because we cannot make the `next` method unsafe. + #[inline(always)] + fn next(&mut self) -> Option { + // Return if we already yielded all items. + if self.items == 0 { + return None; + } + + // SAFETY: + // 1. We check the number of items left to yield using the `items` field. + // 2. The caller ensures that the table is alive and has not moved. + let nxt = unsafe { self.next_impl() }; + + debug_assert!(nxt.is_some()); + self.items -= 1; + + nxt + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } +} + +impl ExactSizeIterator for FullBucketsIndices {} +impl FusedIterator for FullBucketsIndices {} + +/// Iterator which consumes a table and returns elements.
+pub struct RawIntoIter { + iter: RawIter, + allocation: Option<(NonNull, Layout, A)>, + marker: PhantomData, +} + +impl RawIntoIter { + #[cfg_attr(feature = "inline-more", inline)] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl Send for RawIntoIter +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawIntoIter +where + T: Sync, + A: Sync, +{ +} + +impl Drop for RawIntoIter { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + unsafe { + // Drop all remaining elements + self.iter.drop_elements(); + + // Free the table + if let Some((ptr, layout, ref alloc)) = self.allocation { + alloc.deallocate(ptr, layout); + } + } + } +} + +impl Default for RawIntoIter { + fn default() -> Self { + Self { + iter: Default::default(), + allocation: None, + marker: PhantomData, + } + } +} +impl Iterator for RawIntoIter { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option { + unsafe { Some(self.iter.next()?.read()) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for RawIntoIter {} +impl FusedIterator for RawIntoIter {} + +/// Iterator which consumes elements without freeing the table storage. +pub struct RawDrain<'a, T, A: Allocator = Global> { + iter: RawIter, + + // The table is moved into the iterator for the duration of the drain. This + // ensures that an empty table is left if the drain iterator is leaked + // without dropping. + table: RawTableInner, + orig_table: NonNull, + + // We don't use a &'a mut RawTable because we want RawDrain to be + // covariant over T. 
+ marker: PhantomData<&'a RawTable>, +} + +impl RawDrain<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl Send for RawDrain<'_, T, A> +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawDrain<'_, T, A> +where + T: Sync, + A: Sync, +{ +} + +impl Drop for RawDrain<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + unsafe { + // Drop all remaining elements. Note that this may panic. + self.iter.drop_elements(); + + // Reset the contents of the table now that all elements have been + // dropped. + self.table.clear_no_drop(); + + // Move the now empty table back to its original location. + self.orig_table + .as_ptr() + .copy_from_nonoverlapping(&raw const self.table, 1); + } + } +} + +impl Iterator for RawDrain<'_, T, A> { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option { + unsafe { + let item = self.iter.next()?; + Some(item.read()) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for RawDrain<'_, T, A> {} +impl FusedIterator for RawDrain<'_, T, A> {} + +/// Iterator over occupied buckets that could match a given hash. +/// +/// `RawTable` only stores 7 bits of the hash value, so this iterator may return +/// items that have a hash value different than the one provided. You should +/// always validate the returned values before using them. +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket. 
+/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields buckets is unspecified and may +/// change in the future. +pub struct RawIterHash { + inner: RawIterHashIndices, + _marker: PhantomData, +} + +#[derive(Clone)] +pub struct RawIterHashIndices { + // See `RawTableInner`'s corresponding fields for details. + // We can't store a `*const RawTableInner` as it would get + // invalidated by the user calling `&mut` methods on `RawTable`. + bucket_mask: usize, + ctrl: NonNull, + + // The top 7 bits of the hash. + tag_hash: Tag, + + // The sequence of groups to probe in the search. + probe_seq: ProbeSeq, + + group: Group, + + // The elements within the group with a matching tag-hash. + bitmask: BitMaskIter, +} + +impl RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTable, hash: u64) -> Self { + RawIterHash { + inner: unsafe { RawIterHashIndices::new(&table.table, hash) }, + _marker: PhantomData, + } + } +} + +impl Clone for RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + _marker: PhantomData, + } + } +} + +impl Default for RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + Self { + inner: RawIterHashIndices::default(), + _marker: PhantomData, + } + } +} + +impl Default for RawIterHashIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. 
+ unsafe { RawIterHashIndices::new(&RawTableInner::NEW, 0) } + } +} + +impl RawIterHashIndices { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTableInner, hash: u64) -> Self { + let tag_hash = Tag::full(hash); + let probe_seq = table.probe_seq(hash); + let group = unsafe { Group::load(table.ctrl(probe_seq.pos)) }; + let bitmask = group.match_tag(tag_hash).into_iter(); + + RawIterHashIndices { + bucket_mask: table.bucket_mask, + ctrl: table.ctrl, + tag_hash, + probe_seq, + group, + bitmask, + } + } +} + +impl Iterator for RawIterHash { + type Item = Bucket; + + fn next(&mut self) -> Option> { + unsafe { + match self.inner.next() { + Some(index) => { + // Can't use `RawTable::bucket` here as we don't have + // an actual `RawTable` reference to use. + debug_assert!(index <= self.inner.bucket_mask); + let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index); + Some(bucket) + } + None => None, + } + } + } +} + +impl Iterator for RawIterHashIndices { + type Item = usize; + + fn next(&mut self) -> Option { + unsafe { + loop { + if let Some(bit) = self.bitmask.next() { + let index = (self.probe_seq.pos + bit) & self.bucket_mask; + return Some(index); + } + if likely(self.group.match_empty().any_bit_set()) { + return None; + } + self.probe_seq.move_next(self.bucket_mask); + + // Can't use `RawTableInner::ctrl` here as we don't have + // an actual `RawTableInner` reference to use. 
+ let index = self.probe_seq.pos; + debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); + let group_ctrl = self.ctrl.as_ptr().add(index).cast(); + + self.group = Group::load(group_ctrl); + self.bitmask = self.group.match_tag(self.tag_hash).into_iter(); + } + } + } +} + +pub struct RawExtractIf<'a, T, A: Allocator> { + pub iter: RawIter, + pub table: &'a mut RawTable, +} + +impl RawExtractIf<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub fn next(&mut self, mut f: F) -> Option + where + F: FnMut(&mut T) -> bool, + { + unsafe { + for item in &mut self.iter { + if f(item.as_mut()) { + return Some(self.table.remove(item).0); + } + } + } + None + } +} + +#[cfg(test)] +mod test_map { + use super::*; + + #[test] + fn test_prev_pow2() { + // Skip 0, not defined for that input. + let mut pow2: usize = 1; + while (pow2 << 1) > 0 { + let next_pow2 = pow2 << 1; + assert_eq!(pow2, prev_pow2(pow2)); + // Need to skip 2, because it's also a power of 2, so it doesn't + // return the previous power of 2. + if next_pow2 > 2 { + assert_eq!(pow2, prev_pow2(pow2 + 1)); + assert_eq!(pow2, prev_pow2(next_pow2 - 1)); + } + pow2 = next_pow2; + } + } + + #[test] + fn test_minimum_capacity_for_small_types() { + #[track_caller] + fn test_t() { + let raw_table: RawTable = RawTable::with_capacity(1); + let actual_buckets = raw_table.num_buckets(); + let min_buckets = Group::WIDTH / core::mem::size_of::(); + assert!( + actual_buckets >= min_buckets, + "expected at least {min_buckets} buckets, got {actual_buckets} buckets" + ); + } + + test_t::(); + + // This is only "small" for some platforms, like x86_64 with SSE2, but + // there's no harm in running it on other platforms. 
+ test_t::(); + } + + fn rehash_in_place(table: &mut RawTable, hasher: impl Fn(&T) -> u64) { + unsafe { + table.table.rehash_in_place( + &|table, index| hasher(table.bucket::(index).as_ref()), + mem::size_of::(), + if mem::needs_drop::() { + Some(|ptr| ptr::drop_in_place(ptr.cast::())) + } else { + None + }, + ); + } + } + + #[test] + fn rehash() { + let mut table = RawTable::new(); + let hasher = |i: &u64| *i; + for i in 0..100 { + table.insert(i, i, hasher); + } + + for i in 0..100 { + unsafe { + assert_eq!( + table + .find(i, |x| Ok::<_, ()>(*x == i)) + .unwrap() + .map(|b| b.read()), + Some(i) + ); + } + assert!(table + .find(i + 100, |x| Ok::<_, ()>(*x == i + 100)) + .unwrap() + .is_none()); + } + + rehash_in_place(&mut table, hasher); + + for i in 0..100 { + unsafe { + assert_eq!( + table + .find(i, |x| Ok::<_, ()>(*x == i)) + .unwrap() + .map(|b| b.read()), + Some(i) + ); + } + assert!(table + .find(i + 100, |x| Ok::<_, ()>(*x == i + 100)) + .unwrap() + .is_none()); + } + } + + /// CHECKING THAT WE ARE NOT TRYING TO READ THE MEMORY OF + /// AN UNINITIALIZED TABLE DURING THE DROP + #[test] + fn test_drop_uninitialized() { + use std::vec::Vec; + + let table = unsafe { + // SAFETY: The `buckets` is power of two and we're not + // trying to actually use the returned RawTable. + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap() + }; + drop(table); + } + + /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS` + /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES. + #[test] + fn test_drop_zero_items() { + use std::vec::Vec; + unsafe { + // SAFETY: The `buckets` is power of two and we're not + // trying to actually use the returned RawTable. + let mut table = + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap(); + + // WE SIMULATE, AS IT WERE, A FULL TABLE. 
+ + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `table.table.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. + table.table.ctrl_slice().fill_empty(); + + // SAFETY: table.capacity() is guaranteed to be smaller than table.num_buckets() + table.table.ctrl(0).write_bytes(0, table.capacity()); + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if table.num_buckets() < Group::WIDTH { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(Group::WIDTH), table.table.num_buckets()); + } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes, so copying `Group::WIDTH` bytes with offset equal + // to `self.num_buckets() == self.bucket_mask + 1` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(table.table.num_buckets()), Group::WIDTH); + } + drop(table); + } + } + + /// CHECKING THAT A PANIC IN AN ELEMENT'S `clone` DURING `clone_from` + /// LEAVES THE DESTINATION TABLE EMPTY AND ITS ITERATORS YIELDING NOTHING.
+ #[test] + #[cfg(panic = "unwind")] + fn test_catch_panic_clone_from() { + use super::AllocError; + use super::Allocator; + use super::Global; + use core::sync::atomic::AtomicI8; + use core::sync::atomic::Ordering; + use std::sync::Arc; + use std::thread; + use std::vec::Vec; + + struct MyAllocInner { + drop_count: Arc, + } + + #[derive(Clone)] + struct MyAlloc { + _inner: Arc, + } + + impl Drop for MyAllocInner { + fn drop(&mut self) { + println!("MyAlloc freed."); + self.drop_count.fetch_sub(1, Ordering::SeqCst); + } + } + + unsafe impl Allocator for MyAlloc { + fn allocate(&self, layout: Layout) -> std::result::Result, AllocError> { + let g = Global; + g.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + unsafe { + let g = Global; + g.deallocate(ptr, layout); + } + } + } + + const DISARMED: bool = false; + const ARMED: bool = true; + + struct CheckedCloneDrop { + panic_in_clone: bool, + dropped: bool, + need_drop: Vec, + } + + impl Clone for CheckedCloneDrop { + fn clone(&self) -> Self { + if self.panic_in_clone { + panic!("panic in clone") + } + Self { + panic_in_clone: self.panic_in_clone, + dropped: self.dropped, + need_drop: self.need_drop.clone(), + } + } + } + + impl Drop for CheckedCloneDrop { + fn drop(&mut self) { + if self.dropped { + panic!("double drop"); + } + self.dropped = true; + } + } + + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + let mut table = RawTable::new_in(MyAlloc { + _inner: Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + + for (idx, panic_in_clone) in core::iter::repeat_n(DISARMED, 7).enumerate() { + let idx = idx as u64; + table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx], + }, + ), + |(k, _)| *k, + ); + } + + assert_eq!(table.len(), 7); + + thread::scope(|s| { + let result = s.spawn(|| { + let armed_flags = [ + DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED, + ]; + let mut scope_table = 
RawTable::new_in(MyAlloc { + _inner: Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + for (idx, &panic_in_clone) in armed_flags.iter().enumerate() { + let idx = idx as u64; + scope_table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx + 100], + }, + ), + |(k, _)| *k, + ); + } + table.clone_from(&scope_table); + }); + assert!(result.join().is_err()); + }); + + // Let's check that all iterators work fine and do not return elements + // (especially `RawIterRange`, which does not depend on the number of + // elements in the table, but looks directly at the control bytes) + // + // SAFETY: We know for sure that `RawTable` will outlive + // the returned `RawIter / RawIterRange` iterator. + assert_eq!(table.len(), 0); + assert_eq!(unsafe { table.iter().count() }, 0); + assert_eq!(unsafe { table.iter().iter.count() }, 0); + + for idx in 0..table.num_buckets() { + let idx = idx as u64; + assert!( + table + .find(idx, |(k, _)| Ok::<_, ()>(*k == idx)) + .unwrap() + .is_none(), + "Index: {idx}" + ); + } + + // All allocator clones should already be dropped. 
+ assert_eq!(dropped.load(Ordering::SeqCst), 1); + } +} diff --git a/src/hashbrown/scopeguard.rs b/src/hashbrown/scopeguard.rs new file mode 100644 index 0000000..26532b8 --- /dev/null +++ b/src/hashbrown/scopeguard.rs @@ -0,0 +1,72 @@ +// Extracted from the scopeguard crate +use core::{ + mem::ManuallyDrop, + ops::{Deref, DerefMut}, + ptr, +}; + +pub(crate) struct ScopeGuard +where + F: FnMut(&mut T), +{ + dropfn: F, + value: T, +} + +#[inline] +pub(crate) fn guard(value: T, dropfn: F) -> ScopeGuard +where + F: FnMut(&mut T), +{ + ScopeGuard { dropfn, value } +} + +impl ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + pub(crate) fn into_inner(guard: Self) -> T { + // Cannot move out of Drop-implementing types, so + // ptr::read the value out of a ManuallyDrop + // Don't use mem::forget as that might invalidate value + let guard = ManuallyDrop::new(guard); + unsafe { + let value = ptr::read(&raw const guard.value); + // read the closure so that it is dropped + let _ = ptr::read(&raw const guard.dropfn); + value + } + } +} + +impl Deref for ScopeGuard +where + F: FnMut(&mut T), +{ + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.value + } +} + +impl DerefMut for ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } +} + +impl Drop for ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + fn drop(&mut self) { + (self.dropfn)(&mut self.value); + } +} diff --git a/src/hashbrown/util.rs b/src/hashbrown/util.rs new file mode 100644 index 0000000..880913b --- /dev/null +++ b/src/hashbrown/util.rs @@ -0,0 +1,9 @@ +pub(crate) use std::hint::likely; +pub(crate) use std::hint::unlikely; + +// FIXME: use strict provenance functions once they are stable. +// Implement it with a transmute for now. 
+#[inline(always)] +pub(crate) fn invalid_mut(addr: usize) -> *mut T { + unsafe { core::mem::transmute(addr) } +} diff --git a/src/internal/alias.rs b/src/internal/alias.rs new file mode 100644 index 0000000..3694619 --- /dev/null +++ b/src/internal/alias.rs @@ -0,0 +1,15 @@ +//! There are type aliases that are used across the library + +/// Type alias for `pyo3::Py` +pub type PyObject = pyo3::Py; + +/// Type alias for `pyo3::Bound<'a, pyo3::PyAny>` +pub type BoundObject<'a> = pyo3::Bound<'a, pyo3::PyAny>; + +/// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyTuple>`. +/// Use it directly as `*args` argument type. +pub type ArgsType<'a> = &'a pyo3::Bound<'a, pyo3::types::PyTuple>; + +/// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyDict>`. +/// Use it directly as `**kwds` argument type. +pub type KwdsType<'a> = &'a pyo3::Bound<'a, pyo3::types::PyDict>; diff --git a/src/internal/lazyheap.rs b/src/internal/lazyheap.rs new file mode 100644 index 0000000..7317c1b --- /dev/null +++ b/src/internal/lazyheap.rs @@ -0,0 +1,350 @@ +use std::ptr::NonNull; + +use crate::internal::utils; + +/// A collection that defers sorting until an ordered operation is requested. +/// +/// Unlike a classic binary heap, `LazyHeap` does not maintain a heap +/// invariant after every insertion. Instead it tracks a dirty flag and +/// re-sorts the entire backing buffer the first time an ordered operation is +/// needed. This amortises well when many insertions occur before any removal, +/// because one `O(n log n)` sort is cheaper than repeated `O(log n)` sift-ups. +/// +/// # Ownership model +/// `LazyHeap` is the **sole owner** of every element it holds. Cursors are +/// purely non-owning handles and must never be used to free the backing +/// allocation. +pub struct LazyHeap { + data: std::collections::VecDeque>, + is_sorted: bool, + _marker: std::marker::PhantomData>, +} + +impl LazyHeap { + /// Pops and owns the front allocation. Does **not** sort. 
+ #[inline] + fn unlink_front(&mut self) -> Option { + let ptr = self.data.pop_front()?; + // SAFETY: LazyHeap owns the sole Box for every pointer it stores. + Some(*unsafe { Box::from_raw(ptr.as_ptr()) }) + } + + /// Pops and owns the back allocation. Does **not** sort. + #[inline] + fn unlink_back(&mut self) -> Option { + let ptr = self.data.pop_back()?; + // SAFETY: LazyHeap owns the sole Box for every pointer it stores. + Some(*unsafe { Box::from_raw(ptr.as_ptr()) }) + } +} + +impl LazyHeap { + /// Creates a new, empty `LazyHeap`. + pub fn new() -> Self { + Self { + data: std::collections::VecDeque::new(), + is_sorted: true, + _marker: std::marker::PhantomData, + } + } + + /// Returns the number of elements in the heap. + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// Returns `true` if the heap contains no elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Inserts `value` into the heap and returns a [`Cursor`] to it. + /// + /// The returned cursor is **non-owning**. Store it in an external structure + /// (e.g. a `hashbrown::RawTable`) for later removal via [`remove`](Self::remove). + /// Never reconstruct a `Box` from it. + /// + /// This call marks the heap as unsorted; the next ordered operation + /// triggers a full sort. + /// + /// # Complexity + /// Amortised O(1). + #[inline] + pub fn push(&mut self, value: T) -> Cursor { + // SAFETY: Box::into_raw is guaranteed non-null. + let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) }; + self.data.push_back(ptr); + self.is_sorted = false; + Cursor(ptr) + } + + /// Marks the heap's order as invalid without re-sorting immediately. + /// + /// Call this after mutating an element's sort key through [`Cursor::element_mut`]. + /// The next ordered operation will then re-sort before proceeding.
+ #[inline] + pub fn mark_unsorted(&mut self) { + self.is_sorted = false; + } + + /// Sorts the backing buffer with `compare` if the heap is marked unsorted. Returns + /// `true` if the heap was marked unsorted before this call, `false` if it was already sorted. + /// + /// All ordered operations call this automatically. You can call it + /// manually to amortise the sort cost before a batch of [`front`](Self::front) / + /// [`get`](Self::get) accesses. + /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted. + #[inline] + pub fn sort_by(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> bool { + if self.is_sorted { + return false; + } + if self.data.len() > 1 { + // SAFETY: every pointer in `self.data` is a live, heap-owned allocation. + unsafe { + self.data + .make_contiguous() + .sort_by(|a, b| compare(a.as_ref(), b.as_ref())); + } + } + self.is_sorted = true; + true + } + + /// Returns a cursor to the smallest (front) element without removing it, + /// or `None` if the heap is empty. + /// + /// Sorts the heap first if necessary. + #[inline] + pub fn front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option> { + self.sort_by(compare); + self.data.front().copied().map(Cursor) + } + + /// Returns a cursor to the largest (back) element without removing it, + /// or `None` if the heap is empty. + /// + /// Sorts the heap first if necessary. + #[inline] + pub fn back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option> { + self.sort_by(compare); + self.data.back().copied().map(Cursor) + } + + /// Returns a cursor to the element at position `index`, or `None` if out + /// of bounds. + /// + /// The index is only meaningful after the heap has been sorted — consider + /// calling [`sort_by`](Self::sort_by) first. + #[inline] + pub fn get(&self, index: usize) -> Option> { + self.data.get(index).copied().map(Cursor) + } + + /// Removes and returns the smallest (front) element, or `None` if empty. + /// + /// Sorts the heap first if necessary.
+ /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted (`VecDeque::pop_front` + /// does not shift elements). + #[inline] + pub fn pop_front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_front() + } + + /// Removes and returns the largest (back) element, or `None` if empty. + /// + /// Sorts the heap first if necessary. + /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted. + #[inline] + pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_back() + } + + /// Removes and returns the element identified by `cursor`. + /// + /// Sorts the heap first if necessary, then performs a linear scan to + /// locate the element by pointer identity. + /// + /// # Complexity + /// O(n log n) when unsorted; O(n) when already sorted. + pub fn remove( + &mut self, + cursor: Cursor, + compare: impl Fn(&T, &T) -> std::cmp::Ordering, + ) -> T { + debug_assert!(!self.data.is_empty()); + + // Fast path: single element — no need to sort or scan (`cursor` is not verified here). + if self.data.len() == 1 { + return self.unlink_back().unwrap(); + } + + self.sort_by(compare); + + let index = self + .data + .iter() + .position(|ptr| cursor.0 == *ptr) + .expect("cursor does not belong to this LazyHeap"); + + // SAFETY: `index` was just returned by `position`, so it is in bounds. + // LazyHeap holds the sole Box for this pointer; the cursor is non-owning. + let ptr = unsafe { self.data.remove(index).unwrap_unchecked() }; + *unsafe { Box::from_raw(ptr.as_ptr()) } + } + + /// Returns an iterator that yields a [`Cursor`] for each element in sorted + /// order. + /// + /// Sorts the heap first if necessary. The returned [`RawIter`] holds raw + /// pointers into the backing buffer; do not mutate or drop the heap while + /// it is alive.
+ #[inline] + pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> RawIter { + self.sort_by(compare); + let (a, b) = self.data.as_slices(); + RawIter { + first: utils::RawSliceIter::new(a), + second: utils::RawSliceIter::new(b), + } + } + + /// Removes all elements, dropping each one. + /// + /// The heap is empty and considered sorted after this call. + #[inline] + pub fn clear(&mut self) { + while self.unlink_back().is_some() {} + self.is_sorted = true; + } + + /// Shrinks the backing buffer's capacity as close to its current length + /// as possible. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.data.shrink_to_fit(); + } +} + +impl Default for LazyHeap { + fn default() -> Self { + Self::new() + } +} + +unsafe impl<#[may_dangle] T> Drop for LazyHeap { + fn drop(&mut self) { + struct DropGuard<'a, T>(&'a mut LazyHeap); + + impl<'a, T> Drop for DropGuard<'a, T> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.unlink_back().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.unlink_back().is_some() {} + std::mem::forget(guard); + } +} + +/// A non-owning, pointer-sized handle to an element stored in a [`LazyHeap`]. +/// +/// Think of `Cursor` as a stable address you can cache in an external data +/// structure (e.g. `hashbrown::raw::RawTable`) and later hand back to +/// [`LazyHeap::remove`] for cheap lookup and removal. It carries **no +/// ownership**: every allocation is owned exclusively by the heap that +/// produced the cursor. +/// +/// Using a stale cursor is undefined behaviour. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct Cursor(NonNull); + +impl Cursor { + /// Returns a shared reference to the value this cursor points to. 
+ /// + /// # Safety + /// The cursor must be valid (see the [type-level docs](Self)). + #[inline] + pub unsafe fn element(&self) -> &T { + self.0.as_ref() + } + + /// Returns a mutable reference to the value this cursor points to. + /// + /// If the mutation changes any field that affects sort order, you **must** + /// call [`LazyHeap::mark_unsorted`] afterwards so the heap re-sorts before + /// the next ordered operation. + /// + /// # Safety + /// - The cursor must be valid (see the [type-level docs](Self)). + /// - No other reference to the same element may be alive simultaneously. + #[inline] + pub unsafe fn element_mut(&mut self) -> &mut T { + self.0.as_mut() + } + + /// Returns the raw pointer underlying this cursor. + /// + /// Prefer [`element`](Self::element) or [`element_mut`](Self::element_mut) for + /// element access. This exists for interoperability with APIs that require + /// a raw pointer (e.g. hashing into a `RawTable` by address). + /// + /// **Never** reconstruct a `Box` from this pointer — doing so transfers + /// ownership out of the heap and causes a double-free. + #[inline] + pub fn as_ptr(&self) -> *mut T { + self.0.as_ptr() + } +} + +/// Raw iterator for [`VecDeque`] which doesn't carry a lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. +pub struct RawIter { + first: utils::RawSliceIter>, + second: utils::RawSliceIter>, +} + +impl Iterator for RawIter { + type Item = Cursor; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some( + // SAFETY: `val` is a valid `NonNull>` pointing into the + // first slice of the `VecDeque`. The pointee is `Copy` and remains + // valid as long as the `VecDeque` is alive and unmodified, which the + // caller is required to uphold per this type's safety contract. + Cursor(unsafe { val.read() }), + ), + None => { + std::mem::swap(&mut self.first, &mut self.second); + // SAFETY: same as above.
+ self.first.next().map(|val| Cursor(unsafe { val.read() })) + } + } + } +} + +unsafe impl Send for LazyHeap {} +unsafe impl Sync for LazyHeap {} +unsafe impl Send for RawIter {} +unsafe impl Sync for RawIter {} +unsafe impl Send for Cursor {} +unsafe impl Sync for Cursor {} diff --git a/src/internal/linked_list.rs b/src/internal/linked_list.rs new file mode 100644 index 0000000..fc48941 --- /dev/null +++ b/src/internal/linked_list.rs @@ -0,0 +1,455 @@ +use std::marker::PhantomData; +use std::mem; +use std::ptr::NonNull; + +/// [`LinkedList`]'s node +pub struct Node { + next: Option>>, + prev: Option>>, + element: T, +} + +impl Node { + fn new(element: T) -> Self { + Node { + next: None, + prev: None, + element, + } + } + + #[allow(clippy::boxed_local)] + fn into_element(self: Box) -> T { + self.element + } + + pub fn element(&self) -> &T { + &self.element + } +} + +/// A doubly-linked list with owned nodes. +/// +/// The `LinkedList` allows pushing and popping elements at either end +/// in constant time. +pub struct LinkedList { + head: Option>>, + tail: Option>>, + len: usize, + _marker: PhantomData>>, +} + +// private methods +impl LinkedList { + /// Adds the given node to the front of the list. + /// + /// # Safety + /// `node` must point to a valid node that was boxed and leaked using the list's allocator. + /// This method takes ownership of the node, so the pointer should not be used again. + #[inline] + unsafe fn push_front_node(&mut self, node: NonNull>) { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + unsafe { + (*node.as_ptr()).next = self.head; + (*node.as_ptr()).prev = None; + let node = Some(node); + + match self.head { + None => self.tail = node, + // Not creating new mutable (unique!) references overlapping `element`. 
+ Some(head) => (*head.as_ptr()).prev = node, + } + + self.head = node; + self.len += 1; + } + } + + /// Removes and returns the node at the front of the list. + #[inline] + fn pop_front_node(&mut self) -> Option>> { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + self.head.map(|node| unsafe { + let node = Box::from_raw(node.as_ptr()); + self.head = node.next; + + match self.head { + None => self.tail = None, + // Not creating new mutable (unique!) references overlapping `element`. + Some(head) => (*head.as_ptr()).prev = None, + } + + self.len -= 1; + node + }) + } + + /// Adds the given node to the back of the list. + /// + /// # Safety + /// `node` must point to a valid node that was boxed and leaked using the list's allocator. + /// This method takes ownership of the node, so the pointer should not be used again. + #[inline] + unsafe fn push_back_node(&mut self, node: NonNull>) { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + unsafe { + (*node.as_ptr()).next = None; + (*node.as_ptr()).prev = self.tail; + let node = Some(node); + + match self.tail { + None => self.head = node, + // Not creating new mutable (unique!) references overlapping `element`. + Some(tail) => (*tail.as_ptr()).next = node, + } + + self.tail = node; + self.len += 1; + } + } + + /// Removes and returns the node at the back of the list. + #[inline] + fn pop_back_node(&mut self) -> Option>> { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + self.tail.map(|node| unsafe { + let node = Box::from_raw(node.as_ptr()); + self.tail = node.prev; + + match self.tail { + None => self.head = None, + // Not creating new mutable (unique!) references overlapping `element`. 
+ Some(tail) => (*tail.as_ptr()).next = None, + } + + self.len -= 1; + node + }) + } + + /// Unlinks the specified node from the current list. + /// + /// Warning: this will not check that the provided node belongs to the current list. + /// + /// This method takes care not to create mutable references to `element`, to + /// maintain validity of aliasing pointers. + #[inline] + unsafe fn unlink_node(&mut self, mut node: NonNull>) { + let node = unsafe { node.as_mut() }; // this one is ours now, we can create an &mut. + + // Not creating new mutable (unique!) references overlapping `element`. + match node.prev { + Some(prev) => unsafe { (*prev.as_ptr()).next = node.next }, + // this node is the head node + None => self.head = node.next, + }; + + match node.next { + Some(next) => unsafe { (*next.as_ptr()).prev = node.prev }, + // this node is the tail node + None => self.tail = node.prev, + }; + + self.len -= 1; + } + + /// Unlinks the specified node from the current list and returns the item. + /// + /// # Safety + /// This will not check that the provided node belongs to the current list. + unsafe fn remove_node(&mut self, node: NonNull>) -> T { + unsafe { + self.unlink_node(node); + let node = Box::from_raw(node.as_ptr()); + node.element + } + } +} + +impl Default for LinkedList { + /// Creates an empty `LinkedList`. + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl LinkedList { + /// Creates an empty `LinkedList`. + #[inline] + #[must_use] + pub const fn new() -> Self { + LinkedList { + head: None, + tail: None, + len: 0, + _marker: PhantomData, + } + } + + /// Returns `true` if the `LinkedList` is empty. + /// + /// This operation should compute in *O*(1) time. + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.head.is_none() + } + + /// Returns the length of the `LinkedList`. + /// + /// This operation should compute in *O*(1) time. 
+ #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.len + } + + /// Removes all elements from the `LinkedList`. + /// + /// This operation should compute in *O*(*n*) time. + #[inline] + pub fn clear(&mut self) { + drop(LinkedList { + head: self.head.take(), + tail: self.tail.take(), + len: mem::take(&mut self.len), + _marker: PhantomData, + }); + } + + /// Returns a [`Cursor`] to the front node, or `None` if the list is empty. + #[inline] + #[must_use] + pub fn cursor_front(&self) -> Option> { + self.head.map(Cursor::new) + } + + /// Returns a [`Cursor`] to the back node, or `None` if the list is empty. + #[inline] + #[must_use] + pub fn cursor_back(&self) -> Option> { + self.tail.map(Cursor::new) + } + + /// Adds an element to the front of the list and returns a [`Cursor`] to it. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn push_front(&mut self, elt: T) -> Cursor { + let node = Box::new(Node::new(elt)); + let node_ptr = NonNull::from(Box::leak(node)); + + // SAFETY: node_ptr is a unique pointer to a node we boxed with self.alloc and leaked + unsafe { + self.push_front_node(node_ptr); + } + Cursor::new(node_ptr) + } + + /// Removes the first element and returns it, or `None` if the list is + /// empty. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn pop_front(&mut self) -> Option { + self.pop_front_node().map(Node::into_element) + } + + /// Adds an element to the back of the list and returns a [`Cursor`] to it. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn push_back(&mut self, elt: T) -> Cursor { + let node = Box::new(Node::new(elt)); + let node_ptr = NonNull::from(Box::leak(node)); + + // SAFETY: node_ptr is a unique pointer to a node we boxed with self.alloc and leaked + unsafe { + self.push_back_node(node_ptr); + } + Cursor::new(node_ptr) + } + + /// Removes the last element from a list and returns it, or `None` if + /// it is empty. 
+ /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn pop_back(&mut self) -> Option { + self.pop_back_node().map(Node::into_element) + } + + /// Returns a raw, lifetime-free iterator over the nodes of a LinkedList. + /// + /// # Safety + /// The iterator must not outlive the list it was created from, and the list must not be structurally modified. + pub unsafe fn iter(&self) -> RawIter { + RawIter { + head: self.head, + len: self.len, + } + } +} + +unsafe impl<#[may_dangle] T> Drop for LinkedList { + fn drop(&mut self) { + struct DropGuard<'a, T>(&'a mut LinkedList); + + impl<'a, T> Drop for DropGuard<'a, T> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.pop_front_node().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.pop_front_node().is_some() {} + mem::forget(guard); + } +} + +/// An opaque handle to a node in a [`LinkedList`]. +/// +/// Obtained via [`LinkedList::push_front`], [`LinkedList::push_back`], +/// [`LinkedList::cursor_front`], or [`LinkedList::cursor_back`]. +/// +/// `Cursor` is `Copy`; cloning or copying it produces a second handle to the +/// *same* node. Two cursors compare equal iff they point at the same node. +/// +/// # Safety invariant +/// Every `unsafe` method on `Cursor` requires that: +/// - the cursor was obtained from the list it is passed to, **and** +/// - the node has not yet been removed from that list. +/// +/// Violating either condition is undefined behaviour. +#[repr(transparent)] +pub struct Cursor(NonNull>); + +// `NonNull>` is just a pointer; copying it is always safe. +impl Clone for Cursor { + #[inline] + fn clone(&self) -> Self { + *self + } +} +impl Copy for Cursor {} + +// Pointer equality: two cursors are equal if they point at the same node. 
+impl PartialEq for Cursor { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} +impl Eq for Cursor {} + +impl Cursor { + #[inline] + fn new(node: NonNull>) -> Self { + Cursor(node) + } + + /// Returns a shared reference to the element this cursor points to. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + /// The returned reference borrows for `'a`, which the caller must + /// ensure does not outlive the node or the list. + #[inline] + pub unsafe fn element<'a>(&self) -> &'a T { + &(*self.0.as_ptr()).element + } + + /// Returns a mutable reference to the element this cursor points to. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + /// In addition, no other reference to this element may exist for the + /// duration of the returned `'a` borrow. + #[inline] + pub unsafe fn element_mut<'a>(&mut self) -> &'a mut T { + &mut (*self.0.as_ptr()).element + } + + /// Moves this node to the front of `list`. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn move_to_front(self, list: &mut LinkedList) { + list.unlink_node(self.0); + list.push_front_node(self.0); + } + + /// Moves this node to the back of `list`. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn move_to_back(self, list: &mut LinkedList) { + list.unlink_node(self.0); + list.push_back_node(self.0); + } + + /// Unlinks this node from `list` and returns its element. + /// + /// Consumes the cursor so it cannot be used after removal. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn unlink(self, list: &mut LinkedList) -> T { + list.remove_node(self.0) + } +} + +/// A raw, lifetime-free iterator over the nodes of a [`LinkedList`]. +/// +/// Yields a [`Cursor`] for each node, from front to back. +/// +/// Obtained via [`LinkedList::iter`]. 
+/// +/// # Safety invariant +/// The iterator must not outlive the list it was created from, and the list +/// must not be structurally modified (nodes added or removed) while iterating. +/// Violating either condition is undefined behaviour. +pub struct RawIter { + head: Option>>, + len: usize, +} + +impl Iterator for RawIter { + type Item = Cursor; + + #[inline] + fn next(&mut self) -> Option> { + if self.len == 0 { + return None; + } + self.head.map(|node| { + self.len -= 1; + // SAFETY: node is a valid, live pointer for as long as the list lives. + self.head = unsafe { (*node.as_ptr()).next }; + Cursor::new(node) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.len, Some(self.len)) + } +} + +unsafe impl Send for LinkedList {} +unsafe impl Sync for LinkedList {} +unsafe impl Send for RawIter {} +unsafe impl Sync for RawIter {} +unsafe impl Send for Cursor {} +unsafe impl Sync for Cursor {} diff --git a/src/internal/mod.rs b/src/internal/mod.rs new file mode 100644 index 0000000..937b4df --- /dev/null +++ b/src/internal/mod.rs @@ -0,0 +1,6 @@ +pub mod alias; +pub mod lazyheap; +pub mod linked_list; +pub mod onceinit; +pub mod pickle; +pub mod utils; diff --git a/src/internal/onceinit.rs b/src/internal/onceinit.rs new file mode 100644 index 0000000..a365c2b --- /dev/null +++ b/src/internal/onceinit.rs @@ -0,0 +1,158 @@ +//! According to PyO3 updates, we can write `__init__` methods inside the Rust, which allows developers +//! to use classes as subclass in Python. +//! +//! All of classes must implement `__new__` and `__init__` methods. +//! - In `__new__` methods, we should allocate memory for the type; +//! - And in `__init__` methods, we should initialize and constrcut the type, according to parameters. +//! +//! There are types that help us to create these methods completely thread-safe. 
+
+use std::cell;
+use std::mem;
+use std::sync::atomic;
+use std::sync::Arc;
+
+const UNINIT: u8 = 0;
+const RUNNING: u8 = 1;
+const INIT: u8 = 2;
+
+/// Shared storage behind every [`OnceInit`] handle.
+pub struct OnceInitInner<T> {
+    /// Tracks the lifecycle of the inner value:
+    /// `UNINIT` → `RUNNING` (mid-write) → `INIT` (ready).
+    state: atomic::AtomicU8,
+    /// Storage that stays uninitialized until [`set`](OnceInit::set) completes.
+    /// It is only read after `state` has been observed as `INIT`.
+    value: cell::UnsafeCell<mem::MaybeUninit<T>>,
+}
+
+impl<T> Drop for OnceInitInner<T> {
+    /// Drops the stored value if and only if it was initialized.
+    ///
+    /// The destructor lives on the shared allocation rather than on the
+    /// [`OnceInit`] handle: handles are cloneable and share one `Arc`, so a
+    /// destructor on the handle would run once per clone and drop the same
+    /// value repeatedly (double drop / use-after-free). `Arc` runs this
+    /// destructor exactly once, when the last handle goes away.
+    fn drop(&mut self) {
+        // `&mut self` guarantees exclusive access, so no atomic operation is needed.
+        if *self.state.get_mut() == INIT {
+            // SAFETY: state == INIT means value was written and not yet dropped.
+            unsafe { self.value.get_mut().assume_init_drop() }
+        }
+    }
+}
+
+/// A thread-safe, write-once container for PyO3 `__new__` / `__init__` two-phase construction.
+///
+/// PyO3 splits Python object creation into two steps:
+/// - `__new__` allocates the Rust-side storage (calls [`OnceInit::uninit`]),
+/// - `__init__` fills it in exactly once (calls [`OnceInit::set`]).
+///
+/// After initialisation the inner value is accessible through [`OnceInit::get`].
+/// Cloning a `OnceInit` produces another handle to the *same* storage.
+#[repr(transparent)]
+pub struct OnceInit<T>(Arc<OnceInitInner<T>>);
+
+impl<T> OnceInit<T> {
+    /// Creates a new, **uninitialized** [`OnceInit`].
+    ///
+    /// Intended to be called from the PyO3 `__new__` handler to allocate the
+    /// object slot before Python passes arguments to `__init__`.
+    ///
+    /// The returned value must not be accessed via [`get`](Self::get)
+    /// until [`set`](Self::set) has been called.
+    #[inline]
+    pub fn uninit() -> Self {
+        OnceInitInner {
+            state: atomic::AtomicU8::new(UNINIT),
+            value: cell::UnsafeCell::new(mem::MaybeUninit::uninit()),
+        }
+        .into()
+    }
+
+    /// Creates a new **initialized** [`OnceInit`].
+    #[inline]
+    pub fn new(val: T) -> Self {
+        OnceInitInner {
+            state: atomic::AtomicU8::new(INIT),
+            value: cell::UnsafeCell::new(mem::MaybeUninit::new(val)),
+        }
+        .into()
+    }
+
+    /// Returns `true` once [`set`](Self::set) has completed (or the value was
+    /// created with [`new`](Self::new)).
+    #[inline]
+    pub fn is_initialized(&self) -> bool {
+        self.0.state.load(atomic::Ordering::Acquire) == INIT
+    }
+
+    /// Initializes the container with `val`, transitioning state from `UNINIT` to `INIT`.
+    ///
+    /// Intended to be called from the PyO3 `__init__` handler once the Python-side
+    /// arguments have been validated and the Rust value can be constructed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `set` has already been called on this instance.
+    #[inline]
+    pub fn set(&self, val: T) {
+        if self
+            .0
+            .state
+            .compare_exchange(
+                UNINIT,
+                RUNNING,
+                atomic::Ordering::Acquire,
+                atomic::Ordering::Relaxed,
+            )
+            .is_err()
+        {
+            already_init_panic();
+        }
+        // SAFETY: we own the RUNNING token — no other thread can write value.
+        unsafe { (*self.0.value.get()).write(val) };
+        self.0.state.store(INIT, atomic::Ordering::Release);
+    }
+
+    /// Returns an immutable reference to initialized value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called before [`set`](Self::set) has completed.
+    #[inline]
+    pub fn get(&self) -> &T {
+        if std::hint::likely(self.0.state.load(atomic::Ordering::Acquire) == INIT) {
+            // SAFETY: state == INIT guarantees `value` was fully written and is valid.
+            unsafe { (*self.0.value.get()).assume_init_ref() }
+        } else {
+            not_init_panic()
+        }
+    }
+}
+
+impl<T> Clone for OnceInit<T> {
+    /// Returns another handle to the same shared storage (no deep copy).
+    fn clone(&self) -> Self {
+        Self(Arc::clone(&self.0))
+    }
+}
+
+impl<T> From<OnceInitInner<T>> for OnceInit<T> {
+    fn from(value: OnceInitInner<T>) -> Self {
+        Self(Arc::new(value))
+    }
+}
+
+// SAFETY: the value is written exactly once under the RUNNING token and only
+// read after INIT has been observed with Acquire ordering.
+// NOTE(review): the `T: Send` bounds are restored from the original SAFETY comment
+// ("Send+Sync when T: Send"). `get` hands out `&T` to any thread, so `T: Sync`
+// is probably required as well — confirm against the types stored here.
+unsafe impl<T: Send> Send for OnceInit<T> {}
+unsafe impl<T: Send> Sync for OnceInit<T> {}
+
+/// Marked `#[cold]` and `#[inline(never)]` so it is compiled as a separate,
+/// rarely-executed stub and does not bloat the hot path of [`get`](OnceInit::get).
+#[cold]
+#[inline(never)]
+fn not_init_panic() -> ! {
+    panic!("Object not initialized (__init__ not called)")
+}
+
+/// Marked `#[cold]` and `#[inline(never)]` so it is compiled as a separate,
+/// rarely-executed stub and does not bloat the hot path of [`set`](OnceInit::set).
+#[cold]
+#[inline(never)]
+fn already_init_panic() -> ! {
+    panic!("Object already initialized")
+}
diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs
new file mode 100644
index 0000000..07d8a85
--- /dev/null
+++ b/src/internal/pickle.rs
@@ -0,0 +1,621 @@
+use std::ptr;
+
+use pyo3::IntoPyObject;
+
+use crate::internal::alias;
+
+pub enum PyPickleVal<'a> {
+    Owned(alias::PyObject),
+    Borrowed(&'a alias::PyObject),
+    Str(&'a str),
+    UnsignedBig(u128),
+    Unsigned(usize),
+    Signed(isize),
+    Float(f64),
+    Bool(bool),
+    None,
+}
+
+impl From<usize> for PyPickleVal<'static> {
+    #[inline]
+    fn from(v: usize) -> Self {
+        PyPickleVal::Unsigned(v)
+    }
+}
+impl From<u128> for PyPickleVal<'static> {
+    #[inline]
+    fn from(v: u128) -> Self {
+        PyPickleVal::UnsignedBig(v)
+    }
+}
+impl From<isize> for PyPickleVal<'static> {
+    #[inline]
+    fn from(v: isize) -> Self {
+        PyPickleVal::Signed(v)
+    }
+}
+impl From<f64> for PyPickleVal<'static> {
+    fn from(v: f64) -> Self {
+        PyPickleVal::Float(v)
+    }
+}
+impl From<std::time::Duration> for PyPickleVal<'static> {
+    #[inline]
+    fn from(v: std::time::Duration) -> Self {
+        v.as_secs_f64().into()
+    }
+}
+impl From<bool> for PyPickleVal<'static> {
+    #[inline]
+    fn from(v: bool) -> Self {
+
PyPickleVal::Bool(v) + } +} +impl<'a> From<&'a str> for PyPickleVal<'a> { + #[inline] + fn from(v: &'a str) -> Self { + PyPickleVal::Str(v) + } +} +impl<'a> From<&'a alias::PyObject> for PyPickleVal<'a> { + #[inline] + fn from(v: &'a alias::PyObject) -> Self { + PyPickleVal::Borrowed(v) + } +} +impl From for PyPickleVal<'static> { + #[inline] + fn from(v: alias::PyObject) -> Self { + PyPickleVal::Owned(v) + } +} +impl<'a, I> From> for PyPickleVal<'a> +where + I: Into>, +{ + #[inline] + fn from(value: Option) -> Self { + match value { + Some(x) => x.into(), + None => Self::None, + } + } +} + +// private methods +impl<'a> PyPickleVal<'a> { + /// Allocate a fresh owned Python object. + /// + /// # Safety + /// The caller is responsible for exactly one `Py_DECREF` (or transferring ownership to a container). + unsafe fn into_raw(self, py: pyo3::Python<'_>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let ptr = match self { + Self::Owned(v) => v.into_ptr(), + Self::Borrowed(v) => { + let ptr = v.as_ptr(); + pyo3::ffi::Py_INCREF(ptr); + ptr + } + Self::UnsignedBig(v) => v.into_pyobject(py)?.into_ptr(), + Self::Unsigned(v) => pyo3::ffi::PyLong_FromSize_t(v), + Self::Signed(v) => pyo3::ffi::PyLong_FromSsize_t(v), + Self::Float(v) => pyo3::ffi::PyFloat_FromDouble(v), + Self::Bool(v) => { + // Py_True / Py_False are singletons; INCREF to hand out our own ref. + let raw = if v { + pyo3::ffi::Py_True() + } else { + pyo3::ffi::Py_False() + }; + pyo3::ffi::Py_INCREF(raw); + raw + } + Self::Str(v) => pyo3::ffi::PyUnicode_FromStringAndSize( + v.as_ptr() as *const std::os::raw::c_char, + v.len() as isize, + ), + Self::None => { + let none = pyo3::ffi::Py_None(); + pyo3::ffi::Py_INCREF(none); + none + } + }; + + if ptr.is_null() { + Err(pyo3::PyErr::fetch(py)) + } else { + Ok(ptr) + } + } +} + +/// A finalised pickle state - an immutable wrapper around a Python tuple. +/// +/// Construct with [`Pickle::builder`]. 
+#[repr(transparent)] +pub struct Pickle(alias::PyObject); + +impl Pickle { + /// Begin building a top-level pickle tuple with exactly `size` slots. + #[inline] + pub fn builder<'py>(py: pyo3::Python<'py>, size: usize) -> pyo3::PyResult> { + PickleBuilder::new(py, size) + } + + /// Borrow the inner [`alias::PyObject`] without consuming `self`. + #[inline] + pub fn as_object(&self) -> &alias::PyObject { + &self.0 + } +} + +impl std::ops::Deref for Pickle { + type Target = alias::PyObject; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl AsRef for Pickle { + #[inline] + fn as_ref(&self) -> &alias::PyObject { + &self.0 + } +} + +impl From for alias::PyObject { + #[inline] + fn from(v: Pickle) -> Self { + v.0 + } +} + +mod sealed { + /// Accepts a single raw owned pointer from a finished child builder. + pub trait Receive { + /// # Safety + /// `item` must have refcount == 1; ownership is fully transferred. + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()>; + } +} + +pub trait Builder: Sized + sealed::Receive { + fn py(&self) -> pyo3::Python<'_>; + + fn push<'a, V: Into>>(&mut self, val: V) -> pyo3::PyResult<&mut Self> { + let raw = unsafe { val.into().into_raw(self.py())? }; + unsafe { + self.receive(raw)?; + } + + Ok(self) + } + + fn begin_tuple<'a>(&'a mut self, size: usize) -> pyo3::PyResult> { + TupleBuilder::new(self, size) + } + + fn begin_list<'a>(&'a mut self) -> pyo3::PyResult> { + ListBuilder::new(self) + } + + fn begin_dict<'a>(&'a mut self) -> pyo3::PyResult> { + DictBuilder::new(self) + } +} + +/// Builds the top-level Python tuple that represents a pickle state. +/// +/// All slots **must** be filled before calling [`finish`](PickleBuilder::finish). +/// In debug builds an assertion verifies this; the tuple is otherwise valid but +/// partially initialised (CPython represents unfilled slots as `NULL`). 
+/// +/// If the builder is dropped before `finish` is called, the partially-built +/// tuple is correctly decreffed and all already-inserted items are released. +pub struct PickleBuilder<'py> { + py: pyo3::Python<'py>, + inner: Option>, + size: isize, + current: isize, +} + +impl<'py> PickleBuilder<'py> { + fn new(py: pyo3::Python<'py>, size: usize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size as isize) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + py, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size: size as isize, + current: 0, + }) + } + + pub fn finish(mut self) -> Pickle { + debug_assert_eq!( + self.current, + self.size, + "PickleBuilder::finish: {} unfilled slot(s)", + self.size - self.current + ); + let ptr = self.inner.take().expect("already consumed").as_ptr(); + Pickle(unsafe { pyo3::Bound::from_owned_ptr(self.py, ptr) }.unbind()) + } +} + +impl sealed::Receive for PickleBuilder<'_> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + debug_assert!( + self.current < self.size, + "PickleBuilder: pushed more items than `size`" + ); + let ptr = self.inner.expect("PickleBuilder already consumed").as_ptr(); + if pyo3::ffi::PyTuple_SetItem(ptr, self.current, item) != 0 { + // item was already decreffed by PyTuple_SetItem on failure + return Err(pyo3::PyErr::fetch(self.py)); + } + self.current += 1; + Ok(()) + } +} + +impl<'py> Builder for PickleBuilder<'py> { + #[inline] + fn py(&self) -> pyo3::Python<'py> { + self.py + } +} + +impl Drop for PickleBuilder<'_> { + fn drop(&mut self) { + // Releases the tuple and all items already inserted into it. 
+ if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +pub struct TupleBuilder<'a, P: Builder> { + parent: &'a mut P, + inner: Option>, + size: isize, + current: isize, +} + +impl<'a, P: Builder> TupleBuilder<'a, P> { + fn new(parent: &'a mut P, size: usize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size as isize) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(parent.py())); + } + + Ok(Self { + parent, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size: size as isize, + current: 0, + }) + } + + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + debug_assert_eq!( + self.current, + self.size, + "TupleBuilder::end: {} unfilled slot(s)", + self.size - self.current + ); + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; + } + Ok(()) + } +} + +impl sealed::Receive for TupleBuilder<'_, P> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + debug_assert!(self.current < self.size, "TupleBuilder: too many items"); + if pyo3::ffi::PyTuple_SetItem( + self.inner.expect("already consumed").as_ptr(), + self.current, + item, + ) != 0 + { + return Err(pyo3::PyErr::fetch(self.parent.py())); + } + self.current += 1; + Ok(()) + } +} + +impl Builder for TupleBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} + +impl Drop for TupleBuilder<'_, P> { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +pub struct ListBuilder<'a, P: Builder> { + parent: &'a mut P, + inner: Option>, +} + +impl<'a, P: Builder> ListBuilder<'a, P> { + fn new(parent: &'a mut P) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyList_New(0) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(parent.py())); + } + Ok(Self { + parent, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + 
}) + } + + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; + } + Ok(()) + } +} + +impl sealed::Receive for ListBuilder<'_, P> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + let rc = pyo3::ffi::PyList_Append(self.inner.expect("already consumed").as_ptr(), item); + pyo3::ffi::Py_DECREF(item); // PyList_Append does not steal + if rc != 0 { + Err(pyo3::PyErr::fetch(self.parent.py())) + } else { + Ok(()) + } + } +} + +impl Builder for ListBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} + +impl Drop for ListBuilder<'_, P> { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +pub struct DictBuilder<'a, P: Builder> { + parent: &'a mut P, + inner: Option>, +} + +impl<'a, P: Builder> DictBuilder<'a, P> { + fn new(parent: &'a mut P) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyDict_New() }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(parent.py())); + } + Ok(Self { + parent, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) + } + + pub fn entry<'k, 'v, K, V>(&mut self, key: K, val: V) -> pyo3::PyResult<&mut Self> + where + K: Into>, + V: Into>, + { + let kptr = unsafe { key.into().into_raw(self.parent.py())? 
}; + let vptr = unsafe { + match val.into().into_raw(self.parent.py()) { + Ok(v) => v, + Err(e) => { + pyo3::ffi::Py_DECREF(kptr); + return Err(e); + } + } + }; + unsafe { + self.set_kv(kptr, vptr)?; + } + Ok(self) + } + + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; + } + Ok(()) + } + + unsafe fn set_kv( + &mut self, + key: *mut pyo3::ffi::PyObject, + val: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + let rc = + pyo3::ffi::PyDict_SetItem(self.inner.expect("already consumed").as_ptr(), key, val); + pyo3::ffi::Py_DECREF(key); + pyo3::ffi::Py_DECREF(val); + if rc != 0 { + Err(pyo3::PyErr::fetch(self.parent.py())) + } else { + Ok(()) + } + } +} + +// DictBuilder also implements Builder so that begin_tuple/list/dict work +// as value-builders inside a dict value context. +impl sealed::Receive for DictBuilder<'_, P> { + #[inline] + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + pyo3::ffi::Py_DECREF(item); + Err(pyo3::exceptions::PyTypeError::new_err( + "use entry() or entry_*() to insert into a DictBuilder", + )) + } +} + +impl Builder for DictBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} + +impl Drop for DictBuilder<'_, P> { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +impl<'a, P: Builder> DictBuilder<'a, P> { + pub fn entry_tuple<'k, K, F>(&mut self, key: K, size: usize, f: F) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, + { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. 
+ unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = TupleBuilder::new(&mut sink, size)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); + return Err(e); + } + } + }; + + unsafe { + self.set_kv(kptr, vptr)?; + } + Ok(self) + } + + pub fn entry_list<'k, K, F>(&mut self, key: K, f: F) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, + { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. + unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = ListBuilder::new(&mut sink)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); + return Err(e); + } + } + }; + unsafe { + self.set_kv(kptr, vptr)?; + } + Ok(self) + } + + pub fn entry_dict<'k, K, F>(&mut self, key: K, f: F) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, + { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. 
+ unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = DictBuilder::new(&mut sink)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); + return Err(e); + } + } + }; + unsafe { + self.set_kv(kptr, vptr)?; + } + Ok(self) + } +} + +/// A parent that simply discards the pointer it receives. +/// Used only inside `entry_*` closures where the container +/// extracts the raw pointer directly before `end()` is called. +pub struct Sink(pyo3::Python<'static>); + +impl sealed::Receive for Sink { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + pyo3::ffi::Py_DECREF(item); + Ok(()) + } +} + +impl Builder for Sink { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.0 + } +} diff --git a/src/internal/utils.rs b/src/internal/utils.rs new file mode 100644 index 0000000..0724816 --- /dev/null +++ b/src/internal/utils.rs @@ -0,0 +1,517 @@ +use std::fmt::Write; + +use std::sync::atomic; +use std::sync::Arc; + +use crate::internal::alias; + +/// Tries to hash `arg1`. +/// +/// # Safety +/// Pointer must be valid, non-null, live Python objects. +#[inline] +pub unsafe fn pyobject_hash( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + let py_hash = pyo3::ffi::PyObject_Hash(arg1); + if std::hint::unlikely(py_hash == -1) { + // SAFETY: PyObject_Hash never returns -1 on success. + return Err(pyo3::PyErr::take(py).unwrap_unchecked()); + } + + Ok(py_hash as u64) +} + +/// Pointer-equality fast path, then Python `==`. +/// +/// # Safety +/// Both pointers must be valid, non-null, live Python objects. 
+#[inline] +pub unsafe fn pyobject_equal( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, + arg2: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + if std::ptr::eq(arg1, arg2) { + return Ok(true); + } + + let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); + + if boolean < 0 { + Err(pyo3::PyErr::take(py).unwrap_unchecked()) + } else { + Ok(boolean == 1) + } +} + +/// Calls a Python `getsizeof(key, value) -> int` callable via raw FFI for maximum performance. +/// +/// +/// # Errors +/// Propagates any Python exception raised by `getsizeof`, and also returns a `PyErr` if: +/// - the return value is not an integer +/// - `PyLong_AsSsize_t` returns `-1` with a live Python exception (overflow / type error) +/// +/// # Safety +/// Both pointers must be valid, non-null, live Python objects. +#[inline] +pub unsafe fn call_getsizeof( + py: pyo3::Python<'_>, + getsizeof: Option<&alias::PyObject>, + key: *mut pyo3::ffi::PyObject, + value: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + if getsizeof.is_none() { + return Ok(1); + } + + // SAFETY: + // - All three pointers are valid, live Python objects for the duration of this call. + // - `PyTuple_New(2)` + `PyTuple_SET_ITEM` is the canonical way to build a + // short-lived call tuple without going through Python's allocator twice. + // - `PyTuple_SET_ITEM` steals a reference, so we `Py_INCREF` key and value first. + // - We own `args` and decrement it after the call. + unsafe { + let getsizeof = getsizeof.unwrap_unchecked(); + + let args = pyo3::ffi::PyTuple_New(2); + if args.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + // PyTuple_SetItem steals the reference, so we need to increment first. 
+ pyo3::ffi::Py_INCREF(key); + pyo3::ffi::Py_INCREF(value); + pyo3::ffi::PyTuple_SetItem(args, 0, key); + pyo3::ffi::PyTuple_SetItem(args, 1, value); + + let result = pyo3::ffi::PyObject_Call(getsizeof.as_ptr(), args, std::ptr::null_mut()); + pyo3::ffi::Py_DECREF(args); + + if result.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + // PyLong_AsSsize_t returns -1 on error. + // It never allocates and is the fastest int extraction path. + let size = pyo3::ffi::PyLong_AsSsize_t(result); + pyo3::ffi::Py_DECREF(result); + + if size == -1 { + if let Some(err) = pyo3::PyErr::take(py) { + return Err(err); + } + } + + Ok(size as usize) + } +} + +/// Formats an iterator of key-value pairs into a string representation. +/// +/// Very useful for implementing `__repr__` methods. +#[inline(never)] +pub fn items_to_str(items: I, length: usize) -> Result +where + K: std::fmt::Debug, + V: std::fmt::Debug, + I: IntoIterator, +{ + const EDGE: usize = 50; + const LIMIT: usize = EDGE * 2; + + let mut out = String::with_capacity(64 + length.min(LIMIT) * 16); + out.write_char('{')?; + + // Fast path + if length <= LIMIT { + for (i, (k, v)) in items.into_iter().enumerate() { + if i > 0 { + out.write_str(", ")?; + } + + write!(out, "{k:?}:{v:?}")?; + } + out.write_char('}')?; + + return Ok(out); + } + + let mut iter = items.into_iter(); + + for i in 0..EDGE { + if let Some((k, v)) = iter.next() { + if i > 0 { + out.write_str(", ")?; + } + write!(out, "{k:?}:{v:?}")?; + } + } + + let mut ring: Vec<(K, V)> = Vec::with_capacity(EDGE); + let mut head: usize = 0; + + for item in iter { + if ring.len() < EDGE { + ring.push(item); + } else { + ring[head] = item; + head = (head + 1) % EDGE; + } + } + + let tail_len = ring.len(); + let truncated = length - EDGE - tail_len; + write!(out, ", ... 
{truncated} truncated ..., ")?; + + for i in 0..tail_len { + let (k, v) = &ring[(head + i) % EDGE]; + if i > 0 { + out.write_str(", ")?; + } + write!(out, "{k:?}:{v:?}")?; + } + + out.write_char('}')?; + Ok(out) +} + +/// Returns the type name of a [`pyo3::ffi::PyObject`]. +/// +/// Returns `""` on failure. +/// +/// # Safety +/// The pointer must be a valid, non-null, live Python object. +#[inline(never)] +pub unsafe fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObject) -> String { + use pyo3::types::PyStringMethods; + use pyo3::types::PyTypeMethods; + + let type_ = pyo3::ffi::Py_TYPE(obj); + + if type_.is_null() { + String::from("") + } else { + let obj = pyo3::types::PyType::from_borrowed_type_ptr(py, type_); + + obj.fully_qualified_name() + .map(|x| x.to_string_lossy().into_owned()) + .unwrap_or_else(|_| String::from("")) + } +} + +/// Can be used as a PyO3 function argument. When the argument is specified, you will get [`OptionalArgument::Defined`], +/// otherwise you will get [`OptionalArgument::Undefined`]. +/// +/// It can be used instead of [`Option`] to improve performance. +#[derive(Debug)] +pub enum OptionalArgument { + /// The argument was not provided by the caller. + Undefined, + /// The argument was provided; holds an owned (unbound) reference to the Python object. 
+ Defined(alias::PyObject), +} + +impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument { + type Error = pyo3::PyErr; + + fn extract(obj: pyo3::Borrowed<'a, 'py, pyo3::PyAny>) -> Result { + Ok(Self::Defined(obj.to_owned().unbind())) + } +} + +#[derive(pyo3::FromPyObject, Debug)] +pub enum TimeToLiveArgument { + Float(f64), + Timedelta(chrono::TimeDelta), + DatetimeUtc(chrono::DateTime), + DatetimeNaive(chrono::NaiveDateTime), +} + +impl TimeToLiveArgument { + #[inline(always)] + pub fn into_expires_at(self) -> pyo3::PyResult { + match self { + Self::Float(secs) => Ok(ExpiresAt::Duration(std::time::Duration::from_secs_f64( + secs.max(0.0), + ))), + Self::Timedelta(delta) => Ok(ExpiresAt::from(delta)), + Self::DatetimeUtc(until) => Ok(ExpiresAt::from(until)), + Self::DatetimeNaive(until) => Ok(ExpiresAt::from(until)), + } + } + + #[inline(always)] + pub fn into_duration(self) -> pyo3::PyResult { + match self { + Self::Float(secs) => Ok(std::time::Duration::from_secs_f64(secs.max(0.0))), + Self::Timedelta(delta) => Ok(delta.to_std().unwrap_or(std::time::Duration::ZERO)), + Self::DatetimeUtc(_) | Self::DatetimeNaive(_) => Err(new_py_error!( + PyTypeError, + "expected datetime.timedelta or float, got datetime.datetime" + )), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExpiresAt { + Duration(std::time::Duration), + Instant(chrono::DateTime), +} + +impl From for ExpiresAt { + #[inline] + fn from(value: std::time::Duration) -> Self { + Self::Duration(value) + } +} + +impl From for ExpiresAt { + #[inline] + fn from(value: chrono::TimeDelta) -> Self { + // Negative or zero timedelta collapses to ZERO duration (expire immediately) + Self::Duration(value.to_std().unwrap_or(std::time::Duration::ZERO)) + } +} + +impl From> for ExpiresAt { + #[inline] + fn from(value: chrono::DateTime) -> Self { + Self::Instant(value) + } +} + +impl From for ExpiresAt { + #[inline] + fn from(value: chrono::NaiveDateTime) -> Self { + let utc: 
chrono::DateTime = value + .and_local_timezone(chrono::Local) + .single() + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|| value.and_utc()); + Self::Instant(utc) + } +} + +impl From for ExpiresAt { + #[inline] + fn from(value: std::time::SystemTime) -> Self { + Self::Instant(value.into()) + } +} + +impl From for std::time::SystemTime { + #[inline] + fn from(value: ExpiresAt) -> Self { + match value { + ExpiresAt::Duration(dur) => std::time::SystemTime::now() + dur, + ExpiresAt::Instant(until) => until.into(), + } + } +} + +/// Generation version implementation +/// +/// Useful for detecting changes during iteration, as CPython does, +/// because we can't use lifetimes. +/// +/// ```rust +/// let x = GenerationVersion::default(); +/// +/// x.increment(); +/// assert!(x.get() == 1); +/// ``` +#[derive(Debug, Clone, Default)] +#[repr(transparent)] +pub struct GenerationVersion(Arc); + +impl GenerationVersion { + #[inline(always)] + pub fn increment(&self) -> u32 { + self.0.fetch_add(1, atomic::Ordering::SeqCst) + } + + #[inline(always)] + pub fn get(&self) -> u32 { + self.0.load(atomic::Ordering::Relaxed) + } +} + +/// Precomputed Hash PyObject +/// +/// A precomputed hash is a hash value that's calculated in advance +/// and stored for later use, rather than being computed on demand when needed. +#[derive(Debug)] +pub struct PrecomputedHashObject { + object: alias::PyObject, + hash: u64, +} + +impl PrecomputedHashObject { + /// Creates a new [`PrecomputedHashObject`] with a pre-calculated hash. + #[inline] + pub fn with_precomputed_hash(object: alias::PyObject, hash: u64) -> Self { + Self { object, hash } + } + + /// Computes the hash of `object` via `pyobject_hash`, then creates a new [`PrecomputedHashObject`]; a hashing error is propagated. + #[inline] + pub fn new(py: pyo3::Python<'_>, object: alias::PyObject) -> pyo3::PyResult { + let hash = unsafe { pyobject_hash(py, object.as_ptr())? 
}; + Ok(Self::with_precomputed_hash(object, hash)) + } + + #[inline] + pub fn hash(&self) -> u64 { + self.hash + } + + /// Compares the wrapped objects: pointer-equality fast path, then Python `==`. + #[inline(always)] + pub fn py_eq(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + unsafe { pyobject_equal(py, self.object.as_ptr(), other.object.as_ptr()) } + } + + /// Makes a clone of `self`. + /// + /// This creates another pointer to the same object, increasing its reference count. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + object: self.object.clone_ref(py), + hash: self.hash, + } + } +} + +impl AsRef for PrecomputedHashObject { + /// Returns a reference to the wrapped Python object. + #[inline] + fn as_ref(&self) -> &alias::PyObject { + &self.object + } +} + +impl From for alias::PyObject { + /// Consumes the `PrecomputedHashObject` and returns the wrapped Python object. + fn from(value: PrecomputedHashObject) -> Self { + value.object + } +} +/// Holds and manages the `getsizeof` function, a callable used to measure the +/// size of each key-value pair. +#[derive(pyo3::FromPyObject)] +#[repr(transparent)] +pub struct GetsizeofFunction(Option); + +impl GetsizeofFunction { + /// Creates a new [`GetsizeofFunction`]. + pub fn new(object: Option) -> Self { + Self(object) + } + + /// Makes a clone of `self`. + /// + /// This creates another pointer to the same object, increasing its reference count. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self(self.0.as_ref().map(|x| x.clone_ref(py))) + } + + /// Calls the wrapped function to get the size of the key-value pair. 
+ #[inline] + pub fn call( + &self, + py: pyo3::Python<'_>, + key: &alias::PyObject, + value: &alias::PyObject, + ) -> pyo3::PyResult { + unsafe { call_getsizeof(py, self.0.as_ref(), key.as_ptr(), value.as_ptr()) } + } +} + +impl From for Option { + fn from(value: GetsizeofFunction) -> Self { + value.0 + } +} + +/// Immutable slice iterator without lifetime +/// +/// # Safety +/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. +/// Any changes to pointers can cause *Undefined Behaviour*. +/// - It doesn't support `ZST`s. +pub(super) struct RawSliceIter { + pointer: std::ptr::NonNull, + index: usize, + len: usize, +} + +impl RawSliceIter { + /// Creates a new [`RawSliceIter`] + #[inline] + pub(super) fn new(slice: &[T]) -> Self { + let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); + + Self { + pointer, + index: 0, + len: slice.len(), + } + } +} + +impl Iterator for RawSliceIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + if self.index >= self.len { + None + } else { + let value = unsafe { self.pointer.add(self.index) }; + self.index += 1; + Some(value) + } + } +} + +unsafe impl Send for RawSliceIter {} +unsafe impl Sync for RawSliceIter {} + +/// Raw iterator for [`VecDeque`] which doesn't have lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. 
+pub struct RawVecDequeIter { + first: RawSliceIter, + second: RawSliceIter, +} + +impl RawVecDequeIter { + /// Creates a new [`RawVecDequeIter`] + #[inline] + pub fn new(first: &[T], second: &[T]) -> Self { + Self { + first: RawSliceIter::new(first), + second: RawSliceIter::new(second), + } + } +} + +impl Iterator for RawVecDequeIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + std::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} diff --git a/src/lazyheap.rs b/src/lazyheap.rs deleted file mode 100644 index 3e8304e..0000000 --- a/src/lazyheap.rs +++ /dev/null @@ -1,190 +0,0 @@ -use crate::common::NoLifetimeSliceIter; -use std::ptr::NonNull; - -/// A heap data structure that lazily maintains sorting order. -/// -/// `LazyHeap` allows for efficient insertion of elements without immediately sorting, -/// with the ability to defer sorting until necessary. This can improve performance -/// in scenarios where sorting is not immediately required. -/// -/// ``` -/// let mut heap = LazyHeap::new(); -/// heap.push(5); -/// ``` -pub struct LazyHeap { - data: std::collections::VecDeque>, - is_sorted: bool, -} - -/// An iterator for traversing elements in a `LazyHeap`. -/// -/// This iterator allows sequential access to the elements of a `LazyHeap`, -/// maintaining the current position and total length during iteration. -/// -/// # Safety -/// -/// This iterator uses raw pointers and requires careful management to ensure -/// memory safety and prevent use-after-free or dangling pointer scenarios. 
-pub struct Iter { - first: NoLifetimeSliceIter>, - second: NoLifetimeSliceIter>, -} - -impl LazyHeap { - pub fn new() -> Self { - Self { - data: std::collections::VecDeque::new(), - is_sorted: true, - } - } - - #[inline] - pub fn queue_sort(&mut self) { - self.is_sorted = false; - } - - #[inline] - pub fn front(&self) -> Option<&NonNull> { - debug_assert!(self.is_sorted, "heap not sorted"); - self.data.front() - } - - #[inline] - pub fn push(&mut self, value: T) -> NonNull { - unsafe { - let node: NonNull = NonNull::new_unchecked(Box::into_raw(Box::new(value))).cast(); - - self.data.push_back(node); - self.is_sorted = false; - - node - } - } - - #[inline] - pub fn sort_by(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) { - if self.is_sorted { - return; - } - - if self.data.len() > 1 { - unsafe { - self.data - .make_contiguous() - .sort_by(|a, b| compare(a.as_ref(), b.as_ref())); - } - } - - self.is_sorted = true; - } - - #[inline] - fn unlink_front(&mut self) -> Option { - let node = self.data.pop_front()?; - let node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(*node) - } - - #[inline] - pub fn pop_front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { - self.sort_by(compare); - self.unlink_front() - } - - #[inline] - fn unlink_back(&mut self) -> Option { - let node = self.data.pop_back()?; - let node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(*node) - } - - #[inline] - pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { - self.sort_by(compare); - self.unlink_back() - } - - #[inline] - pub fn get(&self, index: usize) -> Option<&NonNull> { - self.data.get(index) - } - - #[inline] - pub fn remove(&mut self, node: NonNull, compare: F) -> T - where - F: Fn(&T, &T) -> std::cmp::Ordering, - { - debug_assert!(!self.data.is_empty()); - - if self.data.len() == 1 { - return self.pop_back(compare).unwrap(); - } - - self.sort_by(compare); - - let index = self.data.iter().position(|x| node == 
*x).unwrap(); - - let node = unsafe { self.data.remove(index).unwrap_unchecked() }; - let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; - *boxed_node - } - - #[inline] - pub fn clear(&mut self) { - while self.unlink_back().is_some() {} - self.is_sorted = true; - } - - #[inline] - pub fn shrink_to_fit(&mut self) { - self.data.shrink_to_fit(); - } - - #[inline] - pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Iter { - self.sort_by(compare); - - let (a, b) = self.data.as_slices(); - - Iter { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } -} - -impl Drop for LazyHeap { - fn drop(&mut self) { - struct DropGuard<'a, T>(&'a mut LazyHeap); - - impl Drop for DropGuard<'_, T> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. - while self.0.unlink_back().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.unlink_back().is_some() {} - core::mem::forget(guard); - } -} - -impl Iterator for Iter { - type Item = NonNull; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(unsafe { *val.as_ptr() }), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next().map(|x| unsafe { *x.as_ptr() }) - } - } - } -} - -unsafe impl Send for Iter {} diff --git a/src/lib.rs b/src/lib.rs index bff475d..a9d1f94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,57 +1,94 @@ -mod lazyheap; -mod linked_list; +#![feature(allocator_api)] +#![feature(dropck_eyepatch)] +#![feature(likely_unlikely)] #[macro_use] -mod common; +mod macro_rules; +mod hashbrown; +mod typeref; -mod bridge; -mod policies; +pub mod internal; +pub mod policies; +pub mod pyclasses; -#[cfg(feature = "mimalloc")] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -/// 
cachebox core ( written in Rust ) -#[pyo3::pymodule(gil_used = false)] +#[pyo3::pymodule] mod _core { + #[allow(unused_imports)] use pyo3::types::PyModuleMethods; - #[pymodule_export] - use super::bridge::TTLPair; + use crate::typeref; #[pymodule_export] - use super::bridge::BaseCacheImpl; + use crate::pyclasses::base::PyBaseCacheImpl; #[pymodule_export] - use super::bridge::cache::Cache; + use crate::pyclasses::cache::PyCache; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheItems; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheKeys; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheValues; #[pymodule_export] - use super::bridge::fifocache::FIFOCache; + use crate::pyclasses::fifocache::PyFIFOCache; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheItems; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheKeys; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheValues; #[pymodule_export] - use super::bridge::rrcache::RRCache; + use crate::pyclasses::rrcache::PyRRCache; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheItems; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheKeys; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheValues; #[pymodule_export] - use super::bridge::lrucache::LRUCache; + use crate::pyclasses::lrucache::PyLRUCache; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheItems; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheKeys; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheValues; #[pymodule_export] - use super::bridge::lfucache::LFUCache; + use crate::pyclasses::lfucache::PyLFUCache; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCacheItems; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCacheKeys; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCacheValues; #[pymodule_export] - use super::bridge::ttlcache::TTLCache; + use 
crate::pyclasses::ttlcache::PyTTLCache; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheItems; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheKeys; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheValues; #[pymodule_export] - use super::bridge::vttlcache::VTTLCache; + use crate::pyclasses::vttlcache::PyVTTLCache; #[pymodule_init] - fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { - m.add("__author__", env!("CARGO_PKG_AUTHORS"))?; + pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { + typeref::initialize_typeref(m.py()); + m.add("__version__", env!("CARGO_PKG_VERSION"))?; - m.add( - "CoreKeyError", - m.py().get_type::(), - )?; + #[cfg(feature = "use-small-offset")] + m.add("_use_small_offset_feature", true)?; + + #[cfg(not(feature = "use-small-offset"))] + m.add("_use_small_offset_feature", false)?; Ok(()) } diff --git a/src/linked_list.rs b/src/linked_list.rs deleted file mode 100644 index 6c78280..0000000 --- a/src/linked_list.rs +++ /dev/null @@ -1,206 +0,0 @@ -use crate::common::PreHashObject; -use std::ptr::NonNull; - -/// A doubly-linked list implementation with optional head and tail nodes. -/// -/// This list maintains references to the first and last nodes, and tracks the total number of elements. -/// Uses `NonNull` pointers for efficient memory management and allows for constant-time -/// insertion and deletion at both ends of the list. -pub struct LinkedList { - pub head: Option>, // front - pub tail: Option>, // back - len: usize, -} - -/// A node in a doubly-linked list, containing a reference to the previous and next nodes, -/// and storing a key-value pair as its element. -/// -/// The node uses `NonNull` pointers for efficient memory management and allows for -/// constant-time insertion and deletion in the linked list. 
-pub struct Node { - pub prev: Option>, - pub next: Option>, - pub element: (PreHashObject, pyo3::Py, usize), -} - -impl LinkedList { - pub fn new() -> Self { - Self { - head: None, - tail: None, - len: 0, - } - } - - #[inline] - pub fn push_back( - &mut self, - key: PreHashObject, - val: pyo3::Py, - size: usize, - ) -> NonNull { - unsafe { - let node = NonNull::new_unchecked(Box::into_raw(Box::new(Node { - prev: None, - next: None, - element: (key, val, size), - }))); - - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - self.len += 1; - node - } - } - - #[inline] - pub fn pop_front(&mut self) -> Option<(PreHashObject, pyo3::Py, usize)> { - unsafe { - self.head.map(|node| { - let boxed_node = Box::from_raw(node.as_ptr()); - debug_assert!(boxed_node.prev.is_none(), "head.prev is not None"); - - self.head = boxed_node.next; - - match self.head { - None => self.tail = None, - // Not creating new mutable (unique!) references overlapping `element`. 
- Some(head) => (*head.as_ptr()).prev = None, - } - - debug_assert!(self.len > 0, "self.len is zero"); - self.len -= 1; - boxed_node.element - }) - } - } - - #[inline] - pub fn clear(&mut self) { - while self.pop_front().is_some() {} - } - - #[inline] - pub unsafe fn remove( - &mut self, - node: NonNull, - ) -> (PreHashObject, pyo3::Py, usize) { - let node = Box::from_raw(node.as_ptr()); - let result = node.element; - - match node.next { - Some(next) => (*next.as_ptr()).prev = node.prev, - None => { - // Means this node is our self.tail - self.tail = node.prev; - } - } - - match node.prev { - Some(prev) => (*prev.as_ptr()).next = node.next, - None => { - // Means this node is our self.head - self.head = node.next; - } - } - - self.len -= 1; - result - } - - #[inline] - pub unsafe fn move_back(&mut self, node: NonNull) { - if (*node.as_ptr()).next.is_none() { - // Means this node is our self.tail - return; - } - - // unlink - match (*node.as_ptr()).next { - Some(next) => (*next.as_ptr()).prev = (*node.as_ptr()).prev, - None => std::hint::unreachable_unchecked(), - } - - match (*node.as_ptr()).prev { - Some(prev) => (*prev.as_ptr()).next = (*node.as_ptr()).next, - None => { - // Means this node is our self.head - self.head = (*node.as_ptr()).next; - } - } - - (*node.as_ptr()).next = None; - (*node.as_ptr()).prev = None; - - // push_back again - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - } - - #[inline] - pub fn iter(&self) -> Iter { - Iter { - head: self.head, - len: self.len, - } - } -} - -pub struct Iter { - head: Option>, - len: usize, -} - -impl Iterator for Iter { - type Item = NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.len == 0 { - None - } else { - self.head.inspect(|node| unsafe 
{ - self.len -= 1; - self.head = (*node.as_ptr()).next; - }) - } - } -} - -impl Drop for LinkedList { - fn drop(&mut self) { - struct DropGuard<'a>(&'a mut LinkedList); - - impl Drop for DropGuard<'_> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. - while self.0.pop_front().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.pop_front().is_some() {} - core::mem::forget(guard); - } -} - -unsafe impl Sync for Iter {} -unsafe impl Send for Iter {} diff --git a/src/macro_rules.rs b/src/macro_rules.rs new file mode 100644 index 0000000..1ccdf5b --- /dev/null +++ b/src/macro_rules.rs @@ -0,0 +1,54 @@ +/// Implements a `#[pyclass]` with pre-defined pyclass arguments. +/// +/// # Example +/// +/// ```ignore +/// implement_pyclass! { +/// [] MyClass as "MyClass" { field: type } +/// } +/// ``` +#[macro_export] +macro_rules! implement_pyclass { + ( + $(#[$outer:meta])* + [$($pyclass_args:tt)*] $struct_name:ident as $python_name:literal $($rest:tt)* + ) => { + #[pyo3::pyclass( + module = "cachebox._core", + name = $python_name, + immutable_type, + skip_from_py_object, + $($pyclass_args)* + )] + $(#[$outer])* + pub struct $struct_name $($rest)* + }; +} + +/// Creates a new [`PyErr`] of the given exception type. +#[macro_export] +macro_rules! new_py_error { + ($name:ident, $msg:expr $(,)?) => { + ::pyo3::exceptions::$name::new_err($msg) + }; + ($name:ident, $fmt:expr, $($args:tt)*) => { + ::pyo3::exceptions::$name::new_err( + format_args!($fmt, $($args)*) + ) + }; +} + +/// Creates a new std::num::NonZeroUsize safely. Uses `isize::MAX as usize` when `num` is zero. +/// +/// # Usage +/// +/// ```ignore +/// safe_non_zero!(2) -> std::num::NonZeroUsize(2) +/// safe_non_zero!(0) -> std::num::NonZeroUsize(isize::MAX as usize) +/// ``` +#[macro_export] +macro_rules! 
safe_non_zero { + ($num:expr) => { + std::num::NonZeroUsize::new(if $num == 0 { isize::MAX as usize } else { $num }).unwrap() + }; +} diff --git a/src/policies/common.rs b/src/policies/common.rs new file mode 100644 index 0000000..9f657dc --- /dev/null +++ b/src/policies/common.rs @@ -0,0 +1,165 @@ +//! Common implementations across multiple policies. + +use crate::internal::alias; +use crate::internal::utils; +use crate::policies::traits; + +/// A key-value pair with a precomputed hash and combined size. +pub struct Handle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, +} + +impl Handle { + /// Creates a new [`Handle`], hashing `key` itself via [`utils::PrecomputedHashObject::new`]. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key, measuring the pair's size with `getsizeof`. + /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { key, value, size }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. 
+ #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the `(key, value)` pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + /// Makes a clone of `self`. + /// + /// This creates new references to the same key and value objects, increasing their reference counts; the stored size is copied. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + } + } +} + +impl traits::HandleExt for Handle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// Shared variables which should be kept separate from the Mutex. +pub struct Shared { + // Hard upper bound on `currsize`. + maxsize: std::num::NonZeroUsize, + /// Monotonically incrementing counter bumped on every structural mutation. + gv: utils::GenerationVersion, + /// Callable used to measure the size of each key-value pair. + getsizeof: utils::GetsizeofFunction, + /// Global time-to-live for cache entries. This is for *TTL* implementations. + global_ttl: Option, +} + +impl Shared { + /// Creates a new [`Shared`] without a global TTL. + #[inline] + pub fn new(maxsize: usize, getsizeof: Option) -> Self { + Self::with_ttl(maxsize, getsizeof, None) + } + + /// Creates a new [`Shared`] with the given global TTL. 
+ #[inline] + pub fn with_ttl( + maxsize: usize, + getsizeof: Option, + ttl: Option, + ) -> Self { + Self { + maxsize: safe_non_zero!(maxsize), + gv: utils::GenerationVersion::default(), + getsizeof: utils::GetsizeofFunction::new(getsizeof), + global_ttl: ttl, + } + } +} + +impl traits::SharedExt for Shared { + #[inline] + fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + fn generation_version(&self) -> &utils::GenerationVersion { + &self.gv + } + + #[inline] + fn getsizeof(&self) -> &utils::GetsizeofFunction { + &self.getsizeof + } + + #[inline] + fn global_ttl(&self) -> Option { + self.global_ttl + } + + fn clone_ref(&self, py: pyo3::Python) -> Self { + Self { + maxsize: self.maxsize, + gv: Default::default(), + getsizeof: self.getsizeof.clone_ref(py), + global_ttl: self.global_ttl, + } + } +} diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs deleted file mode 100644 index 81ac15e..0000000 --- a/src/policies/fifo.rs +++ /dev/null @@ -1,497 +0,0 @@ -use crate::common::Entry; -use crate::common::NoLifetimeSliceIter; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -use std::collections::VecDeque; - -pub const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); - -pub struct FIFOPolicy { - /// We set [Vec] objects indexes in hashtable to make search O(1). hashtable is unordered, - /// that is why we are using [Vec]. - table: hashbrown::raw::RawTable, - - /// Keep objects in order. - entries: VecDeque<(PreHashObject, pyo3::Py, usize)>, - maxsize: core::num::NonZeroUsize, - maxmemory: core::num::NonZeroUsize, - memory: usize, - - /// When we pop front an object from entries, two operations have to do: - /// 1. Shift all elements in vector. - /// 2. Decrement all indexes in hashtable. 
- /// - /// these are expensive operations in large elements; - /// - We removed first operation by using [`std::collections::VecDeque`] instead of [`Vec`] - /// - We removed second operation by using this variable: Instead of decrement indexes in hashtable, - /// we will increment this variable. - n_shifts: usize, - - pub observed: Observed, -} - -pub struct FIFOPolicyOccupied<'a> { - instance: &'a mut FIFOPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct FIFOPolicyAbsent<'a> { - instance: &'a mut FIFOPolicy, - insert_slot: Option, -} - -pub struct FIFOIterator { - first: NoLifetimeSliceIter<(PreHashObject, pyo3::Py, usize)>, - second: NoLifetimeSliceIter<(PreHashObject, pyo3::Py, usize)>, -} - -impl FIFOPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - maxmemory, - memory: 0, - n_shifts: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> (usize, usize) { - (self.table.capacity(), self.entries.capacity()) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = 
bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - let old = self - .table - .get_mut(entry.0.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - *old -= 1; - } - } - } - - #[inline] - pub fn popitem( - &mut self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult, usize)>> { - let ret = self.entries.front(); - if ret.is_none() { - return Ok(None); - } - - let ret = unsafe { ret.unwrap_unchecked() }; - - match self.table.try_find(ret.0.hash, |x| { - self.entries[(*x) - self.n_shifts].0.equal(py, &ret.0) - })? { - Some(bucket) => { - unsafe { self.table.remove(bucket) }; - } - None => unreachable!("popitem key not found in table"), - } - - let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; - self.memory = self.memory.saturating_sub(ret.2); - - self.observed.change(); - - self.decrement_indexes(1, self.entries.len()); - Ok(Some(ret)) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, FIFOPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].0.equal(py, key))? - { - Some(bucket) => { - Ok( - Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) - ) - } - None => { - Ok( - Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, FIFOPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |x| self.entries[(*x) - self.n_shifts].0.equal(py, key), - |x| self.entries[(*x) - self.n_shifts].0.hash, - )? 
{ - Ok(bucket) => Ok( - Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |x| { - self.entries[(*x) - self.n_shifts].0.equal(py, key) - })? - .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => Ok(Some(&self.entries[(*index) - self.n_shifts].1)), - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.entries.clear(); - self.n_shifts = 0; - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(self.table.len(), |x| { - self.entries[(*x) - self.n_shifts].0.hash - }); - self.entries.shrink_to_fit(); - self.observed.change(); - } - - pub fn entries_iter( - &self, - ) -> std::collections::vec_deque::Iter<'_, (PreHashObject, pyo3::Py, usize)> { - self.entries.iter() - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for index1 in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _) = &self.entries[(*index1) - self.n_shifts]; - - match other.table.try_find(key1.hash, |x| { - key1.equal(py, &other.entries[(*x) - other.n_shifts].0) - })? { - Some(bucket) => { - let (_, value2, _) = &other.entries[(*bucket.as_ref()) - other.n_shifts]; - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? 
{ - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&self) -> FIFOIterator { - let (a, b) = self.entries.as_slices(); - - FIFOIterator { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - *self = new; - Ok(()) - } - } - - #[inline(always)] - pub fn get_index(&self, n: usize) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.entries.get(n) - } -} - -impl<'a> FIFOPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let index = unsafe { self.bucket.as_ref() }; - let old_value; - { - let item = &mut self.instance.entries[index - self.instance.n_shifts]; - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = item.2; - old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let m = self.instance.entries.remove(index).unwrap(); - self.instance.memory = self.instance.memory.saturating_sub(m.2); - - self.instance.observed.change(); - m - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - let index = unsafe { self.bucket.as_ref() }; - &mut self.instance.entries[index - self.instance.n_shifts] - } -} - -impl FIFOPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: 
PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - }, - None => { - self.instance.table.insert( - key.hash, - self.instance.entries.len() + self.instance.n_shifts, - |index| { - self.instance.entries[(*index) - self.instance.n_shifts] - .0 - .hash - }, - ); - } - } - - self.instance.entries.push_back((key, value, entry_size)); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - - self.instance.observed.change(); - Ok(()) - } -} - -impl Iterator for FIFOIterator { - type Item = std::ptr::NonNull<(PreHashObject, pyo3::Py, usize)>; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for FIFOIterator {} diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs new file mode 100644 index 0000000..31e1632 --- /dev/null +++ b/src/policies/fifopolicy.rs @@ -0,0 +1,459 @@ +use std::collections::VecDeque; + +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// Shorthand for `self.entries[index - 
self.front_offset]` +macro_rules! get_handle { + (&$slf:expr, $index:expr) => { + &$slf.entries[$index - $slf.front_offset] + }; + (&mut $slf:expr, $index:expr) => { + &mut $slf.entries[$index - $slf.front_offset] + }; +} + +/// A view into an occupied entry in [`FIFOPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut FIFOPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = Handle; + type Shared = Shared; + + #[inline] + fn replace(self, new: Self::Handle) -> Self::Handle { + // In update we don't need to increment this; because this does not change the memory address ranges + // self.shared.generation_version().increment(); + + let item = unsafe { get_handle!(&mut self.policy, *self.bucket.as_ref()) }; + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(item.size()) + .saturating_add(new.size()); + + std::mem::replace(item, new) + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; + index -= self.policy.front_offset; + + self.policy + .decrement_indexes(index + 1, self.policy.entries.len()); + + let handle = self.policy.entries.remove(index).unwrap(); + self.policy.currsize = self.policy.currsize.saturating_sub(handle.size()); + handle + } +} + +/// A view into a vacant slot in [`FIFOPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut FIFOPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = Handle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + self.policy.table.insert( + handle.key().hash(), + self.policy.entries.len() + self.policy.front_offset, + |index| get_handle!(&self.policy, *index).key().hash(), + ); + self.policy.entries.push_back(handle); + } +} + +pub struct FIFOPolicy { + /// Maps each key to its logical index into [`FIFOPolicy::entries`], enabling O(1) lookups. + /// + /// Stored indices are *logical* (i.e. they do not reset when entries are popped from the + /// front), so they must be adjusted on read: `entries[table[k] - front_offset]`. + /// As a result, table values grow monotonically over the lifetime of the cache, + /// but their *count* stays bounded by the cache capacity — this is not a memory concern. + table: hashbrown::raw::RawTable, + + /// Insertion-ordered sequence of cached handles, providing O(1) front removal. + entries: VecDeque, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, + + /// Number of handles ever popped from the front of [`FIFOPolicy::entries`]. + /// + /// Because [`VecDeque`] indices shift on front-removal, naively keeping + /// [`FIFOPolicy::table`] consistent would require decrementing every stored + /// index — an O(n) operation. Instead, this counter is incremented on each + /// pop and subtracted at read time: `entries[table[k] - front_offset]`, + /// keeping both the pop and the lookup O(1). 
+ /// + /// To prevent `usize` overflow in the subtraction, once `front_offset` + /// reaches `usize::MAX - isize::MAX`, all indices in `table` are decremented + /// by the current `front_offset` and the counter is reset to zero. This + /// rewrite is O(n) but occurs so rarely, at most once per + /// `usize::MAX - isize::MAX` evictions, that it is effectively free in practice. + front_offset: usize, +} + +impl FIFOPolicy { + /// Creates a new [`FIFOPolicy`]. + /// + /// The underlying [`VecDeque`] is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + entries: VecDeque::with_capacity(capacity), + currsize: 0, + front_offset: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } + + #[inline] + pub fn entries(&self) -> &VecDeque { + &self.entries + } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + #[cfg(not(feature = "use-small-offset"))] + const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; + + #[cfg(feature = "use-small-offset")] + const MAX_FRONT_OFFSET: usize = u8::MAX as usize; + + // Fast path: shifting the entire front is a single counter increment. + // Guard against overflow; the full-normalization path below handles that case. + if start <= 1 && end == self.entries.len() && self.front_offset < MAX_FRONT_OFFSET { + self.front_offset += 1; + return; + } + + if (end - start) > self.table.capacity() / 2 { + // Table-scan + // normalize every index (subtract fo) and decrement those in range [start, end). + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + + let vd_idx = *i - self.front_offset; + + *i = if start <= vd_idx && vd_idx < end { + vd_idx - 1 // normalize + decrement + } else { + vd_idx // normalize + }; + } + } + } else { + // Entries-scan + // decrement the logical indices for entries in range [start, end). 
+ let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let result = unsafe { + self.table + .get_mut(entry.key().hash(), |x| { + Ok::<_, pyo3::PyErr>((*x) - self.front_offset == i) + }) + .unwrap_unchecked() + .expect("index not found") + }; + *result -= 1; + } + + // normalize every stored index by subtracting `fo`. + // - Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // - All others: (vd_idx + fo) - fo = vd_idx + if self.front_offset != 0 { + unsafe { + for bucket in self.table.iter() { + *bucket.as_mut() -= self.front_offset; + } + } + } + } + + // Both branches now store raw VecDeque indices, so the offset is zero. + self.front_offset = 0; + } + + #[inline] + pub fn iter(&self) -> utils::RawVecDequeIter { + let (first, second) = self.entries.as_slices(); + utils::RawVecDequeIter::new(first, second) + } +} + +impl PolicyExt for FIFOPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); + match self.table.get(key.hash(), eq)? { + Some(index) => Ok(Some(get_handle!(&self, *index))), + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + let front = self.entries.front(); + if front.is_none() { + return Err(new_py_error!(PyKeyError, ())); + } + + let front = unsafe { front.unwrap_unchecked() }; + + let eq = |index: &usize| Ok::<_, pyo3::PyErr>(*index - self.front_offset == 0); + if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { + unreachable!("popitem key not found in table"); + } + + shared.generation_version().increment(); + + self.decrement_indexes(1, self.entries.len()); + let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.currsize = self.currsize.saturating_sub(front.size()); + + Ok(front) + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |index| get_handle!(&self, *index).key().hash()); + + self.entries.shrink_to_fit(); + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.entries.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear(); + self.entries.clear(); + self.currsize = 0; + self.front_offset = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().all(|x| { + let handle = get_handle!(&self, *x.as_ref()); + let key = handle.key(); + + match other + .table + .get(key.hash(), |i| key.py_eq(py, get_handle!(&other, *i).key())) + { + Err(e) => { + error = Some(e); + false + } + 
Ok(None) => false, + Ok(Some(i)) => { + let v1 = handle.value(); + let v2 = get_handle!(&other, *i).value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut entries = VecDeque::with_capacity(self.entries.len()); + for handle in self.entries.iter() { + entries.push_back(handle.clone_ref(py)); + } + + Self { + table: self.table.clone(), + entries, + currsize: self.currsize, + front_offset: self.front_offset, + } + } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for handle in self.entries.iter() { + let mut tuple = list.begin_tuple(2)?; + tuple.push(handle.key().as_ref())?; + tuple.push(handle.value())?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value) = bound.extract::<(alias::PyObject, alias::PyObject)>()?; + + let handle = Handle::new(bound.py(), shared.getsizeof(), key, value)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + unsafe { + slf.table.insert_no_grow( + handle.key().hash(), + // Adding `slf.front_offset` is unnecessary here + slf.entries.len(), + ); + } + 
slf.entries.push_back(handle); + } + + Ok((shared, slf)) + } +} diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs deleted file mode 100644 index 9b57699..0000000 --- a/src/policies/lfu.rs +++ /dev/null @@ -1,428 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; -use crate::lazyheap; -use std::ptr::NonNull; - -type TupleValue = (PreHashObject, pyo3::Py, usize, usize); - -pub struct LFUPolicy { - table: hashbrown::raw::RawTable>, - heap: lazyheap::LazyHeap, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct LFUPolicyOccupied<'a> { - instance: &'a mut LFUPolicy, - bucket: hashbrown::raw::Bucket>, -} - -pub struct LFUPolicyAbsent<'a> { - instance: &'a mut LFUPolicy, - insert_slot: Option, -} - -pub type LFUIterator = lazyheap::Iter<(PreHashObject, pyo3::Py, usize, usize)>; - -impl LFUPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - heap: lazyheap::LazyHeap::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn popitem(&mut self) -> Option { - self.heap.sort_by(|a, b| a.2.cmp(&b.2)); - let 
front = self.heap.front()?; - - unsafe { - self.table - .remove_entry(front.as_ref().0.hash, |x| { - std::ptr::eq(x.as_ptr(), front.as_ptr()) - }) - .unwrap(); - } - - self.observed.change(); - let item = self.heap.pop_front(|a, b| a.2.cmp(&b.2)).unwrap(); - self.memory = self.memory.saturating_sub(item.3); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LFUPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().0.equal(py, key) })? - { - Some(bucket) => { - Ok( - Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) - ) - }, - None => { - Ok( - Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: None }) - ) - } - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LFUPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |ptr| unsafe { ptr.as_ref().0.equal(py, key) }, - |ptr| unsafe { ptr.as_ref().0.hash }, - )? { - Ok(bucket) => { - Ok( - Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) - ) - }, - Err(slot) => { - Ok( - Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: Some(slot) }) - ) - } - } - } - - #[inline] - pub fn lookup( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self.entry(py, key)? { - Entry::Occupied(x) => unsafe { - x.bucket.as_mut().as_mut().2 += 1; - x.instance.heap.queue_sort(); - - Ok(Some(&x.bucket.as_ref().as_ref().1)) - }, - Entry::Absent(_) => Ok(None), - } - } - - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - let result = self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().0.equal(py, key) })? 
- .map(|x| unsafe { &x.as_ref().as_ref().1 }); - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.heap.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().0.hash }); - - self.heap.shrink_to_fit(); - self.observed.change(); - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _, _) = node.as_ref(); - - match other - .table - .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().0))? - { - Some(bucket) => { - let (_, value2, _, _) = bucket.as_ref().as_ref(); - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? { - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind(), 0)?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value, 0)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&mut self) -> LFUIterator { - self.heap.iter(|a, b| a.2.cmp(&b.2)) - } - - pub fn least_frequently_used(&mut self, n: usize) -> Option> { - self.heap.sort_by(|a, b| a.2.cmp(&b.2)); - let node = self.heap.get(n)?; - - Some(*node) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "iterable object size is greater than maxsize", - )); - } - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, freq) = - pair?.extract::<(pyo3::Py, pyo3::Py, usize)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value, freq)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.heap.sort_by(|a, b| a.2.cmp(&b.2)); - - *self = new; - Ok(()) - } - } -} - -impl LFUPolicyOccupied<'_> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let item = unsafe { self.bucket.as_mut() }; - let (old_value, old_size, new_size) = { - let element = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &element.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = element.3; - let old_value = std::mem::replace(&mut element.1, value); - element.3 = new_size; - element.2 += 1; - (old_value, old_size, new_size) - }; - - self.instance.heap.queue_sort(); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TupleValue { - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = self.instance.heap.remove(item, |a, b| a.2.cmp(&b.2)); - - self.instance.memory = self.instance.memory.saturating_sub(item.3); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> NonNull { - let item = unsafe { self.bucket.as_mut() }; - *item - } -} - -impl LFUPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return 
Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self.instance.heap.push((key, value, freq, entry_size)); - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - }, - None => { - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().0.hash }); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} - -unsafe impl Send for LFUPolicy {} diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs new file mode 100644 index 0000000..82010e8 --- /dev/null +++ b/src/policies/lfupolicy.rs @@ -0,0 +1,541 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use crate::policies::common::Shared; + +macro_rules! compare_fn { + () => { + |x, y| x.frequency.cmp(&y.frequency) + }; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Frequency(u128); + +impl Frequency { + #[inline(always)] + fn increment(&mut self) { + self.0 = self.0.saturating_add(1) + } +} + +/// Same as [`Handle`](struct@super::common::Handle), but with a frequency counter. +pub struct FrequencyHandle { + key: utils::PrecomputedHashObject, + value: alias::PyObject, + size: usize, + frequency: Frequency, +} + +impl FrequencyHandle { + /// Creates a new [`FrequencyHandle`] with an initial frequency (always is zero, except + /// in loading pickle states). 
+ #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: alias::PyObject, + value: alias::PyObject, + frequency: u128, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + utils::PrecomputedHashObject::new(py, key)?, + value, + frequency, + ) + } + + /// Creates a new [`FrequencyHandle`] from an already-hashed key, + /// with an initial frequency (always is zero, except in loading pickle states). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + frequency: u128, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + frequency: Frequency(frequency), + }) + } + + /// Returns the frequency. + #[inline] + pub fn frequency(&self) -> u128 { + self.frequency.0 + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. 
+ #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + frequency: self.frequency, + } + } +} + +impl HandleExt for FrequencyHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`LFUPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut LFUPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket>, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = FrequencyHandle; + type Shared = Shared; + + fn replace(self, new: Self::Handle) -> Self::Handle { + // Here we don't need to increment generation version + // self.shared.generation_version().increment(); + + unsafe { + let cursor = self.bucket.as_mut(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + + cursor.element_mut().frequency.increment(); + self.policy.heap.mark_unsorted(); + + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = self.policy.heap.remove(cursor, compare_fn!()); + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} + +/// A view into a vacant slot in [`LFUPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut LFUPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = FrequencyHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.heap.push(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct LFUPolicy { + /// Maps each key to its node pointer into [`LFUPolicy::entries`], enabling O(1) lookups. + table: hashbrown::raw::RawTable>, + + /// A lazy binary heap. + heap: lazyheap::LazyHeap, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl LFUPolicy { + /// Creates a new [`LFUPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + heap: lazyheap::LazyHeap::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn heap(&self) -> &lazyheap::LazyHeap { + &self.heap + } + + #[inline] + pub fn iter(&mut self, gv: &utils::GenerationVersion) -> lazyheap::RawIter { + // We don't want to intrupt other iterators with no reason + // so need to manually call sort_by to only intrupt them on changes. 
+ if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap.iter(compare_fn!()) + } + + #[inline] + pub fn least_frequently_used( + &mut self, + py: pyo3::Python, + n: usize, + gv: &utils::GenerationVersion, + ) -> Option { + if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap + .get(n) + .map(|cursor| unsafe { cursor.element().key().clone_ref(py) }) + } + + #[inline] + pub fn peek( + &self, + py: pyo3::Python, + key: &utils::PrecomputedHashObject, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .find(key.hash(), |cursor| key.py_eq(py, &cursor.element().key))?; + + Ok(bucket.map(|x| x.as_ref().element())) + } + } +} + +impl PolicyExt for LFUPolicy { + type Shared = Shared; + type Handle = FrequencyHandle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let cursor = self + .table + .get_mut(key.hash(), |x| unsafe { key.py_eq(py, &x.element().key) })?; + + match cursor { + Some(cursor) => unsafe { + // increment frequency + cursor.element_mut().frequency.increment(); + + Ok(Some(cursor.element())) + }, + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = |cursor: &lazyheap::Cursor| unsafe { + key.py_eq(py, cursor.element().key()) + }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = self + .heap + .front(compare_fn!()) + .ok_or_else(|| new_py_error!(PyKeyError, "cache is empty"))?; + + self.table + .remove_entry(unsafe { front_cursor.element().key.hash() }, |x| { + Ok::<_, pyo3::PyErr>(std::ptr::eq(front_cursor.as_ptr(), x.as_ptr())) + })? + .expect("evict: item not found in table"); + } + + shared.generation_version().increment(); + + let handle = self.heap.pop_front(compare_fn!()).unwrap(); + + self.currsize = self.currsize.saturating_sub(handle.size); + Ok(handle) + } + + fn clear(&mut self, shared: &Self::Shared) { + if self.heap.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.heap.clear(); + self.currsize = 0; + } + + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |x| unsafe { x.element().key.hash() }); + + self.heap.shrink_to_fit(); + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().all(|x| { + let handle = x.as_ref().element(); + + let key = handle.key(); + + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { + Err(e) => { + error = Some(e); + false + } + Ok(None) => false, + Ok(Some(cursor)) => { + match utils::pyobject_equal( + py, + handle.value.as_ptr(), + cursor.element().value.as_ptr(), + ) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + 
false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.len()); + let mut heap = lazyheap::LazyHeap::new(); + + unsafe { + for cursor in self.heap.iter(compare_fn!()) { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = heap.push(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + heap, + currsize: self.currsize, + } + } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.table.iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key.as_ref())?; + tuple.push(handle.value())?; + tuple.push(handle.frequency.0)?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, frequency) = + bound.extract::<(alias::PyObject, alias::PyObject, u128)>()?; + + let handle = + FrequencyHandle::new(bound.py(), shared.getsizeof(), key, value, frequency)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.heap.push(handle); + unsafe { + 
slf.table.insert_no_grow(hash, cursor); + } + } + + slf.heap.sort_by(compare_fn!()); + Ok((shared, slf)) + } +} diff --git a/src/policies/lru.rs b/src/policies/lru.rs deleted file mode 100644 index 8a9dab5..0000000 --- a/src/policies/lru.rs +++ /dev/null @@ -1,420 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; -use crate::linked_list; - -type NotNullNode = std::ptr::NonNull; - -pub struct LRUPolicy { - table: hashbrown::raw::RawTable, - list: linked_list::LinkedList, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct LRUPolicyOccupied<'a> { - instance: &'a mut LRUPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct LRUPolicyAbsent<'a> { - instance: &'a mut LRUPolicy, - insert_slot: Option, -} - -impl LRUPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - list: linked_list::LinkedList::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn popitem(&mut self) -> Option<(PreHashObject, pyo3::Py, usize)> { - let ret = self.list.head?; - - unsafe { - self.table - 
.remove_entry((*ret.as_ptr()).element.0.hash, |node| { - core::ptr::eq(node.as_ptr(), ret.as_ptr()) - }) - .expect("popitem key not found."); - } - - self.observed.change(); - let item = self.list.pop_front().unwrap(); - self.memory = self.memory.saturating_sub(item.2); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LRUPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? - { - Some(bucket) => { - Ok( - Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) - ) - } - None => { - Ok( - Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LRUPolicyAbsent<'_>>> { - match self - .table - .try_find_or_find_insert_slot( - key.hash, - |x| unsafe { x.as_ref().element.0.equal(py, key) }, - |x| unsafe { x.as_ref().element.0.hash } - )? { - Ok(bucket) => { - Ok( - Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) - ) - } - Err(slot) => { - Ok( - Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: Some(slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self.entry(py, key)? { - Entry::Occupied(x) => unsafe { - x.instance.list.move_back(*x.bucket.as_ptr()); - - Ok(Some(&x.bucket.as_ref().as_ref().element.1)) - }, - Entry::Absent(_) => Ok(None), - } - } - - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - let result = self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? 
- .map(|x| unsafe { &x.as_ref().as_ref().element.1 }); - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.list.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().element.0.hash }); - - self.observed.change(); - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _) = &node.as_ref().element; - - match other - .table - .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().element.0))? - { - Some(bucket) => { - let (_, value2, _) = &bucket.as_ref().as_ref().element; - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? { - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&self) -> linked_list::Iter { - self.list.iter() - } - - pub fn least_recently_used(&self) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.list.head.map(|x| unsafe { &x.as_ref().element }) - } - - pub fn most_recently_used(&self) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.list.tail.map(|x| unsafe { &x.as_ref().element }) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - *self = new; - Ok(()) - } - } -} - -impl<'a> LRUPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let item = unsafe { self.bucket.as_mut() }; - let (old_value, old_size, new_size) = { - let element = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &element.element.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = element.element.2; - let old_value = std::mem::replace(&mut element.element.1, value); - element.element.2 = new_size; - (old_value, old_size, new_size) - }; - - unsafe { - self.instance.list.move_back(*item); - } - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = unsafe { self.instance.list.remove(item) }; - - self.instance.memory = self.instance.memory.saturating_sub(item.2); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { - self.instance.list.move_back(*self.bucket.as_ptr()); - } - - let item = unsafe { self.bucket.as_mut() }; - unsafe { &mut item.as_mut().element } - } -} - -impl LRUPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self.instance.list.push_back(key, value, entry_size); - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - }, - None => { - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().element.0.hash }); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} - -unsafe impl Send for LRUPolicy {} diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs new file mode 100644 index 0000000..244cab9 --- /dev/null +++ b/src/policies/lrupolicy.rs @@ -0,0 +1,379 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::linked_list; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use 
crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`LRUPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut LRUPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket>, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = Handle; + type Shared = Shared; + + fn replace(self, new: Self::Handle) -> Self::Handle { + self.shared.generation_version().increment(); + + unsafe { + let mut cursor = *self.bucket.as_ref(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + cursor.move_to_back(&mut self.policy.list); + + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = unsafe { cursor.unlink(&mut self.policy.list) }; + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} + +/// A view into a vacant slot in [`LRUPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut LRUPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = Handle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.list.push_back(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct LRUPolicy { + /// Maps each key to its node pointer into [`LRUPolicy::list`], enabling O(1) lookups. + table: hashbrown::raw::RawTable>, + + /// A doubly-linked list, which holds cached handles, providing O(1) pops (front/back) and pushes (front/back). + list: linked_list::LinkedList, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl LRUPolicy { + /// Creates a new [`LRUPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. 
+ pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + list: linked_list::LinkedList::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn list(&self) -> &linked_list::LinkedList { + &self.list + } + + #[inline] + pub fn peek( + &self, + py: pyo3::Python, + key: &utils::PrecomputedHashObject, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .find(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; + + Ok(bucket.map(|x| x.as_ref().element())) + } + } +} + +impl PolicyExt for LRUPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .get(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; + + match bucket { + Some(cursor) => { + cursor.move_to_back(&mut self.list); + Ok(Some(cursor.element())) + } + None => Ok(None), + } + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = + |cursor: &linked_list::Cursor| unsafe { key.py_eq(py, cursor.element().key()) }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = match self.list.cursor_front() { + Some(x) => x, + None => return Err(new_py_error!(PyKeyError, "cache is empty")), + }; + + let hash = unsafe { front_cursor.element().key().hash() }; + + shared.generation_version().increment(); + self.table + .remove_entry(hash, |cursor| Ok::<_, pyo3::PyErr>(*cursor == front_cursor)) + .expect("evict: key not found in table."); + } + + let handle = unsafe { self.list.pop_front().unwrap_unchecked() }; + self.currsize = self.currsize.saturating_sub(handle.size()); + Ok(handle) + } + + #[inline] + fn shrink_to_fit(&mut self, _shared: &Self::Shared) { + self.table + .shrink_to(0, |cursor| unsafe { cursor.element().key().hash() }); + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.list.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.list.clear(); + self.currsize = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().all(|x| { + let handle = x.as_ref().element(); + let key = handle.key(); + + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { + Err(e) => { + error = Some(e); + false + } + Ok(None) => false, + Ok(Some(cursor)) => { + let v1 = handle.value(); + let v2 = cursor.element().value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + 
false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.list.len()); + let mut entries = linked_list::LinkedList::new(); + + unsafe { + for cursor in self.list.iter() { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = entries.push_back(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + list: entries, + currsize: self.currsize, + } + } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.list.iter() } { + let handle = unsafe { cursor.element() }; + + let mut tuple = list.begin_tuple(2)?; + tuple.push(handle.key().as_ref())?; + tuple.push(handle.value())?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value) = bound.extract::<(alias::PyObject, alias::PyObject)>()?; + + let handle = Handle::new(bound.py(), shared.getsizeof(), key, value)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.list.push_back(handle); + + unsafe { + slf.table.insert_no_grow(hash, cursor); + } + } + + Ok((shared, slf)) 
+ } +} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 2945250..2c62800 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,7 +1,12 @@ -pub mod fifo; -pub mod lfu; -pub mod lru; +pub mod common; +pub mod traits; + +pub mod fifopolicy; +pub mod lfupolicy; +pub mod lrupolicy; pub mod nopolicy; -pub mod random; -pub mod ttl; -pub mod vttl; +pub mod rrpolicy; +pub mod ttlpolicy; +pub mod vttlpolicy; + +pub mod wrapped; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 3629f2b..585878f 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,360 +1,297 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -pub struct NoPolicy { - table: hashbrown::raw::RawTable<(PreHashObject, pyo3::Py, usize)>, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`NoPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut NoPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied slot within the hash table. 
+ bucket: hashbrown::raw::Bucket, } -pub struct NoPolicyOccupied<'a> { - instance: &'a mut NoPolicy, - bucket: hashbrown::raw::Bucket<(PreHashObject, pyo3::Py, usize)>, -} +impl traits::OccupiedExt for Occupied<'_> { + type Shared = Shared; + type Handle = Handle; -pub struct NoPolicyAbsent<'a> { - instance: &'a mut NoPolicy, - insert_slot: Option, -} + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); -impl NoPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) + let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; + self.policy.currsize = self.policy.currsize.saturating_sub(h.size()); + h } - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } + fn replace(self, new: Self::Handle) -> Self::Handle { + self.policy.currsize = self.policy.currsize.saturating_add(new.size()); + let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; + self.policy.currsize = self.policy.currsize.saturating_sub(old.size()); - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() + old } +} - pub fn memory(&self) -> usize { - self.memory - } +/// A view into a vacant slot in [`NoPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. + policy: &'a mut NoPolicy, + /// The shared configuration + shared: &'a Shared, + /// If true, means we used `.evict()` method, and empty slots are available + /// in table; so we don't need to reserve a new one. 
+ space_available: bool, +} + +impl traits::VacantExt for Vacant<'_> { + type Shared = Shared; + type Handle = Handle; #[inline] - pub fn len(&self) -> usize { - self.table.len() + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() } - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() + #[inline(always)] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) } - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + if !self.space_available { + self.policy.table.reserve(1, |x| x.key().hash()); + } + unsafe { + self.policy + .table + .insert_no_grow(handle.key().hash(), handle); + } } +} - pub fn capacity(&self) -> usize { - self.table.capacity() +pub struct NoPolicy { + /// The raw hash table storing all live [`Handle`] entries. + table: hashbrown::raw::RawTable, + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl NoPolicy { + /// Creates a new [`NoPolicy`]. + /// + /// The underlying hash table is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + currsize: 0, + } + } + + /// Returns a reference to the underlying raw hash table. 
+ #[inline(always)] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table } +} + +impl traits::PolicyExt for NoPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; - pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, usize)> { - unsafe { self.table.iter() } + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize } #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, NoPolicyAbsent<'_>>> { - match self.table.try_find(key.hash, |(x, _, _)| x.equal(py, key))? { + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let bucket = self.table.find(key.hash(), |x| key.py_eq(py, x.key()))?; + Ok(bucket.map(|x| unsafe { x.as_ref() })) + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + match self.table.find(key.hash(), |x| key.py_eq(py, x.key()))? { Some(bucket) => { - Ok( - Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) - ) - }, + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } None => { - Ok( - Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: None }) - ) + let result = Vacant { + policy: self, + shared, + space_available: false, + }; + Ok(traits::PolicyEntry::Vacant(result)) } } } #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, NoPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |(x, _, _)| x.equal(py, key), - |(x, _, _)| x.hash, - )? 
{ - Ok(bucket) => Ok( - Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } + fn evict(&mut self, _shared: &Self::Shared) -> pyo3::PyResult { + Err(new_py_error!( + PyOverflowError, + "The cache has no algorithm to evict items" + )) } #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |(x, _, _)| x.equal(py, key))? - { - Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), - None => Ok(None), - } + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + self.table.shrink_to(0, |x| x.key().hash()); } - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.table.is_empty() { + return; } + self.table.clear(); + shared.generation_version().increment(); + self.currsize = 0; + } - if self.table.len() != other.table.len() { + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { return Ok(false); } let mut error = None; let result = unsafe { - self.table.iter().all(|bucket| { - let (key, val, _) = bucket.as_ref(); + self.table.iter().map(|x| x.as_ref()).all(|h1| { + let key = h1.key(); - match other.table.try_find(key.hash, |(x, _, _)| x.equal(py, key)) { + match other.table.get(key.hash(), |x| key.py_eq(py, x.key())) { Err(e) => { error = Some(e); - true + false } - Ok(Some(bucket)) => { - let (_, val2, _) = bucket.as_ref(); - - match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { - Ok(result) => result, + 
Ok(None) => false, + Ok(Some(h2)) => { + match utils::pyobject_equal(py, h1.value().as_ptr(), h2.value().as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - true + false } } } - Ok(None) => false, } }) }; - if let Some(error) = error { - return Err(error); - } - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.memory = 0; - self.observed.change(); + error.map_or(Ok(result), Err) } - pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(self.table.len(), |(x, _, _)| x.hash); - self.observed.change(); - } + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + table.insert_no_grow(handle.key().hash(), handle.clone_ref(py)); } } - Ok(()) + Self { + table, + currsize: self.currsize, + } } - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, ) -> pyo3::PyResult<()> { - use pyo3::types::PyDictMethods; - - let (maxsize, iterable, capacity, maxmemory) = - unsafe { extract_pickle_tuple!(py, state => dict) }; - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; + let mut dict = tuple.begin_dict()?; unsafe { - for (key, value) in dict.iter() { - let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - _ => std::hint::unreachable_unchecked(), - } + for handle in self.table.iter().map(|x| x.as_ref()) { + dict.entry(handle.key().as_ref(), handle.value())?; } } - *self = new; - Ok(()) - } -} - -impl<'a> NoPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - unsafe { - let item = self.bucket.as_mut(); - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let next_memory = self - .instance - .memory - .saturating_sub(item.2) - .saturating_add(new_size); - if next_memory > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - let old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = next_memory; - Ok(old_value) - } + dict.end() } - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; - self.instance.memory = self.instance.memory.saturating_sub(x.2); - self.instance.observed.change(); - x - } + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyDictMethods; + use pyo3::types::PyTupleMethods; - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { self.bucket.as_mut() } - } -} + let dict = builded.get_item(0)?.cast_into::()?; + let dict_length = dict.len(); -impl NoPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) 
-> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", + if dict_length > maxsize { + return Err(new_py_error!( + PyValueError, + "dict size is incompatible with maxsize" )); } - if self.instance.table.len() >= self.instance.maxsize.get() { - // There's no algorithm for removing a key-value pair, so we raise PyOverflowError. - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(dict.len()); - match self.insert_slot { - Some(slot) => unsafe { - self.instance - .table - .insert_in_slot(key.hash, slot, (key, value, entry_size)); - }, - None => { - self.instance - .table - .insert(key.hash, (key, value, entry_size), |(x, _, _)| x.hash); + for (key, value) in dict.iter() { + let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + unsafe { + slf.table.insert_no_grow(handle.key().hash(), handle); } } - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) + Ok((shared, slf)) } } diff --git a/src/policies/random.rs b/src/policies/random.rs deleted file mode 100644 index 0803724..0000000 --- a/src/policies/random.rs +++ /dev/null @@ -1,391 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -pub struct RandomPolicy { - table: hashbrown::raw::RawTable<(PreHashObject, pyo3::Py, usize)>, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct RandomPolicyOccupied<'a> { - instance: &'a mut RandomPolicy, - bucket: 
hashbrown::raw::Bucket<(PreHashObject, pyo3::Py, usize)>, -} - -pub struct RandomPolicyAbsent<'a> { - instance: &'a mut RandomPolicy, - insert_slot: Option, -} - -impl RandomPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, usize)> { - unsafe { self.table.iter() } - } - - #[inline] - pub fn popitem( - &mut self, - ) -> pyo3::PyResult, usize)>> { - if self.table.is_empty() { - Ok(None) - } else { - let nth = fastrand::usize(0..self.table.len()); - - let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; - let (x, _) = unsafe { self.table.remove(bucket) }; - self.memory = self.memory.saturating_sub(x.2); - - self.observed.change(); - Ok(Some(x)) - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, RandomPolicyAbsent<'_>>> { - match self.table.try_find(key.hash, |(x, _, _)| x.equal(py, key))? 
{ - Some(bucket) => { - Ok( - Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) - ) - }, - None => { - Ok( - Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: None }) - ) - } - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, RandomPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |(x, _, _)| x.equal(py, key), - |(x, _, _)| x.hash, - )? { - Ok(bucket) => Ok( - Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |(x, _, _)| x.equal(py, key))? - { - Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), - None => Ok(None), - } - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - let mut error = None; - - let result = unsafe { - self.table.iter().all(|bucket| { - let (key, val, _) = bucket.as_ref(); - - match other.table.try_find(key.hash, |(x, _, _)| x.equal(py, key)) { - Err(e) => { - error = Some(e); - true - } - Ok(Some(bucket)) => { - let (_, val2, _) = bucket.as_ref(); - - match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { - Ok(result) => result, - Err(e) => { - error = Some(e); - true - } - } - } - Ok(None) => false, - } - }) - }; - - if let Some(error) = error { - return Err(error); - } - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - 
self.table.shrink_to(self.table.len(), |(x, _, _)| x.hash); - self.observed.change(); - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyDictMethods; - - let (maxsize, iterable, capacity, maxmemory) = - unsafe { extract_pickle_tuple!(py, state => dict) }; - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - unsafe { - for (key, value) in dict.iter() { - let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - _ => std::hint::unreachable_unchecked(), - } - } - } - - *self = new; - Ok(()) - } - - pub fn random_key(&self) -> Option<&PreHashObject> { - if self.table.is_empty() { - None - } else { - let nth = fastrand::usize(0..self.table.len()); - - let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; - let (key, _, _) = unsafe { bucket.as_ref() }; - - Some(key) - } - } -} - -impl<'a> RandomPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let old_value; - { - let item = unsafe { self.bucket.as_mut() }; - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = item.2; - old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem()?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; - self.instance.memory = self.instance.memory.saturating_sub(x.2); - self.instance.observed.change(); - x - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { self.bucket.as_mut() } - } -} - -impl RandomPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > 
self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem()?.is_none() { - break; - } - } - - match self.insert_slot { - Some(slot) => unsafe { - self.instance - .table - .insert_in_slot(key.hash, slot, (key, value, entry_size)); - }, - None => { - self.instance - .table - .insert(key.hash, (key, value, entry_size), |(x, _, _)| x.hash); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs new file mode 100644 index 0000000..7797641 --- /dev/null +++ b/src/policies/rrpolicy.rs @@ -0,0 +1,308 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`RRPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut RRPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied slot within the hash table. 
+ bucket: hashbrown::raw::Bucket, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Shared = Shared; + type Handle = Handle; + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; + self.policy.currsize = self.policy.currsize.saturating_sub(h.size()); + h + } + + #[inline] + fn replace(self, new: Self::Handle) -> Self::Handle { + self.policy.currsize = self.policy.currsize.saturating_add(new.size()); + let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; + self.policy.currsize = self.policy.currsize.saturating_sub(old.size()); + + old + } +} + +/// A view into a vacant slot in [`RRPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. + policy: &'a mut RRPolicy, + /// The shared configuration + shared: &'a Shared, + /// If true, means we used `.evict()` method, and empty slots are available + /// in table; so we don't need to reserve a new one. + space_available: bool, +} + +impl traits::VacantExt for Vacant<'_> { + type Shared = Shared; + type Handle = Handle; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline(always)] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + if !self.space_available { + self.policy.table.reserve(1, |x| x.key().hash()); + } + unsafe { + self.policy + .table + .insert_no_grow(handle.key().hash(), handle); + } + } +} + +pub struct RRPolicy { + /// The raw hash table storing all live [`Handle`] entries. + table: hashbrown::raw::RawTable, + /// Running total of all stored handles' sizes, maintained incrementally. 
+ currsize: usize, +} + +impl RRPolicy { + /// Creates a new [`RRPolicy`]. + /// + /// The underlying hash table is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + currsize: 0, + } + } + + /// Returns a reference to the underlying raw hash table. + #[inline(always)] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } +} + +impl PolicyExt for RRPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let bucket = self.table.find(key.hash(), |x| key.py_eq(py, x.key()))?; + Ok(bucket.map(|x| unsafe { x.as_ref() })) + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + match self.table.find(key.hash(), |x| key.py_eq(py, x.key()))? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + space_available: false, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + #[inline] + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + if self.table.is_empty() { + Err(new_py_error!(PyKeyError, "cache is empty")) + } else { + let nth = fastrand::usize(0..self.table.len()); + + let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; + + shared.generation_version().increment(); + + let (handle, _) = unsafe { self.table.remove(bucket) }; + self.currsize = self.currsize.saturating_sub(handle.size()); + Ok(handle) + } + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + self.table.shrink_to(0, |x| x.key().hash()); + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.table.is_empty() { + return; + } + self.table.clear(); + shared.generation_version().increment(); + self.currsize = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().map(|x| x.as_ref()).all(|h1| { + let key = h1.key(); + + match other.table.get(key.hash(), |x| key.py_eq(py, x.key())) { + Err(e) => { + error = Some(e); + false + } + Ok(None) => false, + Ok(Some(h2)) => { + match utils::pyobject_equal(py, h1.value().as_ptr(), h2.value().as_ptr()) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); 
+ + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + table.insert_no_grow(handle.key().hash(), handle.clone_ref(py)); + } + } + + Self { + table, + currsize: self.currsize, + } + } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut dict = tuple.begin_dict()?; + + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + dict.entry(handle.key().as_ref(), handle.value())?; + } + } + + dict.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyDictMethods; + use pyo3::types::PyTupleMethods; + + let dict = builded.get_item(0)?.cast_into::()?; + let dict_length = dict.len(); + + if dict_length > maxsize { + return Err(new_py_error!( + PyValueError, + "dict size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(dict.len()); + + for (key, value) in dict.iter() { + let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + unsafe { + slf.table.insert_no_grow(handle.key().hash(), handle); + } + } + + Ok((shared, slf)) + } +} diff --git a/src/policies/traits.rs b/src/policies/traits.rs new file mode 100644 index 0000000..99da9b2 --- /dev/null +++ b/src/policies/traits.rs @@ -0,0 +1,156 @@ +use crate::internal::alias; +use crate::internal::pickle; +use crate::internal::utils; + +pub trait HandleExt { + type Key; + + /// Borrows the key stored in this handle. + fn key(&self) -> &Self::Key; + + /// The size this handle contributes toward [`PolicyExt::maxsize`]. + /// + /// Return `1` for count-based policies or a byte/cost value for + /// size-based policies. Must be `> 0`. 
+ fn size(&self) -> usize; +} + +/// Guard for an *occupied* slot. +pub trait OccupiedExt { + type Shared: SharedExt; + type Handle: HandleExt; + + /// Replaces the current handle with `new`, returning the old one. + fn replace(self, new: Self::Handle) -> Self::Handle; + + /// Removes the handle from this slot and returns it. + fn remove(self) -> Self::Handle; +} + +/// Guard for a *vacant* slot. +pub trait VacantExt { + type Shared: SharedExt; + type Handle: HandleExt; + + /// Returns `true` if adding `extra_size` would meet or exceed [`SharedExt::maxsize`]. + /// Called *before* [`VacantExt::insert`]. + /// + /// This method is exists here because after calling [`PolicyExt::entry`], we can't use + /// policy. + fn would_exceed(&self, extra_size: usize) -> bool; + + /// Evicts one entry, freeing budget for a subsequent insert or replace. + /// + /// This method is exists here because after calling [`PolicyExt::entry`], we can't use + /// policy. + /// + /// # Errors + /// Returns any Python exception raised while dropping the evicted value. + fn evict(&mut self) -> pyo3::PyResult<()>; + + /// Inserts `handle` into this slot. + /// + /// Does **not** enforce the weight budget; call + /// [`would_exceed`](EntryExt::would_exceed) first. + fn insert(self, handle: Self::Handle); +} + +/// The state of a policy slot, returned by [`PolicyExt::entry`]. +pub enum PolicyEntry { + Occupied(O), + Vacant(V), +} + +pub trait SharedExt: Send + Sync { + /// Returns the configured maxsize. + fn maxsize(&self) -> usize; + + /// Returns the generation version. + fn generation_version(&self) -> &utils::GenerationVersion; + + /// Returns a reference to configued getsizeof function. + fn getsizeof(&self) -> &utils::GetsizeofFunction; + + /// Returns a reference to configued getsizeof function. + fn global_ttl(&self) -> Option; + + /// Make a clone of `self`. 
+ fn clone_ref(&self, py: pyo3::Python) -> Self; +} + +pub trait PolicyExt: Sized { + /// Read-only variables, we keep this type separated from the main policy implementation, + /// because we need to access them outside of `Mutex`s. + type Shared: SharedExt; + type Handle: HandleExt; + + type Occupied<'a>: OccupiedExt + 'a + where + Self: 'a; + + type Vacant<'a>: VacantExt + 'a + where + Self: 'a; + + const PICKLE_SIZE: usize; + + /// Returns the current total cumulative size consumed by all stored entries. + fn current_size(&self) -> usize; + + /// Looks up a handle by `hash` and `eq`, applying policy side-effects on hit. + /// + /// # Errors + /// Returns `Err` if `eq` raises a Python exception. + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult>; + + /// Returns a [`PolicyEntry`] for the slot at `hash` / `eq`. + /// + /// # Errors + /// Returns `Err` if `eq` raises a Python exception. + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>>; + + /// Evicts a handle according to the policy algorithm, returning it. + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult; + + /// Removes all handles without shrinking the allocation. + fn clear(&mut self, shared: &Self::Shared); + + /// Shrinks the internal allocation as close to length as possible. + fn shrink_to_fit(&mut self, shared: &Self::Shared); + + /// Performs Python `==`. + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult; + + /// Make a clone of `self`. + fn clone_ref(&mut self, py: pyo3::Python) -> Self; + + /// Buildes the pickle. + /// Should not add items to pickle more than the configured [`Self::PICKLE_SIZE`]. + fn build_pickle( + &self, + tuple: &mut pickle::TupleBuilder<'_, pickle::PickleBuilder>, + ) -> pyo3::PyResult<()>; + + /// Loads the builded pickle. 
+ fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)>; +} diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs deleted file mode 100644 index ddaff20..0000000 --- a/src/policies/ttl.rs +++ /dev/null @@ -1,770 +0,0 @@ -use super::fifo::MAX_N_SHIFT; -use crate::common::AbsentSituation; -use crate::common::Entry; -use crate::common::NoLifetimeSliceIter; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; -use crate::common::TryFindMethods; - -use std::collections::VecDeque; - -pub struct TTLPolicy { - // See FIFOPolicy to find out fields - table: hashbrown::raw::RawTable, - entries: VecDeque, - maxsize: core::num::NonZeroUsize, - maxmemory: core::num::NonZeroUsize, - memory: usize, - ttl: std::time::Duration, - n_shifts: usize, - pub observed: Observed, -} - -pub struct TTLPolicyOccupied<'a> { - instance: &'a mut TTLPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct TTLPolicyAbsent<'a> { - instance: &'a mut TTLPolicy, - situation: AbsentSituation, -} - -pub struct TTLIterator { - first: NoLifetimeSliceIter, - second: NoLifetimeSliceIter, -} - -impl TTLPolicy { - pub fn new( - maxsize: usize, - mut capacity: usize, - secs: f64, - maxmemory: usize, - ) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - maxmemory, - memory: 0, - ttl: std::time::Duration::from_secs_f64(secs), - n_shifts: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - pub fn ttl(&self) -> std::time::Duration { - self.ttl - } - - 
#[inline] - pub fn real_len(&self) -> usize { - let now = std::time::SystemTime::now(); - let mut c = 0usize; - - for item in &self.entries { - if !item.is_expired(now) { - break; - } - - c += 1; - } - - self.table.len() - c - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.real_len() == 0 - } - - pub fn is_full(&self) -> bool { - self.real_len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> (usize, usize) { - (self.table.capacity(), self.entries.capacity()) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - let old = self - .table - .get_mut(entry.key.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - *old -= 1; - } - } - } - - #[inline] - pub fn expire(&mut self, py: pyo3::Python<'_>) { - let now = std::time::SystemTime::now(); - - while let Some(e) = self.entries.front() { - if !e.is_expired(now) { - break; - } - - unsafe { - self.popitem(py).unwrap_unchecked(); - } - } - } - - #[inline] - pub fn popitem(&mut self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let ret = self.entries.front(); - if ret.is_none() { - return Ok(None); - } - - let ret = unsafe { ret.unwrap_unchecked() }; - - match self.table.try_find(ret.key.hash, |x| { - self.entries[(*x) - self.n_shifts].key.equal(py, &ret.key) - })? 
{ - Some(bucket) => { - unsafe { self.table.remove(bucket) }; - } - None => unreachable!("popitem key not found in table"), - } - - let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; - self.memory = self.memory.saturating_sub(ret.size); - - self.observed.change(); - - self.decrement_indexes(1, self.entries.len()); - Ok(Some(ret)) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, TTLPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].key.equal(py, key))? - { - Some(bucket) => { - let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - None => { - Ok( - Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, TTLPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |x| self.entries[(*x) - self.n_shifts].key.equal(py, key), - |x| self.entries[(*x) - self.n_shifts].key.hash, - )? 
{ - Ok(bucket) => { - let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - }, - Err(insert_slot) => { - Ok( - Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(insert_slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult> { - match self - .table - .try_find(key.hash, |x| { - self.entries[(*x) - self.n_shifts].key.equal(py, key) - })? - .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => { - let pair = &self.entries[(*index) - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Some(pair)) - } else { - Ok(None) - } - } - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.entries.clear(); - self.n_shifts = 0; - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self, py: pyo3::Python<'_>) { - self.expire(py); - - self.table.shrink_to(self.table.len(), |x| { - self.entries[(*x) - self.n_shifts].key.hash - }); - self.entries.shrink_to_fit(); - self.observed.change(); - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn entries_iter(&self) -> std::collections::vec_deque::Iter<'_, TimeToLivePair> { - self.entries.iter() - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.real_len() != other.real_len() { - return Ok(false); - } - - let now = std::time::SystemTime::now(); - - unsafe { - for index1 in self.table.iter().map(|x| x.as_ref()) { - let pair1 = &self.entries[(*index1) - self.n_shifts]; - - if pair1.is_expired(now) { - continue; - } - - match other.table.try_find(pair1.key.hash, |x| { - pair1 - .key - .equal(py, &other.entries[(*x) - other.n_shifts].key) - })? { - Some(bucket) => { - let pair2 = &other.entries[(*bucket.as_ref()) - other.n_shifts]; - - if pair2.is_expired(now) { - return Ok(false); - } - - if !crate::common::pyobject_equal( - py, - pair1.value.as_ptr(), - pair2.value.as_ptr(), - )? 
{ - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - pub fn iter(&mut self, py: pyo3::Python<'_>) -> TTLIterator { - self.expire(py); - - let (a, b) = self.entries.as_slices(); - - TTLIterator { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } - - pub fn get_index(&self, n: usize) -> Option<&TimeToLivePair> { - self.entries.get(n) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - if pyo3::ffi::PyTuple_CheckExact(state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size(state); - if size != 4 && size != 5 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 1); - - if pyo3::ffi::PyList_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - pyo3::Py::::from_borrowed_ptr(py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - // SAFETY: we check `iterable` type in this function - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "the iterable object size is more than maxsize!", - )); - } - - let ttl = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 3); - pyo3::ffi::PyFloat_AsDouble(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - let maxmemory = if size == 5 { - let obj = pyo3::ffi::PyTuple_GetItem(state, 4); - let 
result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - result - } else { - 0 - }; - - let mut new = Self::new(maxsize, capacity, ttl, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, timestamp) = - pair?.extract::<(pyo3::Py, pyo3::Py, f64)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? { - Entry::Absent(entry) => { - entry.pickle_insert( - py, - hk, - value, - std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp), - )?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.expire(py); - new.shrink_to_fit(py); - - *self = new; - Ok(()) - } - } -} - -impl<'a> TTLPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let new_size = { - let index = unsafe { *self.bucket.as_ref() } - self.instance.n_shifts; - let item = &self.instance.entries[index]; - crate::common::entry_size(py, &item.key, &value)? 
- }; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - // We have to move the value to the end of the vector - let (mut index, slot) = unsafe { self.instance.table.remove(self.bucket.clone()) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let mut item = self.instance.entries.remove(index).unwrap(); - let old_size = item.size; - item.expire_at = Some(std::time::SystemTime::now() + self.instance.ttl); - let old_value = std::mem::replace(&mut item.value, value); - item.size = new_size; - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - unsafe { - self.instance.table.insert_in_slot( - item.key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(item); - } - - self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TimeToLivePair { - let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let m = self.instance.entries.remove(index).unwrap(); - self.instance.memory = self.instance.memory.saturating_sub(m.size); - - self.instance.observed.change(); - m - } - - pub fn into_value(self) -> &'a mut TimeToLivePair { - let index = unsafe { self.bucket.as_ref() }; - &mut self.instance.entries[index - self.instance.n_shifts] - } -} - -impl TTLPolicyAbsent<'_> { - unsafe fn pickle_insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - expire_at: std::time::SystemTime, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > 
self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(_) => { - return Err(pyo3::PyErr::new::( - "pikcle object is suspicious!", - )) - } - AbsentSituation::Slot(slot) => unsafe { - // This means the key is not available and we have insert_slot - // for inserting it - - // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle - - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - }, - AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - Ok(()) - } - - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let expire_at = std::time::SystemTime::now() + self.instance.ttl; - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(bucket) => { - // This means the key is available but expired - // So we have to move the value to the end of the vector - // and update the bucket ( like TTLPolicyOccupied::update ) - let (mut index, slot) = unsafe { self.instance.table.remove(bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let mut item = self.instance.entries.remove(index).unwrap(); - let old_size = item.size; - - item.expire_at = Some(expire_at); - item.value = value; - item.size = entry_size; - self.instance.memory = self - 
.instance - .memory - .saturating_sub(old_size) - .saturating_add(entry_size); - - unsafe { - self.instance.table.insert_in_slot( - item.key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(item); - } - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - } - AbsentSituation::Slot(slot) => unsafe { - // This means the key is not available and we have insert_slot - // for inserting it - - self.instance.expire(py); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - }, - AbsentSituation::None => { - // This is same as AbsentSituation::Slot but we don't have any slot - - self.instance.expire(py); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - self.instance.table.insert( - key.hash, - self.instance.entries.len() + self.instance.n_shifts, - |index| { - self.instance.entries[(*index) - self.instance.n_shifts] - .key - .hash - }, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - } - } - - self.instance.observed.change(); - Ok(()) - } -} - -impl Iterator 
for TTLIterator { - type Item = std::ptr::NonNull; - - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for TTLIterator {} diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs new file mode 100644 index 0000000..74a67e9 --- /dev/null +++ b/src/policies/ttlpolicy.rs @@ -0,0 +1,611 @@ +use std::collections::VecDeque; + +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use crate::policies::common::Shared; + +macro_rules! get_handle { + (&$slf:expr, $index:expr) => { + &$slf.entries[$index - $slf.front_offset] + }; + (&mut $slf:expr, $index:expr) => { + &mut $slf.entries[$index - $slf.front_offset] + }; +} + +/// A key-value pair with a precomputed hash and combined size. +pub struct ExpiringHandle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, + /// Configured ttl for handle. + expires_at: std::time::SystemTime, +} + +impl ExpiringHandle { + /// Creates a new [`Handle`], which calculates the precomputed hash itself. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: utils::ExpiresAt, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + expires_at, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key. 
+ /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: utils::ExpiresAt, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + expires_at: expires_at.into(), + }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + #[inline] + pub fn expires_at(&self) -> std::time::SystemTime { + self.expires_at + } + + #[inline] + pub fn is_expired(&self, now: std::time::SystemTime) -> bool { + self.expires_at <= now + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + expires_at: self.expires_at, + } + } +} + +impl HandleExt for ExpiringHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`TTLPolicy`]. 
+pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut TTLPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn replace(self, new: Self::Handle) -> Self::Handle { + // In update we don't need to increment this; because this does not change the memory address ranges + // self.shared.generation_version().increment(); + + let item = unsafe { get_handle!(&mut self.policy, *self.bucket.as_ref()) }; + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(item.size()) + .saturating_add(new.size()); + + std::mem::replace(item, new) + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; + index -= self.policy.front_offset; + + self.policy + .decrement_indexes(index + 1, self.policy.entries.len()); + + let handle = self.policy.entries.remove(index).unwrap(); + self.policy.currsize = self.policy.currsize.saturating_sub(handle.size()); + handle + } +} + +/// A view into a vacant slot in [`TTLPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut TTLPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + self.policy.table.insert( + handle.key().hash(), + self.policy.entries.len() + self.policy.front_offset, + |index| get_handle!(&self.policy, *index).key().hash(), + ); + self.policy.entries.push_back(handle); + } +} + +pub struct TTLPolicy { + // Fields are same as `FIFOPolicy` + table: hashbrown::raw::RawTable, + entries: VecDeque, + currsize: usize, + front_offset: usize, +} + +impl TTLPolicy { + /// Creates a new [`TTLPolicy`]. + /// + /// The underlying [`VecDeque`] is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + entries: VecDeque::with_capacity(capacity), + currsize: 0, + front_offset: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } + + #[inline] + pub fn entries(&self) -> &VecDeque { + &self.entries + } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + #[cfg(not(feature = "use-small-offset"))] + const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; + + #[cfg(feature = "use-small-offset")] + const MAX_FRONT_OFFSET: usize = u8::MAX as usize; + + // Fast path: shifting the entire front is a single counter increment. + // Guard against overflow; the full-normalization path below handles that case. 
+ if start <= 1 && end == self.entries.len() && self.front_offset < MAX_FRONT_OFFSET { + self.front_offset += 1; + return; + } + + if (end - start) > self.table.capacity() / 2 { + // Table-scan + // normalize every index (subtract fo) and decrement those in range [start, end). + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + + let vd_idx = *i - self.front_offset; + + *i = if start <= vd_idx && vd_idx < end { + vd_idx - 1 // normalize + decrement + } else { + vd_idx // normalize + }; + } + } + } else { + // Entries-scan + // decrement the logical indices for entries in range [start, end). + let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let result = unsafe { + self.table + .get_mut(entry.key().hash(), |x| { + Ok::<_, pyo3::PyErr>((*x) - self.front_offset == i) + }) + .unwrap_unchecked() + .expect("index not found") + }; + *result -= 1; + } + + // normalize every stored index by subtracting `fo`. + // - Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // - All others: (vd_idx + fo) - fo = vd_idx + if self.front_offset != 0 { + unsafe { + for bucket in self.table.iter() { + *bucket.as_mut() -= self.front_offset; + } + } + } + } + + // Both branches now store raw VecDeque indices, so the offset is zero. 
+        self.front_offset = 0;
+    }
+
+    /// Removes expired handles from the front of `entries`.
+    ///
+    /// Pops front handles while [`ExpiringHandle::is_expired`] holds for the
+    /// current system time and stops at the first handle that is still live.
+    /// Every removal increments `gv` and subtracts the handle's size from
+    /// `currsize`.
+    pub fn expire(&mut self, gv: &utils::GenerationVersion) {
+        let now = std::time::SystemTime::now();
+
+        while let Some(handle) = self.entries.front() {
+            if !handle.is_expired(now) {
+                break;
+            }
+
+            // The front handle is the one whose stored index equals `front_offset`.
+            let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0);
+            if std::hint::unlikely(
+                self.table
+                    .remove_entry(handle.key().hash(), eq)
+                    .unwrap()
+                    .is_none(),
+            ) {
+                unreachable!("popitem key not found in table");
+            }
+
+            gv.increment();
+
+            // Shift the table indexes while the front is still in `entries`.
+            // `decrement_indexes` takes a range of pre-removal positions (this is
+            // how `evict` and `Occupied::remove` call it). Calling it after
+            // `pop_front` passes `[1, len - 1)`, which skips the last entry on the
+            // slow path and underflows `end - start` when the deque becomes empty.
+            self.decrement_indexes(1, self.entries.len());
+            let front = unsafe { self.entries.pop_front().unwrap_unchecked() };
+
+            self.currsize = self.currsize.saturating_sub(front.size());
+        }
+    }
+
+    /// Expires stale handles, then returns a raw iterator over the two slices
+    /// of the underlying `VecDeque`.
+    #[inline]
+    pub fn iter(&mut self, shared: &Shared) -> utils::RawVecDequeIter {
+        self.expire(shared.generation_version());
+
+        let (first, second) = self.entries.as_slices();
+        utils::RawVecDequeIter::new(first, second)
+    }
+}
+
+impl PolicyExt for TTLPolicy {
+    type Shared = Shared;
+    type Handle = ExpiringHandle;
+
+    type Occupied<'a>
+        = Occupied<'a>
+    where
+        Self: 'a;
+
+    type Vacant<'a>
+        = Vacant<'a>
+    where
+        Self: 'a;
+    const PICKLE_SIZE: usize = 1;
+
+    /// Returns the accumulated size of all stored handles.
+    #[inline]
+    fn current_size(&self) -> usize {
+        self.currsize
+    }
+
+    /// Looks up `key` and returns its handle, or `None` when the key is absent
+    /// or its handle has expired. An expired handle is not removed here.
+    #[inline]
+    fn get(
+        &mut self,
+        py: pyo3::Python,
+        key: &::Key,
+    ) -> pyo3::PyResult> {
+        let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key);
+
+        match self
+            .table
+            .get(key.hash(), eq)?
+            .map(|index| get_handle!(&self, *index))
+        {
+            Some(handle) => {
+                if handle.is_expired(std::time::SystemTime::now()) {
+                    Ok(None)
+                } else {
+                    Ok(Some(handle))
+                }
+            }
+            None => Ok(None),
+        }
+    }
+
+    fn entry<'a>(
+        &'a mut self,
+        py: pyo3::Python,
+        key: &::Key,
+        shared: &'a Self::Shared,
+    ) -> pyo3::PyResult, Self::Vacant<'a>>> {
+        self.expire(shared.generation_version());
+
+        let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key);
+        match self.table.find(key.hash(), eq)?
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + let front = self.entries.front(); + if front.is_none() { + return Err(new_py_error!(PyKeyError, ())); + } + + let front = unsafe { front.unwrap_unchecked() }; + + let eq = |index: &usize| Ok::<_, pyo3::PyErr>(*index - self.front_offset == 0); + if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { + unreachable!("popitem key not found in table"); + } + + shared.generation_version().increment(); + + self.decrement_indexes(1, self.entries.len()); + let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.currsize = self.currsize.saturating_sub(front.size()); + + Ok(front) + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |index| get_handle!(&self, *index).key().hash()); + self.entries.shrink_to_fit(); + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.entries.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear(); + self.entries.clear(); + self.currsize = 0; + self.front_offset = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let now = std::time::SystemTime::now(); + + let result = unsafe { + self.table.iter().all(|x| { + let handle = get_handle!(&self, *x.as_ref()); + if handle.is_expired(now) { + return true; + } + + let key = handle.key(); + + match other + .table + .get(key.hash(), |i| key.py_eq(py, 
get_handle!(&other, *i).key())) + { + Err(e) => { + error = Some(e); + false + } + Ok(None) => false, + Ok(Some(i)) => { + let v1 = handle.value(); + let v2 = get_handle!(&other, *i).value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut entries = VecDeque::with_capacity(self.entries.len()); + for handle in self.entries.iter() { + entries.push_back(handle.clone_ref(py)); + } + + Self { + table: self.table.clone(), + entries, + currsize: self.currsize, + front_offset: self.front_offset, + } + } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for handle in self.entries.iter() { + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key().as_ref())?; + tuple.push(handle.value())?; + tuple.push( + handle + .expires_at + .duration_since(std::time::UNIX_EPOCH) + .unwrap(), + )?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + if global_ttl.is_none_or(|x| x.is_zero()) { + return Err(new_py_error!(PyValueError, "global_ttl is zero")); + } + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::with_ttl(maxsize, getsizeof, global_ttl); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, timestamp) = + 
+                bound.extract::<(alias::PyObject, alias::PyObject, f64)>()?;
+
+            // The timestamp comes from pickled data. `Duration::from_secs_f64`
+            // panics on negative, NaN, infinite or overflowing input, and
+            // `SystemTime + Duration` panics on overflow, so validate both steps
+            // and raise a Python exception instead.
+            let since_epoch = std::time::Duration::try_from_secs_f64(timestamp)
+                .map_err(|_| new_py_error!(PyValueError, "pickled timestamp is invalid"))?;
+            let expires_at = std::time::UNIX_EPOCH
+                .checked_add(since_epoch)
+                .ok_or_else(|| new_py_error!(PyValueError, "pickled timestamp is invalid"))?;
+
+            let handle = ExpiringHandle::new(
+                bound.py(),
+                shared.getsizeof(),
+                expires_at.into(),
+                key,
+                value,
+            )?;
+
+            slf.currsize = slf.currsize.saturating_add(handle.size());
+
+            unsafe {
+                slf.table.insert_no_grow(
+                    handle.key().hash(),
+                    // Adding `slf.front_offset` is unnecessary here
+                    slf.entries.len(),
+                );
+            }
+            slf.entries.push_back(handle);
+        }
+
+        Ok((shared, slf))
+    }
+}
diff --git a/src/policies/vttl.rs b/src/policies/vttl.rs
deleted file mode 100644
index 199ec58..0000000
--- a/src/policies/vttl.rs
+++ /dev/null
@@ -1,597 +0,0 @@
-use crate::common::AbsentSituation;
-use crate::common::Entry;
-use crate::common::Observed;
-use crate::common::PreHashObject;
-use crate::common::TimeToLivePair;
-use crate::common::TryFindMethods;
-use crate::lazyheap;
-
-use std::ptr::NonNull;
-
-macro_rules! compare_fn {
-    () => {
-        |a, b| {
-            if a.expire_at.is_none() && b.expire_at.is_none() {
-                return std::cmp::Ordering::Equal;
-            } else if b.expire_at.is_none() {
-                return std::cmp::Ordering::Less;
-            } else if a.expire_at.is_none() {
-                return std::cmp::Ordering::Greater;
-            }
-
-            a.expire_at.cmp(&b.expire_at)
-        }
-    };
-}
-
-pub struct VTTLPolicy {
-    table: hashbrown::raw::RawTable>,
-    heap: lazyheap::LazyHeap,
-    maxsize: std::num::NonZeroUsize,
-    maxmemory: std::num::NonZeroUsize,
-    memory: usize,
-    pub observed: Observed,
-}
-
-pub struct VTTLPolicyOccupied<'a> {
-    instance: &'a mut VTTLPolicy,
-    bucket: hashbrown::raw::Bucket>,
-}
-
-pub struct VTTLPolicyAbsent<'a> {
-    instance: &'a mut VTTLPolicy,
-    situation: AbsentSituation>,
-}
-
-pub type VTTLIterator = lazyheap::Iter;
-
-impl VTTLPolicy {
-    pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult {
-        let maxsize = non_zero_or!(maxsize, isize::MAX as usize);
-        let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize);
-        capacity = capacity.min(maxsize.get());
- - Ok(Self { - table: new_table!(capacity)?, - heap: lazyheap::LazyHeap::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn real_len(&mut self) -> usize { - self.expire(); - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn expire(&mut self) { - self.heap.sort_by(compare_fn!()); - - let now = std::time::SystemTime::now(); - - while let Some(x) = self.heap.front() { - if unsafe { !x.as_ref().is_expired(now) } { - break; - } - - unsafe { - self.table - .remove_entry(x.as_ref().key.hash, |x| { - std::ptr::eq(x.as_ptr(), x.as_ptr()) - }) - .unwrap(); - } - - let removed = self.heap.pop_front(compare_fn!()); - if let Some(pair) = removed { - self.memory = self.memory.saturating_sub(pair.size); - } - self.observed.change(); - } - } - - #[inline] - pub fn popitem(&mut self) -> Option { - self.heap.sort_by(compare_fn!()); - - let front = self.heap.front()?; - - unsafe { - self.table - .remove_entry(front.as_ref().key.hash, |x| { - std::ptr::eq(x.as_ptr(), front.as_ptr()) - }) - .unwrap(); - } - - self.observed.change(); - let item = self.heap.pop_front(compare_fn!()).unwrap(); - self.memory = self.memory.saturating_sub(item.size); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, VTTLPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? 
- { - Some(bucket) => unsafe { - let pair = bucket.as_ref(); - - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - None => { - Ok( - Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, VTTLPolicyAbsent<'_>>> { - match self - .table - .try_find_or_find_insert_slot( - key.hash, - |ptr| unsafe { ptr.as_ref().key.equal(py, key) }, - |ptr| unsafe { ptr.as_ref().key.hash }, - )? { - Ok(bucket) => unsafe { - let pair = bucket.as_ref(); - - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - Err(slot) => { - Ok( - Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? 
- .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(pair) => unsafe { - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Some(pair.as_ref())) - } else { - Ok(None) - } - }, - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.heap.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().key.hash }); - - self.heap.shrink_to_fit(); - self.observed.change(); - } - - pub fn iter(&mut self) -> VTTLIterator { - self.heap.iter(compare_fn!()) - } - - pub fn equal(&mut self, py: pyo3::Python<'_>, other: &mut Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.real_len() != other.real_len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let pair1 = node.as_ref(); - - // NOTE: there's no need to check if the pair is expired - // because we already expired all expired pairs by using real_len method - - match other - .table - .try_find(pair1.key.hash, |x| pair1.key.equal(py, &x.as_ref().key))? - { - Some(bucket) => { - let pair2 = bucket.as_ref().as_ref(); - - if !crate::common::pyobject_equal( - py, - pair1.value.as_ptr(), - pair2.value.as_ptr(), - )? { - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value.unbind(), ttl)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind(), ttl)?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value, ttl)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value, ttl)?; - } - } - } - } - - Ok(()) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "iterable object size is greater than maxsize", - )); - } - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, timestamp) = - pair?.extract::<(pyo3::Py, pyo3::Py, f64)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - let ttl = { - if timestamp == 0.0 { - None - } else { - Some(std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp)) - } - }; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.pickle_insert(py, hk, value, ttl)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.expire(); - new.shrink_to_fit(); - - *self = new; - Ok(()) - } - } -} - -impl VTTLPolicyOccupied<'_> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult> { - let old_value; - { - let item = unsafe { self.bucket.as_mut() }; - let pair = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &pair.key, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = pair.size; - old_value = std::mem::replace(&mut pair.value, value); - pair.size = new_size; - pair.expire_at = - ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - self.instance.heap.queue_sort(); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TimeToLivePair { - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = self.instance.heap.remove(item, compare_fn!()); - - self.instance.memory = self.instance.memory.saturating_sub(item.size); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> NonNull { - let item = unsafe { self.bucket.as_mut() }; - *item - } -} - -impl VTTLPolicyAbsent<'_> { - unsafe fn pickle_insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - expire_at: Option, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > 
self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(_) => { - return Err(pyo3::PyErr::new::( - "pikcle object is suspicious!", - )) - } - AbsentSituation::Slot(slot) => { - // This means the key is not available and we have insert_slot - // for inserting it - - // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - } - } - AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - Ok(()) - } - - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - let expire_at = - ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(bucket) => { - // This means the key is available but expired - // So we have to update the values of the old key - // and queue the heap's sort - let old_size = unsafe { bucket.as_ref().as_ref().size }; - - { - let item = unsafe { bucket.as_mut() }; - unsafe { - item.as_mut().expire_at = ttl.map(|x| { - std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x) - }); - item.as_mut().value = value; - item.as_mut().size = entry_size; - } - } - - self.instance.heap.queue_sort(); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - 
.saturating_add(entry_size); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - // Like VTTLPolicyOccupied::update, Here we don't need to change this - // self.instance.observed.change(); - } - AbsentSituation::Slot(slot) => { - self.instance.expire(); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - } - AbsentSituation::None => { - self.instance.expire(); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().key.hash }); - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - } - } - - Ok(()) - } -} - -unsafe impl Send for VTTLPolicy {} diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs new file mode 100644 index 0000000..e8e88c1 --- /dev/null +++ b/src/policies/vttlpolicy.rs @@ -0,0 +1,554 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::pickle::Builder; +use crate::internal::utils; +use crate::policies::traits; +use 
crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; + +pub use crate::policies::common::Shared; +use crate::policies::traits::SharedExt; + +/// Compares two items by `expires_at`, placing `None` values last. +macro_rules! compare_fn { + () => { + |a, b| { + a.expires_at + .is_none() + .cmp(&b.expires_at.is_none()) + .then_with(|| a.expires_at.cmp(&b.expires_at)) + } + }; +} + +/// A key-value pair with a precomputed hash and combined size. +pub struct ExpiringHandle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, + /// Configured ttl for handle. `None` means has no ttl. + expires_at: Option, +} + +impl ExpiringHandle { + /// Creates a new [`Handle`], which calculates the precomputed hash itself. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: Option, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + expires_at, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key. + /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: Option, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + expires_at: expires_at.map(Into::into), + }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. 
+ #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + #[inline] + pub fn expires_at(&self) -> Option { + self.expires_at + } + + #[inline] + pub fn is_expired(&self, now: std::time::SystemTime) -> bool { + self.expires_at.map(|x| x <= now).unwrap_or_default() + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + expires_at: self.expires_at, + } + } +} + +impl HandleExt for ExpiringHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`VTTLPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut VTTLPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. 
+ bucket: hashbrown::raw::Bucket>, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + fn replace(self, new: Self::Handle) -> Self::Handle { + // Here we don't need to increment generation version + // self.shared.generation_version().increment(); + + unsafe { + let cursor = self.bucket.as_mut(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + + self.policy.heap.mark_unsorted(); + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = self.policy.heap.remove(cursor, compare_fn!()); + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} +/// A view into a vacant slot in [`VTTLPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut VTTLPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.heap.push(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct VTTLPolicy { + // Fields are same as `LFUPolicy` + table: hashbrown::raw::RawTable>, + heap: lazyheap::LazyHeap, + currsize: usize, +} + +impl VTTLPolicy { + /// Creates a new [`VTTLPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + heap: lazyheap::LazyHeap::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn heap(&self) -> &lazyheap::LazyHeap { + &self.heap + } + + #[inline] + pub fn iter(&mut self, gv: &utils::GenerationVersion) -> lazyheap::RawIter { + self.expire(gv); + + // We don't want to interrupt other iterators for no reason, + // so we need to manually call sort_by to only interrupt them on changes. 
+ if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap.iter(compare_fn!()) + } + + pub fn expire(&mut self, gv: &utils::GenerationVersion) { + let now = std::time::SystemTime::now(); + + while let Some(cursor) = self.heap.front(compare_fn!()) { + let handle = unsafe { cursor.element() }; + + if !handle.is_expired(now) { + break; + } + + self.table + .remove_entry(handle.key.hash(), |x| { + Ok::<_, pyo3::PyErr>(x.as_ptr() == cursor.as_ptr()) + }) + .unwrap(); + + drop(cursor); + + gv.increment(); + + let handle = self.heap.pop_front(compare_fn!()).unwrap(); + self.currsize = self.currsize.saturating_sub(handle.size); + } + } +} + +impl PolicyExt for VTTLPolicy { + type Shared = Shared; + type Handle = ExpiringHandle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + const PICKLE_SIZE: usize = 1; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let cursor = self + .table + .get_mut(key.hash(), |x| unsafe { key.py_eq(py, &x.element().key) })?; + + match cursor { + Some(cursor) => { + let handle = unsafe { cursor.element() }; + + if handle.is_expired(std::time::SystemTime::now()) { + Ok(None) + } else { + Ok(Some(handle)) + } + } + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + self.expire(shared.generation_version()); + + let eq = |cursor: &lazyheap::Cursor| unsafe { + key.py_eq(py, cursor.element().key()) + }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = self + .heap + .front(compare_fn!()) + .ok_or_else(|| new_py_error!(PyKeyError, "cache is empty"))?; + + self.table + .remove_entry(unsafe { front_cursor.element().key.hash() }, |x| { + Ok::<_, pyo3::PyErr>(std::ptr::eq(front_cursor.as_ptr(), x.as_ptr())) + })? + .expect("evict: item not found in table"); + } + + shared.generation_version().increment(); + + let handle = self.heap.pop_front(compare_fn!()).unwrap(); + + self.currsize = self.currsize.saturating_sub(handle.size); + Ok(handle) + } + + fn clear(&mut self, shared: &Self::Shared) { + if self.heap.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.heap.clear(); + self.currsize = 0; + } + + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |x| unsafe { x.element().key.hash() }); + + self.heap.shrink_to_fit(); + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let now = std::time::SystemTime::now(); + + let result = unsafe { + self.table.iter().all(|x| { + let handle = x.as_ref().element(); + + if handle.is_expired(now) { + return true; + } + + let key = handle.key(); + + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { + Err(e) => { + error = Some(e); + false + } + Ok(None) => false, + Ok(Some(cursor)) => { + match utils::pyobject_equal( + py, + handle.value.as_ptr(), + 
cursor.element().value.as_ptr(), + ) { + Ok(eq) => eq, + Err(e) => { + error = Some(e); + false + } + } + } + } + }) + }; + + error.map_or(Ok(result), Err) + } + + fn clone_ref(&mut self, py: pyo3::Python) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.len()); + let mut heap = lazyheap::LazyHeap::new(); + + unsafe { + for cursor in self.heap.iter(compare_fn!()) { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = heap.push(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + heap, + currsize: self.currsize, + } + } + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.table.iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key.as_ref())?; + tuple.push(handle.value())?; + tuple.push( + handle + .expires_at + .map(|x| x.duration_since(std::time::UNIX_EPOCH).unwrap()), + )?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, timestamp) = + bound.extract::<(alias::PyObject, alias::PyObject, Option)>()?; + + let handle = ExpiringHandle::new( + bound.py(), + shared.getsizeof(), + timestamp + 
.map(|x| std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(x)) + .map(Into::into), + key, + value, + )?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.heap.push(handle); + unsafe { + slf.table.insert_no_grow(hash, cursor); + } + } + + slf.heap.sort_by(compare_fn!()); + Ok((shared, slf)) + } +} diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs new file mode 100644 index 0000000..d729803 --- /dev/null +++ b/src/policies/wrapped.rs @@ -0,0 +1,304 @@ +use pyo3::types::PyAnyMethods; +use pyo3::types::PyTupleMethods; + +use crate::internal::alias; +use crate::internal::pickle; +use crate::internal::pickle::Builder; +use crate::policies::traits::HandleExt; +use crate::policies::traits::OccupiedExt; +use crate::policies::traits::PolicyEntry; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::traits::VacantExt; + +/// A wrapper over [`PolicyExt`] implementations that adds +/// higher-level methods shared across all policies. +/// +/// - [`insert`](Wrapped::insert) +/// - [`remove`](Wrapped::remove) +/// - [`contains`](Wrapped::contains) +/// - [`extend`](Wrapped::extend). +/// +/// The shared (lock-free) fields of the policy are accessible directly via +/// [`Wrapped::shared`], while mutable state is accessed through the inner +/// [`parking_lot::Mutex`]. +pub struct Wrapped { + /// Read-only fields after initialization — no lock required. + /// Accessible directly without acquiring the mutex. + shared: P::Shared, + /// Mutable policy state — protected by a [`parking_lot::Mutex`]. + inner: parking_lot::Mutex

, +} + +impl Wrapped

{ + /// Wraps an existing policy alongside its shared (lock-free) data. + pub fn new(policy: P, shared: P::Shared) -> Self { + Self { + shared, + inner: parking_lot::Mutex::new(policy), + } + } + + /// Returns a reference to the shared, lock-free fields of the policy. + #[inline(always)] + pub fn shared(&self) -> &P::Shared { + &self.shared + } + + /// Acquires the mutex and returns a guard over the mutable policy state. + /// + /// # Poisoning + /// `parking_lot::Mutex` has no lock poisoning, so this never panics on a poisoned lock. + #[inline(always)] + pub fn policy(&self) -> parking_lot::MutexGuard<'_, P> { + self.inner.lock() + } +} + +#[inline(always)] +fn insert_inner( + lock: &mut parking_lot::MutexGuard<'_, P>, + shared: &P::Shared, + py: pyo3::Python<'_>, + handle: P::Handle, +) -> pyo3::PyResult> { + let handle_size = handle.size(); + + if handle_size > shared.maxsize() { + return Err(new_py_error!( + PyOverflowError, + "handle size is more than the configured maximum size" + )); + } + + let result = match lock.entry(py, handle.key(), shared)? { + PolicyEntry::Occupied(occupied) => Some(occupied.replace(handle)), + PolicyEntry::Vacant(mut vacant) => { + // Evict until the new handle fits + while vacant.would_exceed(handle_size) { + vacant.evict()?; + } + + vacant.insert(handle); + None + } + }; + + if result.is_some() { + // For the `PolicyEntry::Occupied` case, evict after replacement + while lock.current_size() > shared.maxsize() { + lock.evict(shared)?; + } + } + + Ok(result) +} + +// Duplicate methods across all policies +impl Wrapped

{ + /// Returns the remaining size. Equals to `maxsize - current_size`. + #[inline] + pub fn remaining_size(&self) -> usize { + let policy = self.inner.lock(); + self.shared.maxsize().saturating_sub(policy.current_size()) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + pub fn contains( + &self, + py: pyo3::Python<'_>, + key: &::Key, + ) -> pyo3::PyResult { + let mut lock = self.inner.lock(); + + let handle = lock.get(py, key)?; + Ok(handle.is_some()) + } + + /// Inserts a [`Handle`](PolicyExt::Handle) into the cache, evicting entries as needed + /// to stay within the size budget before inserting. + /// + /// - If the key was already present, the old handle is replaced and returned as `Some`. + /// - If the key was absent, the handle is inserted and `None` is returned. + #[inline] + pub fn insert( + &self, + py: pyo3::Python<'_>, + handle: P::Handle, + ) -> pyo3::PyResult> { + let mut lock = self.inner.lock(); + insert_inner(&mut lock, &self.shared, py, handle) + } + + /// Removes the entry for `key` from the cache, returning its [`Handle`](PolicyExt::Handle) + /// if it was present, or `None` if the key was not found. + #[inline] + pub fn remove( + &self, + py: pyo3::Python<'_>, + key: &::Key, + ) -> pyo3::PyResult> { + let mut lock = self.inner.lock(); + + let entry = lock.entry(py, key, &self.shared)?; + match entry { + PolicyEntry::Occupied(occupied) => { + let handle = occupied.remove(); + Ok(Some(handle)) + } + PolicyEntry::Vacant(_) => Ok(None), + } + } + + /// Inserts all key-value pairs from `iterable` into the cache. + /// + /// `transform` converts a raw `(key, value)` Python object pair into a + /// policy-specific [`Handle`](PolicyExt::Handle) before insertion. 
+ /// + /// # Supported iterables + /// + /// - **`dict`** — detected via a fast [`PyObject_TypeCheck`](pyo3::ffi::PyObject_TypeCheck) + /// check and iterated with [`PyDictMethods::items`](pyo3::types::PyDictMethods) to avoid + /// the overhead of a generic Python iterator. + /// - **Any object with an `.items()` method** — covers all cache classes and + /// other dict-like types; `.items()` is called and the result is iterated. + /// - **Any other iterable** — iterated directly, with each element expected to + /// unpack as a `(key, value)` pair. + #[inline] + pub fn extend(&self, iterable: alias::BoundObject, mut transform: F) -> pyo3::PyResult<()> + where + F: FnMut(alias::PyObject, alias::PyObject) -> pyo3::PyResult, + { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyDictMethods; + + let mut lock = self.inner.lock(); + + // Using [pyo3::ffi::PyObject_TypeCheck] and [Bound::cast_unchecked] is much faster than [Bound::cast] + let is_dictionary = unsafe { + pyo3::ffi::PyObject_TypeCheck(iterable.as_ptr(), crate::typeref::STD_DICT_TYPE) == 1 + }; + if is_dictionary { + let dict = unsafe { iterable.cast_unchecked::() }; + + for pair in dict.items() { + let (key, value) = unsafe { + pair.extract::<(alias::PyObject, alias::PyObject)>() + .unwrap_unchecked() + }; + + insert_inner(&mut lock, &self.shared, pair.py(), transform(key, value)?)?; + } + + return Ok(()); + } + + // This way we support anything that has an `.items()` attribute, + // including our cache classes + let items_iterable = { + if let Some(items_attribute) = iterable.getattr_opt(c"items")? { + items_attribute.call0()? + } else { + iterable + } + }; + + for pair in items_iterable.try_iter()? { + let pair = pair?; + let (key, value) = pair.extract::<(alias::PyObject, alias::PyObject)>()?; + + insert_inner(&mut lock, &self.shared, pair.py(), transform(key, value)?)?; + } + + Ok(()) + } + + /// Calls `evict()` up to `n` times and returns the number of removed items. 
+ #[inline] + pub fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + if n <= 0 { + return Ok(0); + } + + let mut lock = self.inner.lock(); + + let mut count: pyo3::ffi::Py_ssize_t = 0; + while count < n { + match lock.evict(&self.shared) { + Ok(_) => {} + Err(err) => { + if !err.is_instance_of::(py) { + return Err(err); + } + + break; + } + } + + count += 1; + } + + Ok(count) + } + + #[inline] + pub fn clone_ref(&self, py: pyo3::Python) -> Self { + let shared = self.shared.clone_ref(py); + let policy = self.inner.lock().clone_ref(py); + + Self { + shared, + inner: parking_lot::Mutex::new(policy), + } + } + + pub fn build_pickle(&self, py: pyo3::Python) -> pyo3::PyResult { + let mut builder = pickle::Pickle::builder(py, 4)?; + + let getsizeof: Option = self.shared.getsizeof().clone_ref(py).into(); + + builder + .push(self.shared.maxsize())? + .push(getsizeof)? + .push(self.shared.global_ttl())?; + + let mut tuple = builder.begin_tuple(P::PICKLE_SIZE)?; + self.inner.lock().build_pickle(&mut tuple)?; + tuple.end()?; + + Ok(builder.finish()) + } +} + +impl Wrapped

{ + pub fn from_pickle(py: pyo3::Python<'_>, state: alias::PyObject) -> pyo3::PyResult { + let tuple = state.into_bound(py).cast_into::()?; + + let maxsize: usize = tuple.get_item(0)?.extract()?; + let getsizeof: Option = tuple.get_item(1)?.extract()?; + let global_ttl: Option = tuple.get_item(2)?.extract()?; + + if global_ttl.is_some_and(|x| x < 0.0) { + return Err(new_py_error!(PyValueError, "global_ttl is negative")); + } + + let builded = tuple.get_item(3)?.cast_into::()?; + + let (shared, inner) = P::from_pickle( + maxsize, + getsizeof, + global_ttl.map(|x| std::time::Duration::from_secs_f64(x)), + builded, + )?; + + Ok(Self { + shared, + inner: parking_lot::Mutex::new(inner), + }) + } +} diff --git a/src/pyclasses/base.rs b/src/pyclasses/base.rs new file mode 100644 index 0000000..581c018 --- /dev/null +++ b/src/pyclasses/base.rs @@ -0,0 +1,22 @@ +use crate::internal::alias; + +crate::implement_pyclass! { + /// Base implementation for cache classes. + /// + /// This abstract base class defines the generic structure for cache + /// implementations. + #[derive(Debug, Default, Clone, Copy)] + [subclass, generic, frozen] PyBaseCacheImpl as "BaseCacheImpl" ; +} + +#[pyo3::pymethods] +impl PyBaseCacheImpl { + #[new] + #[pyo3(signature = (*args, **kwargs))] + #[allow(unused_variables)] + fn __new__(args: alias::ArgsType, kwargs: Option) -> Self { + Self + } + + fn __init__(&self) {} +} diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs new file mode 100644 index 0000000..14be865 --- /dev/null +++ b/src/pyclasses/cache.rs @@ -0,0 +1,682 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::nopolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A thread-safe, memory-efficient key-value cache with no eviction policy. 
+ /// + /// Items remain in the cache until manually removed or the cache is cleared. + /// + /// ``Cache`` is essentially a configurable hashmap-like store. When an item is + /// inserted, it is stored directly without any ordering, priority tracking, or + /// access metadata. If a maximum size is configured, insertions beyond that + /// limit are rejected with an ``OverflowError``. All read and write operations + /// are thread-safe. + /// + /// Because no eviction logic runs in the background, there is no overhead from + /// tracking usage order, frequency counters, or expiry timestamps. + /// + /// Pros: + /// - Minimal overhead: no bookkeeping for eviction means lower CPU and + /// memory usage per entry compared to policy-based caches. + /// - Predictable behavior: items are never silently removed, so cache hits + /// are deterministic once an item is stored. + /// - Thread-safe: safe for concurrent reads and writes out of the box. + /// - Configurable capacity: a hard size limit prevents unbounded memory + /// growth. + /// + /// Cons: + /// - No automatic eviction: the cache can fill up and stop accepting new + /// entries if a max size is set, requiring manual management. + /// - Unordered: unlike a standard ``dict`` (Python 3.7+), insertion order + /// is not preserved. + /// - Not suitable for volatile data: stale entries persist forever unless + /// explicitly invalidated. + /// + /// Use ``Cache`` when you have a fixed, well-known set of keys that are + /// expensive to compute and never go stale (e.g. parsed config values, + /// compiled regex patterns, loaded templates), and when the lowest possible + /// overhead is required. + /// + /// Avoid it when cached data can become stale, when the working set is + /// unpredictable in size, or when automatic memory pressure relief is needed. 
+ [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyCache as "Cache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `Cache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + nopolicy::NoPolicy::new(capacity), + nopolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + FIXED_SIZE + (policy.table().capacity() * std::mem::size_of::()) + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + /// + /// Note: raises `OverflowError` if the cache reached the maxsize limit, + /// because this class does not have any algorithm. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = nopolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = nopolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + /// + /// NOTE: `Cache` always raises `NotImplementedError` because has neither policy nor algorithm to evict items. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheValues { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; 
+ pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + + 
let inner = self.0.get(); + let policy = inner.policy(); + + for handle_ref in unsafe { policy.table().iter() } { + let handle = unsafe { handle_ref.as_ref() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! { + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyCacheItems as "cache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyCacheKeys as "cache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyCacheValues as "cache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs new file mode 100644 index 0000000..08bf7a4 --- /dev/null +++ b/src/pyclasses/fifocache.rs @@ -0,0 +1,708 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use 
crate::internal::utils; +use crate::policies::fifopolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A First-In-First-Out (FIFO) cache eviction policy: when the cache is full, the oldest + /// inserted item is always the first to be removed, regardless of how often it has been accessed. + /// + /// ## How It Works + /// The FIFO algorithm is one of the simplest cache eviction strategies. Items are stored in + /// insertion order, and when the cache reaches capacity, the item that has been there the + /// longest is evicted to make room. There is no concept of "recently used" or "frequently used" + /// - age alone determines eviction order. Conceptually, it behaves like a queue: new items + /// join the back, and evictions come from the front. + /// + /// This implementation backs that queue with a `double-ended queue` for O(1) front removal, + /// paired with a `hash map` for O(1) key lookups. Rather than storing physical indices into + /// the deque (which shift every time an item is evicted from the front), the table stores + /// logical indices - a monotonically increasing counter assigned at insertion time. + /// A separate `front_offset` counter tracks how many items have ever been evicted; the physical + /// position of any key is recovered at read time as `entries[table[key] - front_offset]`, + /// keeping both eviction and lookup O(1) without any per-eviction rewriting of the table. + /// + /// ### Pros + /// - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + /// index-shifting that a native implementation would require on every eviction. + /// - Eviction order is fully deterministic: the oldest item always goes first, independent of access + /// patterns, making behaviour easy to reason about and reproduce in tests. + /// - No per-read overhead. 
Unlike LRU, FIFO requires no bookkeeping on cache hits. + /// + /// ### Cons + /// - Access-blind eviction. A hot item accessed thousands of times is evicted just as readily as one + /// that has never been read. Hit rates suffer on workloads with strong temporal locality. + /// - The logical-index indirection adds a layer of internal complexity compared to a naive queue-based cache. + /// - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) introduces + /// an occasional latency spike. Amortized cost is negligible, but worst-case latency is unbounded in principle. + /// + /// ## When to use it + /// Reach for `FIFOPolicy` when: + /// - Eviction order must be predictable and auditable: streaming pipelines, sequential batch processors, or + /// any context where deterministic behaviour simplifies debugging. + /// - Access patterns are roughly uniform, so there is no meaningful "hot" subset of keys that a recency or + /// frequency-aware policy could exploit. + /// - Read overhead must be minimal: FIFO's zero-cost hits make it preferable to LRU in insert-heavy workloads + /// with infrequent re-reads. + /// + /// Avoid it when your workload has strong temporal locality. If recently or frequently accessed items are likely + /// to be needed again soon, an LRU or LFU policy will deliver meaningfully better hit rates. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyFIFOCache as "FIFOCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyFIFOCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `FIFOCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. 
+ /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. + #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + fifopolicy::FIFOPolicy::new(capacity), + fifopolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. 
+ #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity().min(policy.entries().capacity()) + } + + /// Returns the number of entries currently in the cache. + #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.entries().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * std::mem::size_of::(); + let vecdeque_cap = policy.entries().capacity() * std::mem::size_of::(); + FIXED_SIZE + table_cap + vecdeque_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. 
+ /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = fifopolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. 
+ #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = fifopolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. + /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. 
+ #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. + #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheItems { + iter: parking_lot::Mutex::new(inner.policy().iter()), + gv, + 
initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheValues { + iter: parking_lot::Mutex::new(inner.policy().iter()), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheKeys { + iter: parking_lot::Mutex::new(inner.policy().iter()), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = policy.entries().iter().map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + 
handle.value().bind(py),
+            )
+        });
+
+        let items = utils::items_to_str(iter, policy.table().len()).unwrap();
+        format!(
+            "{}[maxsize={}]({})",
+            unsafe { utils::get_type_name(py, slf.as_ptr()) },
+            shared.maxsize(),
+            items
+        )
+    }
+
+    /// Returns the key stored at position `n` of the policy's entry sequence.
+    ///
+    /// A negative `n` is offset by the number of entries, so `-1` addresses the
+    /// last entry. Raises `IndexError` when `n` falls outside the sequence.
+    #[pyo3(signature = (n=0))]
+    fn first(
+        &self,
+        py: pyo3::Python,
+        mut n: pyo3::ffi::Py_ssize_t,
+    ) -> pyo3::PyResult {
+        let inner = self.0.get();
+        let policy = inner.policy();
+
+        if n < 0 {
+            n += policy.entries().len() as isize;
+        }
+        if n < 0 {
+            return Err(new_py_error!(PyIndexError, "`n` out of range"));
+        }
+
+        match policy.entries().get(n as usize) {
+            Some(handle) => Ok(handle.key().as_ref().clone_ref(py)),
+            None => Err(new_py_error!(PyIndexError, "`n` out of range")),
+        }
+    }
+
+    /// Returns the key of the last entry in the policy's entry sequence.
+    ///
+    /// Raises `IndexError` when the cache holds no entries.
+    fn last(&self, py: pyo3::Python) -> pyo3::PyResult {
+        let inner = self.0.get();
+        let policy = inner.policy();
+        match policy.entries().back() {
+            Some(handle) => Ok(handle.key().as_ref().clone_ref(py)),
+            // This method takes no `n`; the only failure mode is an empty cache.
+            None => Err(new_py_error!(PyIndexError, "cache is empty")),
+        }
+    }
+
+    /// Garbage-collector hook: reports every key and value held by the cache
+    /// to `visit`.
+    fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> {
+        // Skip only when the storage has NOT been set yet: `get()` below needs
+        // an initialized cell, and an unset cache has nothing to report.
+        if !self.0.is_initialized() {
+            return Ok(());
+        }
+
+        let inner = self.0.get();
+        // NOTE(review): `policy()` appears to hand out a guard (it is `drop`ped
+        // explicitly elsewhere in this impl) - confirm the collector cannot run
+        // on a thread that already holds it, otherwise acquire it non-blockingly.
+        let policy = inner.policy();
+
+        for handle in policy.entries().iter() {
+            visit.call(handle.key().as_ref())?;
+            visit.call(handle.value())?;
+        }
+        Ok(())
+    }
+
+    /// Garbage-collector hook: releases every entry through `policy.clear` so
+    /// that reference cycles passing through the cache can be broken.
+    fn __clear__(&self) {
+        // Same guard as `__traverse__`: there is nothing to clear before the
+        // storage has been set.
+        if !self.0.is_initialized() {
+            return;
+        }
+
+        let inner = self.0.get();
+        let mut policy = inner.policy();
+        policy.clear(inner.shared());
+    }
+}
+
+// Implement iterators
+macro_rules! implement_iterator {
+    (
+        $(
+            $name:ident as $pyname:literal
+            fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr }
+        )+
+    ) => {
+        $(
+            implement_pyclass!
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyFIFOCacheItems as "fifocache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyFIFOCacheKeys as "fifocache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyFIFOCacheValues as "fifocache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs new file mode 100644 index 0000000..bd6193d --- /dev/null +++ b/src/pyclasses/lfucache.rs @@ -0,0 +1,778 @@ +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::lfupolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A Least-Frequently-Used (LFU) cache eviction policy: when the cache is full, the item + /// with the lowest access count is evicted first. Ties in frequency are broken by recency - + /// among equally rare items, the oldest is evicted. 
+ /// + /// ## How It Works + /// The LFU algorithm tracks how many times each cached item has been accessed, and always + /// evicts the item with the smallest count. This makes it well-suited for workloads where + /// some items are structurally "hot" and where that frequency signal is stable enough to + /// be worth preserving across cache pressure events. + /// + /// This implementation uses a `lazy binary min-heap` keyed on access frequency, paired with + /// a `hash map` that maps each key to its cursor (a stable pointer into the heap's backing + /// buffer). The heap is "lazy" in the sense that it does not restore the heap invariant after + /// every frequency increment; instead it sets a dirty flag and defers the full re-sort until + /// the next eviction. This amortises the cost of heap maintenance across many hits, so + /// read-heavy workloads pay far less per operation than a classic eager heap would require. + /// + /// On a cache hit, the item's frequency counter is incremented in O(1) and the heap is marked + /// dirty. On eviction, the heap is sorted if dirty, and the minimum-frequency item is popped + /// in O(n log n) worst-case (amortised O(log n) under typical access distributions). Lookups + /// are O(1) via the hash map. + /// + /// ### Pros + /// - Frequency-aware eviction. Items that are accessed often are protected from eviction even + /// under heavy cache pressure, leading to higher hit rates on skewed workloads. + /// - O(1) cache hits. Incrementing a counter and marking the heap dirty is constant-time work, + /// with no structural reorganisation on the hot path. + /// - Lazy heap sorting amortises O(n log n) sort cost across many inserts and hits, keeping + /// the average cost per operation much lower than a naive eager implementation. + /// + /// ### Cons + /// - Eviction is O(n log n) worst-case. If the heap is maximally dirty (every entry modified + /// since last sort), a single eviction triggers a full re-sort over all entries. 
This is + /// amortised away in practice but introduces latency spikes under adversarial access patterns. + /// - Frequency counters accumulate indefinitely. A key that was hot during an early burst remains + /// privileged long after traffic shifts, causing "cache pollution" - stale items that monopolise + /// capacity because of historical frequency, not current utility. + /// - Access patterns must be skewed for LFU to outperform simpler policies. On uniform workloads, + /// frequency counters provide no signal and the extra bookkeeping is pure overhead. + /// + /// ## When to use it + /// Reach for `LFUPolicy` when: + /// - Your workload has a stable hot set: a minority of keys that are accessed disproportionately + /// often and whose relative popularity changes slowly over time. + /// - Cache pollution from one-time scans is a concern: LFU naturally resists large sequential reads + /// from displacing frequently accessed items, because freshly inserted keys start at count 1 and + /// are evicted before any item with accumulated hits. + /// - Hit rate matters more than worst-case eviction latency: the amortised cost is low, but if your + /// system has hard real-time latency requirements, the occasional sort spike may be unacceptable. + /// + /// Avoid it when access patterns shift rapidly. If the "hot" subset of keys changes frequently, + /// frequency counters become stale signals and LFU will evict items that have recently become + /// popular. In those cases, an LRU policy - which tracks recency rather than frequency - will + /// adapt faster and typically deliver better hit rates. + /// + /// Avoid it on uniform workloads where all keys are accessed with roughly equal probability. + /// The frequency signal provides no meaningful discrimination, and the overhead of maintaining + /// counters and a heap is wasted compared to the simpler bookkeeping of FIFO or LRU. 
+ [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyLFUCache as "LFUCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyLFUCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `LFUCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + lfupolicy::LFUPolicy::new(capacity), + lfupolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 0), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.heap().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.heap().len() * std::mem::size_of::(); + + FIXED_SIZE + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = + lfupolicy::FrequencyHandle::new(py, inner.shared().getsizeof(), key, value, 0)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 0), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = lfupolicy::FrequencyHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + 1, + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyLFUCacheItems { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyLFUCacheValues { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = 
inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyLFUCacheKeys { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn items_with_frequency(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyLFUCacheItemsWithFrequency { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + // We cannot use heap.iter here, because it requires re-sorting + // and this can lead to intrupt iterators. 
+ let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|cursor| { + let handle = cursor.element(); + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn peek( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let policy = inner.policy(); + + if let Some(x) = policy.peek(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + #[pyo3(signature = (n=0))] + fn least_frequently_used( + &self, + py: pyo3::Python, + mut n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + + if n < 0 { + n += policy.table().len() as isize; + } + if n < 0 { + return Err(new_py_error!(PyIndexError, "`n` out of range")); + } + + match policy.least_frequently_used(py, n as usize, inner.shared().generation_version()) { + Some(key) => Ok(key.into()), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + + let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.table().iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + 
visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! { + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.element() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyLFUCacheItems as "lfucache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyLFUCacheItemsWithFrequency as "lfucache_items_with_freq" + fn(py, handle) -> (alias::PyObject, alias::PyObject, u128) {{ + let freq = handle.frequency(); + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, freq) + }} + + PyLFUCacheKeys as "lfucache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyLFUCacheValues as "lfucache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs new file mode 100644 index 0000000..393468e --- /dev/null +++ b/src/pyclasses/lrucache.rs @@ -0,0 
+1,756 @@ +use crate::internal::alias; +use crate::internal::linked_list; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::lrupolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A Least-Recently-Used (LRU) cache eviction policy: when the cache is full, + /// the item that has not been accessed for the longest time is removed first, + /// regardless of how many times it was accessed in the past. + /// + /// ## How It Works + /// The LRU algorithm is one of the most widely used cache eviction strategies in + /// practice. Items are tracked by their access recency—every time an item is read + /// or written, it becomes the most recently used. When the cache reaches capacity, + /// the least recently used item (the one that was accessed longest ago) is + /// evicted to make room for new entries. + /// + /// This implementation pairs a doubly-linked list with a hash map. The linked list + /// maintains items in access order: the most recently used item sits at the back, + /// and the least recently used at the front. The hash map stores pointers (cursors) + /// into this list, enabling O(1) key lookups. On every access—read or write—the + /// accessed item is moved to the back of the list, promoting it to "most recently used" + /// status. When eviction is needed, the front item is removed. + /// + /// The doubly-linked list structure is critical: it permits O(1) removal and + /// reinsertion of any item anywhere in the ordering, without requiring a full rebuild + /// or index shifting. A running total tracks the current size of cached items, + /// allowing capacity checks in constant time. 
+ /// + /// ### Pros + /// - **Excellent hit rates on temporal locality.** Workloads where recently or + /// frequently accessed items are likely to be needed again soon benefit dramatically + /// from LRU's recency-aware eviction. Real-world caches (CPU L1/L2, database + /// buffers, CDN edges) rely on this principle. + /// - **Insert, lookup, and evict are all O(1) amortized.** The doubly-linked list + /// and hash map combination guarantees no per-operation index shifting or traversals. + /// - **Automatic adaptation to access patterns.** Hot keys naturally migrate to the + /// back of the list and stay there, while cold keys drift toward eviction. No + /// manual tuning of weights or thresholds is needed. + /// - **Per-hit cost is minimal.** While LRU does require bookkeeping on reads (moving + /// an item to the back), this bookkeeping is O(1) and adds negligible overhead to most + /// workloads. + /// + /// ### Cons + /// - **Per-read overhead.** Every cache hit requires updating the linked list (removing + /// the item from its current position and reinserting it at the back), which is + /// measurably slower than FIFO's zero-cost hits on read-heavy workloads. + /// - **Burst traffic can skew eviction.** A single item accessed many times in rapid + /// succession will be kept alive indefinitely, even if other keys have better long-term + /// utility. Recency is a proxy for future use, not a guarantee. + /// - **Implementation complexity.** The doubly-linked list and cursor-based hash table add + /// internal complexity compared to simpler policies like FIFO. + /// - **Memory overhead.** Storing doubly-linked pointers (prev/next) for every cached item + /// consumes extra memory compared to array-based alternatives. + /// + /// ## When to use it + /// Reach for `LRUPolicy` when: + /// - Your workload exhibits temporal locality—recently accessed items are likely to be + /// needed again soon. Databases, web caches, and CPU caches all exhibit this pattern. 
+ /// - Hit rate is your primary metric. If maximizing the proportion of requests served + /// from the cache matters more than minimizing per-hit latency, LRU is typically the + /// best general-purpose choice. + /// - Access patterns are unknown or unpredictable. LRU's automatic adaptation makes it a safe + /// default when you cannot statically analyze what keys will be hot. + /// - You need a standard, battle-tested algorithm. LRU is the de facto eviction policy in most + /// production systems; it is well-understood, widely supported, and easy to reason about. + /// + /// Avoid it when: + /// - Your workload is write-heavy with few or no re-reads. FIFO's zero per-hit bookkeeping + /// will outperform LRU if the cache is rarely hit. + /// - You need sub-microsecond latency on every operation. The linked-list manipulation on each + /// read can add measurable overhead in ultra-low-latency systems. + /// - Access patterns are bimodal or exhibit frequency-heavy behavior (a small set of items is + /// accessed far more often than others). An LFU policy may deliver better hit rates in such cases. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyLRUCache as "LRUCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyLRUCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `LRUCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). 
+ /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. + #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + lrupolicy::LRUPolicy::new(capacity), + lrupolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.list().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.list().len() * std::mem::size_of::(); + + FIXED_SIZE + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = lrupolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = lrupolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheValues { + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), + gv, + initial_gv, + 
}; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = unsafe { + policy.list().iter().map(|cursor| { + let handle = cursor.element(); + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn peek( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: 
utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let policy = inner.policy(); + + if let Some(x) = policy.peek(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + /// Returns the key at the front of the recency list (the least recently used entry). + /// + /// Raises `KeyError` if the cache is empty. + #[inline] + fn least_recently_used(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + match policy.list().cursor_front() { + Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), + None => Err(new_py_error!(PyKeyError, "cache is empty")), + } + } + + /// Returns the key at the back of the recency list (the most recently used entry). + /// + /// Raises `KeyError` if the cache is empty. + #[inline] + fn most_recently_used(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + match policy.list().cursor_back() { + Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), + None => Err(new_py_error!(PyKeyError, "cache is empty")), + } + } + + /// Garbage-collector traversal hook: reports every cached key and value to `visit`. + /// + /// Returns `Ok(())` without visiting anything while the cache is still uninitialized, + /// i.e. before `__init__` or `__setstate__` has stored the inner state with `set`. + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + // Only an initialized cache owns references; skip the walk otherwise so that + // `get()` is never reached on an unset cell. + if !self.0.is_initialized() { + return Ok(()); + } + + let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.list().iter() } { + let handle = unsafe { cursor.element() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + /// Garbage-collector clear hook: releases the cached entries through the policy's `clear`. + fn __clear__(&self) { + // An uninitialized cache has no inner state to clear. + if !self.0.is_initialized() { + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass!
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.element() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyLRUCacheItems as "lrucache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyLRUCacheKeys as "lrucache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyLRUCacheValues as "lrucache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs new file mode 100644 index 0000000..56cf742 --- /dev/null +++ b/src/pyclasses/mod.rs @@ -0,0 +1,8 @@ +pub mod base; +pub mod cache; +pub mod fifocache; +pub mod lfucache; +pub mod lrucache; +pub mod rrcache; +pub mod ttlcache; +pub mod vttlcache; diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs new file mode 100644 index 0000000..55326d8 --- /dev/null +++ b/src/pyclasses/rrcache.rs @@ -0,0 +1,702 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::rrpolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! 
{ + /// A thread-safe, memory-efficient key-value cache with Random Replacement eviction policy. + /// When the cache reaches its maximum size, an item is randomly selected and + /// evicted to make room for new entries. + /// + /// ## How It Works + /// `RRCache` is a configurable hashmap-like store with automatic eviction. When an item is inserted: + /// - It is stored directly without any ordering or priority tracking. + /// - If a maximum size is configured and the cache is full, a random entry is evicted to make room + /// for the new item. + /// - All read and write operations are thread-safe, making it safe for concurrent access without + /// external locking. + /// + /// The Random Replacement policy selects entries for eviction uniformly at random, ensuring fair + /// treatment across all cached items regardless of access patterns. + /// + /// ### Pros + /// - Low overhead: Random Replacement is computationally cheap compared to tracking access order or frequency. + /// - Thread-safe: safe for concurrent reads and writes out of the box. + /// - Configurable capacity: a hard size limit prevents unbounded memory growth while allowing new entries + /// through automatic eviction. + /// - No staleness issues: items persist only as long as they remain unselected by the eviction policy, + /// preventing indefinite accumulation of stale data. + /// + /// ### Cons + /// - Non-deterministic eviction: random selection means you cannot predict which entry will be removed, + /// potentially evicting recently cached or frequently accessed items. + /// - Unordered: insertion order is not preserved. + /// - Less optimal than LRU/LFU: for workloads with skewed access patterns, Random Replacement will + /// evict frequently used items more often than policy-aware caches. + /// + /// ## When to Use It + /// `RRCache` is the right choice when: + /// - You have a working set that can grow unpredictably and requires automatic memory management. 
+ /// - Access patterns are relatively uniform and predictable, so random eviction is not significantly + /// worse than smarter policies. + /// - You need low computational overhead and simple eviction logic. + /// - You want to prevent unbounded memory growth without the complexity of tracking usage metadata. + /// + /// Avoid it when you have highly skewed access patterns (where certain items are accessed far more + /// frequently than others), when cache hits are mission-critical and predictability matters, or when + /// you need fine-grained control over what gets evicted. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyRRCache as "RRCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyRRCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `RRCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + rrpolicy::RRPolicy::new(capacity), + rrpolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| rrpolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + FIXED_SIZE + policy.table().capacity() * std::mem::size_of::() + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equivalent to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + /// + /// Note: when the cache has reached the maxsize limit, the class-level documentation + /// states that a randomly selected entry is evicted to make room for the new item.
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = rrpolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| rrpolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = rrpolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raises a `KeyError`. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Removes `key` from the cache; raises `KeyError` if it is not present. + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + /// + /// NOTE: the pair is whichever entry the policy's `evict` selects; for `RRCache` that is + /// presumably a randomly chosen entry, so the result should not be relied upon. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero.
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheValues { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + 
}; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + #[inline] + fn random_key(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + if 
policy.table().is_empty() { + Err(new_py_error!(PyKeyError, "cache is empty")) + } else { + let nth = fastrand::usize(0..policy.table().len()); + + let bucket = unsafe { policy.table().iter().nth(nth).unwrap_unchecked() }; + + let handle = unsafe { bucket.as_ref() }; + Ok(handle.key().clone_ref(py).into()) + } + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if !self.0.is_initialized() { // nothing to visit until the cell has been set + return Ok(()); + } + + let inner = self.0.get(); + let policy = inner.policy(); + + for handle_ref in unsafe { policy.table().iter() } { + let handle = unsafe { handle_ref.as_ref() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + if !self.0.is_initialized() { // nothing to drop until the cell has been set + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass!
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyRRCacheItems as "rrcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyRRCacheKeys as "rrcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyRRCacheValues as "rrcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs new file mode 100644 index 0000000..deb5f19 --- /dev/null +++ b/src/pyclasses/ttlcache.rs @@ -0,0 +1,857 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::ttlpolicy; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp + /// and is considered stale — and eligible for eviction - once that deadline has passed, + /// regardless of how recently or frequently it was accessed. 
+ [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyTTLCache as "TTLCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyTTLCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `PyTTLCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// global_ttl: Time-to-live for cache entries, either as seconds or a timedelta. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, global_ttl, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + global_ttl: utils::TimeToLiveArgument, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let global_ttl = global_ttl.into_duration()?; + + if global_ttl == std::time::Duration::ZERO { + return Err(new_py_error!( + PyValueError, + "global_ttl must be positive and non-zero" + )); + } + + let wrapped = Wrapped::new( + ttlpolicy::TTLPolicy::new(capacity), + ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl.into())), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| { + ttlpolicy::ExpiringHandle::new( + py, + &getsizeof, + global_ttl.into(), + key, + value, + ) + }, + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + policy.current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + { + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + } + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + #[getter] + #[inline] + fn global_ttl(&self) -> f64 { + let inner = self.0.get(); + unsafe { inner.shared().global_ttl().unwrap_unchecked().as_secs_f64() } + } + + /// Returns the number of elements the map can hold without 
reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity().min(policy.entries().capacity()) + } + + /// Returns the number of entries currently in the cache. + #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.entries().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * std::mem::size_of::(); + let vecdeque_cap = + policy.entries().capacity() * std::mem::size_of::(); + + FIXED_SIZE + table_cap + vecdeque_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. 
+ /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let shared = inner.shared(); + let handle = ttlpolicy::ExpiringHandle::new( + py, + shared.getsizeof(), + unsafe { shared.global_ttl().unwrap_unchecked().into() }, + key, + value, + )?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let shared = inner.shared(); + + let ttl: utils::ExpiresAt = unsafe { shared.global_ttl().unwrap_unchecked().into() }; + let getsizeof = shared.getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. 
+ #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = ttlpolicy::ExpiringHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + unsafe { shared.global_ttl().unwrap_unchecked().into() }, + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. + /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. 
+ fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. + #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn 
items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(inner.shared()); + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheItems { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(inner.shared()); + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheValues { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(inner.shared()); + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheKeys { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: 
alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let now = std::time::SystemTime::now(); + let iter = policy + .entries() + .iter() + .filter(|handle| !handle.is_expired(now)) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }); + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + #[inline] + #[pyo3(signature=(*, reuse=false))] + fn expire(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.expire(shared.generation_version()); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + #[pyo3(signature = (n=0))] + fn first( + &self, + py: pyo3::Python, + mut n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + + policy.expire(inner.shared().generation_version()); + + if n < 0 { + n += policy.entries().len() as isize; + } + if n < 0 { + return Err(new_py_error!(PyIndexError, "`n` out of range")); + } + + match policy.entries().get(n as usize) { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } + } + + fn last(&self, py: pyo3::Python) -> pyo3::PyResult { // key of the back entry, after expired entries are purged + let inner = self.0.get(); + let mut policy = inner.policy(); + + policy.expire(inner.shared().generation_version()); + + match policy.entries().back() { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "cache is empty")), + } + } + + #[pyo3(signature = (key,
default=utils::OptionalArgument::Undefined))] + fn get_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, f64)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + let dur = x + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + return Ok((x.value().clone_ref(py), dur.as_secs_f64())); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, 0.0)), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(( + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind(), + 0.0, + )) + }, + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, f64)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? 
{ + let dur = x + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + return Ok((x.into_value(), dur.as_secs_f64())); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, 0.0)), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn popitem_with_expire(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, f64)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let dur = handle + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val, dur.as_secs_f64())) + } + + fn items_with_expire(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(inner.shared()); + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheItemsWithExpire { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if !self.0.is_initialized() { // nothing to visit until the cell has been set + return Ok(()); + } + + let inner = self.0.get(); + let policy = inner.policy(); + + for handle in policy.entries().iter() { + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + if !self.0.is_initialized() { // nothing to drop until the cell has been set + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass!
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let now = std::time::SystemTime::now(); + let mut iter = slf.iter.lock(); + let $py = slf.py(); + + while let Some(x) = iter.next() { + let $handle = unsafe { x.as_ref() }; + if $handle.is_expired(now) { + continue; + } + + return Ok($init); + } + + Err(new_py_error!(PyStopIteration, ())) + } + } + )+ + }; +} +implement_iterator!( + PyTTLCacheItems as "ttlcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyTTLCacheItemsWithExpire as "ttlcache_items_with_expire" + fn(py, handle) -> (alias::PyObject, alias::PyObject, f64) {{ + let dur = handle + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, dur.as_secs_f64()) + }} + + PyTTLCacheKeys as "ttlcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyTTLCacheValues as "ttlcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs new file mode 100644 index 0000000..451199a --- /dev/null +++ b/src/pyclasses/vttlcache.rs @@ -0,0 +1,833 @@ +use pyo3::IntoPyObjectExt; + +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use 
crate::policies::vttlpolicy; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A cache with a Variable Time-To-Live (VTTL) eviction policy. + /// + /// Each item can be inserted with its own individual TTL (time-to-live). When + /// an item's TTL expires, it is considered stale and will be evicted. Items + /// inserted without a TTL never expire and are only evicted when the cache + /// reaches capacity. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyVTTLCache as "VTTLCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyVTTLCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `PyVTTLCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// ttl: Time-to-live duration for `iterable` items. This *is not* a global ttl. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time.
+ #[pyo3(signature=(maxsize, iterable=None, ttl=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + ttl: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + vttlpolicy::VTTLPolicy::new(capacity), + vttlpolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let ttl: Option = match ttl { + Some(x) => Some(x.into_expires_at()?), + None => None, + }; + + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| vttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + policy.current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + { + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + } + + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.heap().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.heap().len() * std::mem::size_of::(); + + FIXED_SIZE + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ #[pyo3(signature=(key, value, ttl=None))] + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ttl: Option, + ) -> pyo3::PyResult> { + let ttl = match ttl { + Some(x) => Some(x.into_expires_at()?), + None => None, + }; + + let inner = self.0.get(); + let shared = inner.shared(); + let handle = vttlpolicy::ExpiringHandle::new(py, shared.getsizeof(), ttl, key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + #[pyo3(signature=(iterable, ttl=None))] + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ttl: Option, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let ttl = match ttl { + Some(x) => Some(x.into_expires_at()?), + None => None, + }; + + let inner = slf.0.get(); + let shared = inner.shared(); + let getsizeof = shared.getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| vttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value, None)?; + Ok(()) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined, ttl=None))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ttl: Option, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let ttl = match ttl { + Some(x) => Some(x.into_expires_at()?), + None => None, + }; + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = vttlpolicy::ExpiringHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + ttl, + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. 
+ #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. + #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheItems { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, 
result)) + } + + fn values(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheValues { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn keys(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheKeys { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + #[inline] + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let now = std::time::SystemTime::now(); + + // We cannot use heap.iter here, because it requires re-sorting + // and this can lead to intrupt iterators. 
+ let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref().element()) + .filter(|handle| !handle.is_expired(now)) + .map(|handle| { + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[maxsize={}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.maxsize(), + items + ) + } + + #[inline] + #[pyo3(signature=(*, reuse=false))] + fn expire(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.expire(shared.generation_version()); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(handle) = policy.get(py, &key)? { + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? 
+ } + None => py.None(), + }; + + return Ok((handle.value().clone_ref(py), dur)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, py.None())), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(( + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind(), + py.None(), + )) + }, + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(handle) = inner.remove(py, &key)? { + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? + } + None => py.None(), + }; + + return Ok((handle.into_value(), dur)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, py.None())), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn popitem_with_expire( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? 
+ } + None => py.None(), + }; + + let (key, val) = handle.into_pair(); + Ok((key.into(), val, dur)) + } + + fn items_with_expire(&self) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheItemsWithExpire { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + + let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.table().iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! 
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let now = std::time::SystemTime::now(); + let mut iter = slf.iter.lock(); + let $py = slf.py(); + + while let Some(x) = iter.next() { + let $handle = unsafe { x.element() }; + if $handle.is_expired(now) { + continue; + } + + return Ok($init); + } + + Err(new_py_error!(PyStopIteration, ())) + } + } + )+ + }; +} +implement_iterator!( + PyVTTLCacheItems as "vttlcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyVTTLCacheItemsWithExpire as "vttlcache_items_with_expire" + fn(py, handle) -> (alias::PyObject, alias::PyObject, alias::PyObject) {{ + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? + } + None => py.None(), + }; + + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, dur) + }} + + PyVTTLCacheKeys as "vttlcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyVTTLCacheValues as "vttlcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/typeref.rs b/src/typeref.rs new file mode 100644 index 0000000..e671a43 --- /dev/null +++ b/src/typeref.rs @@ -0,0 +1,28 @@ +/// Raw pointer to the CPython `dict`, cached at initialization. 
+pub static mut STD_DICT_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); + +/// Raw pointer to the CPython `tuple`, cached at initialization. +pub static mut STD_TUPLE_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); + +#[inline(never)] +unsafe fn get_type_object_for( + py: pyo3::Python, +) -> *mut pyo3::ffi::PyTypeObject { + T::type_object_raw(py) +} + +#[cold] +#[inline(never)] +fn _initialize_typeref(py: pyo3::Python) { + unsafe { + STD_DICT_TYPE = get_type_object_for::(py); + STD_TUPLE_TYPE = get_type_object_for::(py); + } +} + +/// Initializes the cached CPython type object pointers. +pub fn initialize_typeref(py: pyo3::Python) { + static INIT: std::sync::Once = std::sync::Once::new(); + + INIT.call_once(|| _initialize_typeref(py)); +} diff --git a/python/tests/__init__.py b/tests/__init__.py similarity index 100% rename from python/tests/__init__.py rename to tests/__init__.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4c8a3a2 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +from hypothesis import HealthCheck, settings + +# Register a custom profile that suppresses the health check +settings.register_profile( + "global_fuzz_settings", suppress_health_check=[HealthCheck.differing_executors] +) + +# Load the profile globally for the entire test run +settings.load_profile("global_fuzz_settings") diff --git a/tests/mixins.py b/tests/mixins.py new file mode 100644 index 0000000..7dade7f --- /dev/null +++ b/tests/mixins.py @@ -0,0 +1,1227 @@ +import copy as stdcopy +import dataclasses +import pickle +import sys +import threading +import time +import typing +from datetime import timedelta +from unittest.mock import patch + +import pytest +from hypothesis import assume, given +from hypothesis import strategies as st + +import cachebox + +# Strategy for keys that are hashable (str, int, tuple of ints) +hashable_keys = st.one_of( + st.text(), + st.integers(), + st.floats(allow_nan=False), + 
st.decimals(allow_nan=False), + st.tuples(st.integers(), st.integers()), +) + +# Strategy for arbitrary values +any_value = st.one_of( + st.none(), + st.booleans(), + st.integers(), + st.floats(allow_nan=False), + st.text(), + st.binary(), + st.lists(st.integers(), max_size=5), +) + + +class BaseMixin: + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + raise NotImplementedError + + +class InitializeMixin(BaseMixin): + def test_empty_on_creation(self): + cache = self.create_cache() + assert len(cache) == 0 + + def test_maxsize_stored(self): + cache = self.create_cache() + assert cache.maxsize == 10 + + def test_maxsize_zero_means_unlimited(self): + cache = self.create_cache(0) + assert cache.maxsize == sys.maxsize + + def test_init_from_dict(self): + c = self.create_cache(maxsize=10, iterable={"a": 1, "b": 2}) + assert c.get("a") == 1 + assert c.get("b") == 2 + assert len(c) == 2 + + def test_init_from_list_of_tuples(self): + c = self.create_cache(maxsize=10, iterable=[("x", 10), ("y", 20)]) + assert c.get("x") == 10 + assert c.get("y") == 20 + + def test_init_from_other_cache(self): + iterable = self.create_cache(maxsize=10, iterable=[("x", 10), ("y", 20)]) + + c = self.create_cache(maxsize=10, iterable=iterable) + assert c.get("x") == 10 + assert c.get("y") == 20 + + def test_capacity_param(self): + c = self.create_cache(maxsize=10, capacity=10) + assert c.capacity() >= 10 + + def test_getsizeof_stored(self): + sizer = lambda k, v: len(v) # noqa: E731 + + c = self.create_cache(maxsize=100, getsizeof=sizer) + assert c.getsizeof is sizer + + +class InsertAndGetMixin(BaseMixin): + def test_insert_returns_none_on_new_key(self): + cache = self.create_cache() + + result = cache.insert("k", "v") + assert result is None + + def test_insert_returns_old_value_on_update(self): + cache = self.create_cache() + + cache.insert("k", "v1") + result = cache.insert("k", 
"v2") + assert result == "v1" + + def test_get_existing_key(self): + cache = self.create_cache() + + cache.insert("k", 42) + assert cache.get("k") == 42 + + def test_get_missing_key_returns_none(self): + cache = self.create_cache() + + assert cache.get("nope") is None + + def test_get_missing_key_returns_custom_default(self): + cache = self.create_cache() + + assert cache.get("nope", "fallback") == "fallback" + + def test_setitem_getitem(self): + cache = self.create_cache() + + cache["k"] = "v" + assert cache["k"] == "v" + + def test_getitem_missing_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + _ = cache["ghost"] + + def test_none_value_stored_correctly(self): + cache = self.create_cache() + + cache.insert("k", None) + # None value is present — default should NOT be returned + assert cache.get("k", "MISS") is None + + def test_overwrite_keeps_len_unchanged(self): + cache = self.create_cache() + + cache.insert("k", 1) + cache.insert("k", 2) + assert len(cache) == 1 + + def test_insert_get_raw_type(self): + class AType: + pass + + cache = self.create_cache() + cache[AType] = AType + assert cache[AType] is AType + + +class PopitemMixin(BaseMixin): + def test_popitem_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + cache.popitem() + + def test_popitem_updates_currsize(self): + cache = self.create_cache(10, {i: i for i in range(20)}) + + assert cache.is_full() + assert cache.remaining_size() == 0 + assert cache.current_size() == 10 + assert len(cache) == 10 + + cache.popitem() + + assert not cache.is_full() + assert cache.remaining_size() == 1 + assert cache.current_size() == 9 + assert len(cache) == 9 + + +class SetDefaultMixin(BaseMixin): + def test_setdefault_inserts_when_absent(self): + cache = self.create_cache() + + result = cache.setdefault("k", "default") + assert result == "default" + assert cache.get("k") == "default" + + def test_setdefault_returns_existing_value(self): + cache 
= self.create_cache() + + cache.insert("k", "existing") + result = cache.setdefault("k", "default") + assert result == "existing" + assert cache.get("k") == "existing" + + +class PopAndDeleteMixin(BaseMixin): + def test_pop_existing_key(self): + cache = self.create_cache() + + cache.insert("k", "v") + result = cache.pop("k") + assert result == "v" + assert cache.get("k") is None + + def test_pop_missing_key_with_default(self): + cache = self.create_cache() + + assert cache.pop("ghost", "default") == "default" + + def test_pop_missing_key_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + cache.pop("ghost") + + def test_delitem_existing_key(self): + cache = self.create_cache() + + cache["k"] = "v" + del cache["k"] + assert cache.get("k") is None + + def test_delitem_missing_key_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + del cache["ghost"] + + +class UpdateMixin(BaseMixin): + def test_update_from_dict(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + assert cache.get("a") == 1 + assert cache.get("b") == 2 + + def test_update_from_other(self): + iterable = self.create_cache(10, ((str(i), i) for i in range(10))) + cache = self.create_cache() + + cache.update(iterable) + for i in range(10): + assert cache.get(str(i)) == i + + def test_update_from_list_of_tuples(self): + cache = self.create_cache() + + cache.update([("x", 10), ("y", 20)]) + assert cache.get("x") == 10 + assert cache.get("y") == 20 + + def test_update_overwrites_existing(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.update({"a": 99}) + assert cache.get("a") == 99 + + def test_update_invalid_argument(self): + cache = self.create_cache() + + with pytest.raises(TypeError): + cache.update("abc") # type: ignore + + with pytest.raises(TypeError): + cache.update({1, 2, 3}) # type: ignore + + class _invalid_items: + def items(self): + return [1, 2, 3] + + with 
pytest.raises(TypeError): + cache.update(_invalid_items()) # type: ignore + + +class IntrospectionMixin(BaseMixin): + def test_len_reflects_insertions(self): + cache = self.create_cache() + + assert len(cache) == 0 + cache.insert("a", 1) + assert len(cache) == 1 + cache.insert("b", 2) + assert len(cache) == 2 + + def test_current_size_equals_len_without_getsizeof(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.insert("b", 2) + assert cache.current_size() == len(cache) + + def test_remaining_size(self): + cache = self.create_cache() + + cache.insert("a", 1) + assert cache.remaining_size() == cache.maxsize - cache.current_size() + + def test_is_empty_on_new_cache(self): + cache = self.create_cache() + + assert cache.is_empty() + + def test_is_not_empty_after_insert(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert not cache.is_empty() + + def test_bool_false_when_empty(self): + cache = self.create_cache() + + assert not bool(cache) + + def test_bool_true_when_not_empty(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert bool(cache) + + def test_contains_operator(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert "k" in cache + assert "ghost" not in cache + + def test_contains_method(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert cache.contains("k") + assert not cache.contains("ghost") + + def test_repr_string(self): + cache = self.create_cache() + + cache.insert("k", "v") + out = repr(cache) + + assert isinstance(out, str) + assert type(cache).__name__ in out + + def test_eq_same_contents(self): + c1 = self.create_cache(maxsize=10, iterable={"a": 1}) + c2 = self.create_cache(maxsize=10, iterable={"a": 1}) + assert c1 == c2 + + def test_ne_different_contents(self): + c1 = self.create_cache(maxsize=10, iterable={"a": 1}) + c2 = self.create_cache(maxsize=10, iterable={"b": 2}) + assert c1 != c2 + + +class IterationMixin(BaseMixin): + def 
test_keys_returns_all_keys(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + assert set(cache.keys()) == {"a", "b", "c"} + + def test_values_returns_all_values(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + assert set(cache.values()) == {1, 2, 3} + + def test_items_returns_all_pairs(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + assert set(cache.items()) == {("a", 1), ("b", 2)} + + def test_iter_yields_keys(self): + cache = self.create_cache() + + cache.update({"x": 10, "y": 20}) + assert set(iter(cache)) == {"x", "y"} + + def test_generation_version_on_remove(self): + cache = self.create_cache(10, {i: i for i in range(10)}) + + with pytest.raises(RuntimeError): + for _ in cache: + del cache[9] + + with pytest.raises(RuntimeError): + for _ in cache.values(): + del cache[8] + + with pytest.raises(RuntimeError): + for _ in cache.items(): + del cache[7] + + for _ in cache: + # It should not increment the generation version + # because the key doesn't exist + cache.pop("hello", None) + + def test_generation_version_on_insert(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + with pytest.raises(RuntimeError): + for _ in cache: + cache.insert("A", 1) + + with pytest.raises(RuntimeError): + for _ in cache.values(): + cache.insert("B", 1) + + with pytest.raises(RuntimeError): + for _ in cache.items(): + cache.insert("C", 1) + + if isinstance(cache, cachebox.LRUCache): + return + + for i in cache: + # It should not increment the generation version + # in replacing value + cache.insert(i, "hello") + + def test_generation_version_on_shrink_to_fit(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + if isinstance(cache, cachebox.LRUCache): + pytest.skip("LRUCache is excluded") + + with pytest.raises(RuntimeError): + for _ in cache: + cache.shrink_to_fit() + + def test_generation_version_on_clear(self): + cache = self.create_cache(10, {i: i for i in 
range(3)}) + + with pytest.raises(RuntimeError): + for _ in cache: + cache.clear() + + def test_generation_version_on_popitem(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + if isinstance(cache, cachebox.Cache): + pytest.skip("Cache doesn't implemented popitem") + + with pytest.raises(RuntimeError): + for _ in cache: + cache.popitem() + + +class DrainClearShrinkMixin(BaseMixin): + def test_clear_removes_all_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + cache.clear() + assert len(cache) == 0 + assert cache.is_empty() + assert cache.current_size() == 0 + + def test_clear_with_reuse(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + cache.clear(reuse=True) + assert len(cache) == 0 + + def test_items_accessible_after_clear_and_reinsert(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.clear() + cache.insert("b", 2) + assert cache.get("b") == 2 + assert cache.get("a") is None + + def test_shrink_to_fit_does_not_lose_data(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + cache.shrink_to_fit() + assert cache.get("a") == 1 + assert cache.get("b") == 2 + assert cache.get("c") == 3 + + +class CopyMixin(BaseMixin): + def test_copy_has_same_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + c2 = cache.copy() + assert set(c2.items()) == set(cache.items()) + + def test_copy_is_independent(self): + cache = self.create_cache() + + cache.insert("a", 1) + c2 = cache.copy() + c2.insert("b", 2) + assert not cache.contains("b") + + def test_copy_preserves_maxsize(self): + cache = self.create_cache() + + c2 = cache.copy() + assert c2.maxsize == cache.maxsize + assert c2.current_size() == cache.current_size() + + def test_copy_is_shallow(self): + cache = self.create_cache() + + cache["A"] = [1, 2] + cache["B"] = {1: 1, 2: 2} + + c2 = cache.copy() + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 
2 + assert len(c2["B"]) == 2 + + c2["A"].append(3) + c2["B"][3] = 3 + + assert len(cache["A"]) == 3 + assert len(cache["B"]) == 3 + assert len(c2["A"]) == 3 + assert len(c2["B"]) == 3 + + def test_deepcopy_has_same_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + c2 = stdcopy.deepcopy(cache) + assert set(c2.items()) == set(cache.items()) + + def test_deepcopy_is_independent(self): + cache = self.create_cache() + + cache.insert("a", 1) + c2 = stdcopy.deepcopy(cache) + c2.insert("b", 2) + assert not cache.contains("b") + + def test_deepcopy_preserves_maxsize(self): + cache = self.create_cache() + + c2 = stdcopy.deepcopy(cache) + assert c2.maxsize == cache.maxsize + assert c2.current_size() == cache.current_size() + + def test_deepcopy_is_not_shallow(self): + cache = self.create_cache() + + cache["A"] = [1, 2] + cache["B"] = {1: 1, 2: 2} + + c2 = stdcopy.deepcopy(cache) + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 2 + assert len(c2["B"]) == 2 + + c2["A"].append(3) + c2["B"][3] = 3 + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 3 + assert len(c2["B"]) == 3 + + +@dataclasses.dataclass +class Sized: + size: int + key: typing.Any + + def __hash__(self) -> int: + return hash(self.key) + + def __eq__(self, other: object) -> bool: + return isinstance(other, Sized) and self.key == other.key + + +class GetSizeOfMixin(BaseMixin): + def test_current_size_uses_getsizeof(self): + # Each value is a list; size = len(value) + sizer = lambda k, v: len(v) # noqa: E731 + + c = self.create_cache(maxsize=10, getsizeof=sizer) + c.insert("a", [1, 2, 3]) # size 3 + c.insert("b", [1]) # size 1 + assert c.current_size() == 4 + + def test_overflow_based_on_weighted_size(self): + # maxsize=5; each entry costs its value + sizer = lambda k, v: v # noqa: E731 + + c = self.create_cache(maxsize=5, getsizeof=sizer) + c.insert("a", 3) # size now 3 + c.insert("b", 2) # size now 5 — full + + 
if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c.insert("c", 1) # would push to 6 + + def test_getsizeof_invalid_handle_size(self): + c = self.create_cache(maxsize=5, getsizeof=lambda x, _: len(x)) + + with pytest.raises(OverflowError): + c["more than 5"] = 1 + + with pytest.raises(OverflowError): + c.update({"more than 5": 1}) + + with pytest.raises(OverflowError): + c.update({"5": 1, "more than 5": 2}) + + assert "5" in c + + def test_getsizeof_insert_enforced(self): + c = self.create_cache(maxsize=100, getsizeof=lambda x, v: x.size + v.size) + + k1 = Sized(10, 1) + v1 = Sized(80, 101) + c[k1] = v1 + + k2 = Sized(10, 2) + v2 = Sized(80, 102) + + if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c[k2] = v2 + + assert k1 in c + + else: + c[k2] = v2 + assert k1 not in c + assert k2 in c + assert c.current_size() <= c.maxsize + + def test_getsizeof_insert_existing_key_enforced(self): + c = self.create_cache(maxsize=100, getsizeof=lambda x, _: x.size) + + a_size_10 = Sized(10, "A") + a_size_100 = Sized(100, "A") + + b_size_10 = Sized(10, "B") + + c[a_size_10] = 1 + c[b_size_10] = 2 + + # A(10) -> currsize=10 + # B(10) -> currsize=20 + # + # A(100) -> currsize=110 - exceeded maxsize, should call evict + if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c[a_size_100] = "new" + + return + + c[a_size_100] = "new" + + +class EdgeCasesMixin(BaseMixin): + def test_integer_keys(self): + cache = self.create_cache() + + cache.insert(1, "one") + assert cache.get(1) == "one" + + def test_tuple_keys(self): + cache = self.create_cache() + + cache.insert((1, 2), "tuple") + assert cache.get((1, 2)) == "tuple" + + def test_empty_string_key_and_value(self): + cache = self.create_cache() + + cache.insert("", "") + assert cache.get("") == "" + + def test_large_value(self): + unlimited = self.create_cache(0) + + big = "x" * 100_000 + unlimited.insert("big", big) + assert unlimited.get("big") == big + + def 
test_multiple_types_as_values(self): + cache = self.create_cache() + + cache.insert("int", 1) + cache.insert("list", [1, 2]) + cache.insert("dict", {"a": 1}) + assert cache.get("int") == 1 + assert cache.get("list") == [1, 2] + assert cache.get("dict") == {"a": 1} + + def test_bad_hash_key(self): + + @dataclasses.dataclass + class BadHash: + val: int + + def __hash__(self) -> int: + return 1 + + size = 1000 + cache = self.create_cache(size, capacity=size) + + for i in range(size): + cache.insert(BadHash(val=i), i) + cache.get(BadHash(val=i)) + + +class IssuesMixin(BaseMixin): + def test_issue_5(self): + # https://github.com/awolverp/cachebox/issues/5 + + @dataclasses.dataclass + class EQ: + val: int + + def __hash__(self) -> int: + return self.val + + @dataclasses.dataclass + class NoEQ: + val: int + + def __hash__(self) -> int: + return self.val + + size = 1000 + cache = self.create_cache(size, capacity=size) + + for i in range(size): + cache.insert(NoEQ(val=i), i) + cache.get(NoEQ(val=i)) + + cache = self.create_cache(size, capacity=size) + + for i in range(size): + cache.insert(EQ(val=i), i) + cache.get(EQ(val=i)) + + +class SweepIntervalMixin(BaseMixin): + def _create_sweep_cache( + self, *args, **kwds + ) -> cachebox.TTLCache | cachebox.VTTLCache: + return typing.cast( + cachebox.TTLCache | cachebox.VTTLCache, + self.create_cache(*args, **kwds), + ) + + def test_none_by_default_no_thread(self): + cache = self._create_sweep_cache(maxsize=10) + assert cache.sweep_interval is None + assert cache._thread is None + assert cache._thread_is_running is False + + def test_numeric_sweep_interval_starts_thread(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + + try: + assert cache._thread is not None + assert cache._thread.is_alive() + assert cache._thread_is_running is True + finally: + cache.stop_sweeper() + + def test_timedelta_sweep_interval_starts_thread(self): + cache = self._create_sweep_cache( + maxsize=10, 
sweep_interval=timedelta(seconds=1) + ) + try: + assert cache._thread is not None + assert cache._thread.is_alive() + finally: + cache.stop_sweeper() + + def test_timedelta_converted_to_seconds(self): + cache = self._create_sweep_cache( + maxsize=10, sweep_interval=timedelta(seconds=5) + ) + try: + assert cache.sweep_interval == 5.0 + finally: + cache.stop_sweeper() + + def test_sweep_interval_stored_as_float(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=2) + try: + assert cache.sweep_interval == 2.0 + finally: + cache.stop_sweeper() + + def test_sweep_interval_below_1_raises(self): + with pytest.raises( + ValueError, match="sweep_interval must be more than 1 seconds" + ): + self._create_sweep_cache(maxsize=10, sweep_interval=0.5) + + def test_sweep_interval_zero_raises(self): + with pytest.raises(ValueError): + self._create_sweep_cache(maxsize=10, sweep_interval=0) + + def test_sweep_interval_negative_raises(self): + with pytest.raises(ValueError): + self._create_sweep_cache(maxsize=10, sweep_interval=-1) + + def test_sweep_interval_exactly_1_is_valid(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + assert cache.sweep_interval == 1.0 + finally: + cache.stop_sweeper() + + def test_thread_is_daemon(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + assert cache._thread.daemon is True # type: ignore + finally: + cache.stop_sweeper() + + def test_stop_sets_flag_false(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + assert cache._thread_is_running is True + cache.stop_sweeper() + assert cache._thread_is_running is False + + def test_stop_on_cache_without_sweeper_is_safe(self): + cache = self._create_sweep_cache(maxsize=10) + cache.stop_sweeper() # should not raise + assert cache._thread_is_running is False + + def test_stop_idempotent(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + cache.stop_sweeper() + cache.stop_sweeper() # second 
call must not raise + assert cache._thread_is_running is False + + def test_thread_eventually_stops_after_signal(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + cache.stop_sweeper() + cache._thread.join(timeout=3) # type: ignore + assert not cache._thread.is_alive() # type: ignore + + def test_expire_called_periodically(self): + """expire() should be invoked by the background thread on schedule.""" + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + with patch.object(cache, "expire", wraps=cache.expire) as mock_expire: + time.sleep(2.5) + assert mock_expire.call_count >= 2 + finally: + cache.stop_sweeper() + + def test_expired_items_removed_by_sweeper(self): + """Items with elapsed TTLs should be absent after a sweep cycle.""" + cache = self._create_sweep_cache(maxsize=50, sweep_interval=1) + try: + if isinstance(cache, cachebox.TTLCache): + cache.insert("b", 2) + else: + cache.insert("b", 2, 0.1) + + time.sleep(2) + assert "b" not in cache + finally: + cache.stop_sweeper() + + def test_concurrent_writes_with_sweeper_running(self): + """Concurrent inserts alongside the sweeper must not raise.""" + cache = self._create_sweep_cache(maxsize=100, sweep_interval=1) + errors = [] + + def writer(start): + try: + for i in range(start, start + 50): + cache[f"k{i}"] = i + time.sleep(0.01) + except Exception as exc: + errors.append(exc) + + threads = [threading.Thread(target=writer, args=(i * 50,)) for i in range(4)] + try: + for t in threads: + t.start() + for t in threads: + t.join() + assert errors == [], f"Unexpected errors: {errors}" + finally: + cache.stop_sweeper() + + def test_stop_sweeper_while_sleeping(self): + """stop_sweeper() called mid-sleep should clear the flag without hanging.""" + cache = self._create_sweep_cache(maxsize=10, sweep_interval=30) # long interval + thread = cache._thread + cache.stop_sweeper() + assert cache._thread_is_running is False + assert thread is not None + + def 
test_del_stops_sweeper(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + assert cache._thread_is_running is True + cache.__del__() + assert cache._thread_is_running is False + + def test_del_without_sweeper_is_safe(self): + cache = self._create_sweep_cache(maxsize=10) + cache.__del__() # must not raise + + +class FuzzyMixin(BaseMixin): + @given(key=hashable_keys, value=any_value) + def test_fuzzy_insert_then_get_returns_same_value(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + assert c.get(key) == value + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_insert_new_key_returns_none(self, key, value): + c = self.create_cache(maxsize=0) + result = c.insert(key, value) + assert result is None + + @given(key=hashable_keys, v1=any_value, v2=any_value) + def test_fuzzy_insert_existing_key_returns_old_value(self, key, v1, v2): + c = self.create_cache(maxsize=0) + c.insert(key, v1) + old = c.insert(key, v2) + assert old == v1 + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_len_never_exceeds_unique_keys(self, pairs): + c = self.create_cache(maxsize=0) + expected = {} + for k, v in pairs: + c.insert(k, v) + expected[k] = v + assert len(c) == len(expected) + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_len_increases_by_one_on_new_key(self, key, value): + c = self.create_cache(maxsize=0) + before = len(c) + c.insert(key, value) + assert len(c) == before + 1 + + @given(key=hashable_keys, v1=any_value, v2=any_value) + def test_fuzzy_len_unchanged_on_overwrite(self, key, v1, v2): + c = self.create_cache(maxsize=0) + c.insert(key, v1) + before = len(c) + c.insert(key, v2) + assert len(c) == before + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_contains_true_after_insert(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + assert key in c + assert c.contains(key) + + @given(key=hashable_keys, value=any_value) + 
def test_fuzzy_contains_false_after_delete(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + del c[key] + assert key not in c + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_pop_returns_inserted_value(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + assert c.pop(key) == value + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_pop_removes_key(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + c.pop(key) + assert key not in c + + @given( + maxsize=st.integers(min_value=1, max_value=50), + pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=50), + ) + def test_fuzzy_current_size_plus_remaining_equals_maxsize(self, maxsize, pairs): + c = self.create_cache(maxsize=maxsize) + for k, v in pairs: + if c.is_full(): + break + c.insert(k, v) + assert c.current_size() + c.remaining_size() == maxsize + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_clear_always_leaves_cache_empty(self, pairs): + c = self.create_cache(maxsize=0) + for k, v in pairs: + c.insert(k, v) + c.clear() + assert len(c) == 0 + assert c.is_empty() + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_keys_values_items_are_consistent(self, pairs): + c = self.create_cache(maxsize=0) + truth = {} + for k, v in pairs: + c.insert(k, v) + truth[k] = v + + cache_items = dict(c.items()) + assert cache_items == truth + assert set(c.keys()) == set(truth.keys()) + assert sorted(str(v) for v in c.values()) == sorted( + str(v) for v in truth.values() + ) + + @given(key=hashable_keys, existing=any_value, default=any_value) + def test_fuzzy_setdefault_never_overwrites_existing(self, key, existing, default): + c = self.create_cache(maxsize=0) + c.insert(key, existing) + c.setdefault(key, default) + assert c.get(key) == existing + + @given(key=hashable_keys, default=any_value) + def 
test_fuzzy_setdefault_inserts_when_missing(self, key, default): + c = self.create_cache(maxsize=0) + c.setdefault(key, default) + assert c.get(key) == default + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_copy_equals_original(self, pairs): + c = self.create_cache(maxsize=0) + for k, v in pairs: + c.insert(k, v) + assert c.copy() == c + + @given( + key=hashable_keys, value=any_value, new_key=hashable_keys, new_value=any_value + ) + def test_fuzzy_copy_is_independent_of_original( + self, key, value, new_key, new_value + ): + assume(new_key != key) + c = self.create_cache(maxsize=0) + c.insert(key, value) + c2 = c.copy() + c2.insert(new_key, new_value) + assert not c.contains(new_key) + + @given(key=hashable_keys, value=any_value) + def test_fuzzy_chain_methods(self, key, value): + c = self.create_cache(maxsize=0) + assert c.insert(key, value) is None + assert c.setdefault(key, value) == value + assert c.get(key) == value + assert c[key] == value + assert c.pop(key) == value + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_getstate_setstate(self, pairs): + original = self.create_cache(20, pairs) + + state = original.__getstate__() + + pickled = original.__class__.__new__(original.__class__) + pickled.__setstate__(state) + + assert pickled.current_size() == original.current_size() + assert pickled == original + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_pickle_loads_dumps(self, pairs): + original = self.create_cache(20, pairs) + + state = pickle.dumps(original) + pickled = pickle.loads(state) + + assert pickled.current_size() == original.current_size() + assert pickled == original + + +class BenchmarkMixin(BaseMixin): + @pytest.fixture(autouse=True) + def _set_benchmark_name(self, benchmark, request): + benchmark.name = f"{type(self).__name__}.{request.node.originalname}" + + @pytest.fixture() + def cache(self) -> 
cachebox.BaseCacheImpl: + return self.create_cache(256) + + @pytest.fixture() + def full_cache(self) -> cachebox.BaseCacheImpl: + """A cache pre-populated to capacity.""" + c = self.create_cache(256) + + for i in range(256): + c.insert(i, i) + + return c + + def test_bench_insert(self, benchmark, cache): + i = 0 + + def run(): + nonlocal i + cache.insert(i % 256, i) + i += 1 + + benchmark.pedantic(run, iterations=1000, rounds=100, warmup_rounds=2) + + def test_bench_update(self, benchmark, cache): + data = {i: i for i in range(64)} + benchmark.pedantic( + cache.update, + args=(data,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_get_hit(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.get, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_get_miss(self, benchmark, cache): + key = 9999 + benchmark.pedantic( + cache.get, + args=(key, None), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_getitem(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.__getitem__, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_contains(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.contains, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_pop(self, benchmark): + """Each round gets a fresh cache so pop always finds the key.""" + key = 0 + val = 0 + + def setup(): + c = self.create_cache(256) + c.insert(key, val) + return (c,), {} + + benchmark.pedantic( + lambda c: c.pop(key, None), + setup=setup, + iterations=1, + rounds=1000, + warmup_rounds=5, + ) + + def test_bench_popitem(self, benchmark): + """Each round gets a fresh full cache.""" + if isinstance(self.create_cache(0), cachebox.Cache): + pytest.skip("cachebox.Cache not supported this") + + def setup(): + c = self.create_cache(1000) + for i in range(1000): + c.insert(i, i) + + 
return (c,), {} + + benchmark.pedantic( + lambda c: c.popitem(), + setup=setup, + iterations=1, + rounds=200, + warmup_rounds=5, + ) + + def test_bench_delitem(self, benchmark): + key = 0 + val = 0 + + def setup(): + c = self.create_cache(256) + c.insert(key, val) + return (c,), {} + + benchmark.pedantic( + lambda c: c.__delitem__(key), + setup=setup, + iterations=1, + rounds=200, + warmup_rounds=5, + ) diff --git a/tests/test_impls.py b/tests/test_impls.py new file mode 100644 index 0000000..b21e14f --- /dev/null +++ b/tests/test_impls.py @@ -0,0 +1,1832 @@ +import time +import typing +from datetime import datetime, timedelta + +import pytest + +import cachebox + +from . import mixins + + +class TestCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) + + def test_popitem_overflow_error(self): + cache = self.create_cache() + + # cachebox.Cache does not have any algorithm to use + with pytest.raises(OverflowError): + cache.popitem() + + def test_insert_overflow_error(self): + cache = self.create_cache(5) + + for i in range(5): + cache.insert(i, i) + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + cache.insert(4, "A") # <- Replacing should be OK + + # Try again with custom getsizeof + cache = self.create_cache(5, getsizeof=lambda k, v: len(k)) + cache.insert("AA", 1) + cache.insert("BBB", 1) # <- Now is full + + assert cache.is_full() + + with pytest.raises(OverflowError): + cache.insert("NEW", 1) + + cache.insert("AA", "A") # 
<- Replacing should be OK + + def test_update_overflow_error(self): + with pytest.raises(OverflowError): + self.create_cache(5, {i: i for i in range(6)}) + + cache = self.create_cache(5) + cache.update({i: i for i in range(5)}) # <- Now is full + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + with pytest.raises(OverflowError): + cache.update({10: 10}) + + # Replacing should be OK + cache.update({i: i for i in range(5)}) + + +class TestFIFOCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.FIFOCache: + return cachebox.FIFOCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestFIFOCachePolicy(mixins.BaseMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.FIFOCache: + return cachebox.FIFOCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_oldest_item_evicted_on_overflow(self): + """When capacity is exceeded, the first inserted key must be evicted.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # triggers eviction of key 1 + assert 1 not in cache + assert 4 in cache + + def test_eviction_is_strictly_insertion_ordered(self): + """Keys evict in the exact order they were inserted, not access order.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + cache[4] = "d" # evicts 1 + cache[5] = "e" # evicts 2 + cache[6] = "f" # evicts 3 + + assert 1 not in cache + assert 2 not in 
cache + assert 3 not in cache + assert {4, 5, 6} == set(cache.keys()) + + def test_accessing_key_does_not_reset_eviction_priority(self): + """ + Unlike LRU, a cache hit must NOT push the key to the back. + Key 1 is accessed repeatedly but must still be the first evicted. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + _ = cache[1] + _ = cache[1] + _ = cache[1] + + cache[4] = "d" # must still evict key 1 + assert 1 not in cache + + def test_overwriting_existing_key_does_not_change_eviction_order(self): + """ + Updating the value of an existing key must NOT change its insertion + position in the eviction queue. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + cache[1] = "updated" # update, not a new insertion + cache[4] = "d" # must still evict key 1 + + assert 1 not in cache + assert cache[4] == "d" + + def test_popitem_removes_oldest(self): + """popitem() must always remove and return the oldest inserted entry.""" + cache = self.create_cache(3, [(10, "x"), (20, "y"), (30, "z")]) + key, value = cache.popitem() + assert key == 10 + assert value == "x" + + def test_popitem_successive_calls_follow_fifo(self): + """Successive popitem() calls must yield keys in insertion order.""" + insertion_order = [(1, "a"), (2, "b"), (3, "c"), (4, "d")] + cache = self.create_cache(4, insertion_order) + popped_keys = [cache.popitem()[0] for _ in range(4)] + assert popped_keys == [1, 2, 3, 4] + + def test_drain_removes_n_oldest(self): + """drain(n) must remove exactly n items, oldest-first.""" + cache = self.create_cache(5, [(i, str(i)) for i in range(1, 6)]) + removed = cache.drain(3) + assert removed == 3 + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert 4 in cache + assert 5 in cache + + def test_first_returns_oldest_key(self): + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")]) + assert cache.first() == 7 + + def test_last_returns_newest_key(self): + cache = self.create_cache(3, [(7, "a"), 
(8, "b"), (9, "c")]) + assert cache.last() == 9 + + def test_first_with_positive_n_browses_in_insertion_order(self): + """first(n) must walk forward through insertion order.""" + cache = self.create_cache(4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert cache.first(0) == 10 + assert cache.first(1) == 20 + assert cache.first(2) == 30 + assert cache.first(3) == 40 + + def test_first_with_negative_n_browses_from_end(self): + """first(-1) is an alias for last(); first(-2) is the second newest.""" + cache = self.create_cache(4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert cache.first(-1) == 40 + assert cache.first(-2) == 30 + + def test_first_after_eviction_reflects_new_head(self): + """After an eviction, first() must return the new oldest key.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts key 1 + assert cache.first() == 2 + + def test_last_after_insertion_reflects_new_tail(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" + assert cache.last() == 4 + + def test_first_on_single_element_cache(self): + cache = self.create_cache(1, [(42, "only")]) + assert cache.first() == 42 + assert cache.last() == 42 + + def test_first_raise_indexerror_on_empty_cache(self): + cache = self.create_cache(0) + + with pytest.raises(IndexError): + cache.first() + + def test_rolling_window_maintains_correct_contents(self): + """ + Simulate a sliding-window workload: insert N items into a cache of + size K and verify that only the most-recently inserted K items survive. 
+ """ + maxsize = 4 + total = 20 + cache = self.create_cache(maxsize) + + for i in range(total): + cache[i] = i * 10 + + expected = set(range(total - maxsize, total)) + assert set(cache.keys()) == expected + + def test_no_phantom_keys_after_eviction(self): + """Evicted keys must not linger in contains() or iteration.""" + cache = self.create_cache(2, [(1, "a"), (2, "b")]) + cache[3] = "c" # evicts 1 + + for key in cache: + assert key != 1 + + assert not cache.contains(1) + + def test_reinsert_evicted_key_rejoins_at_tail(self): + """ + Re-inserting a previously evicted key must treat it as a brand-new + entry positioned at the back of the queue. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts 1 + cache[1] = "re" # re-insert 1 — should now be at the tail + cache[5] = "e" # must evict 2 (now the oldest), not 1 + + assert 2 not in cache + assert 1 in cache + assert cache[1] == "re" + + def test_is_full_triggers_at_maxsize(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + assert cache.is_full() + cache[4] = "d" # eviction should keep it full, not overflow + assert cache.is_full() + assert len(cache) == 3 + + def test_len_never_exceeds_maxsize(self): + cache = self.create_cache(5) + for i in range(100): + cache[i] = i + + assert len(cache) <= 5 + + def test_clear_resets_fifo_order(self): + """After clear(), the insertion order restarts from scratch.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache.clear() + cache[10] = "x" + cache[20] = "y" + cache[30] = "z" + assert cache.first() == 10 + assert cache.last() == 30 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow_entries_scan(self): + """ + Verifies that FIFOCache correctly rebases its internal `front_offset` + counter when it approaches `u8::MAX` (255 in the use-small-offset test build). 
+ """ + U8_MAX = 255 + CACHE_SIZE = 2 + + # Phase 2 + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow_table_scan(self): + U8_MAX = 255 + CACHE_SIZE = 20 + + # Phase 2 + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + 
total_insertions = U8_MAX + CACHE_SIZE # 265 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + +class TestRRCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + 
capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.RRCache: + return cachebox.RRCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_random_key_method(self): + cache = self.create_cache(10) + + with pytest.raises(KeyError): + cache.random_key() + + cache["a"] = 1 + assert cache.random_key() == "a" + + cache["b"] = 2 + cache["c"] = 3 + cache["d"] = 4 + assert cache.random_key() in ("a", "b", "c", "d") + + +class TestLRUCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LRUCache: + return cachebox.LRUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestLRUCachePolicy(mixins.BaseMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LRUCache: + return cachebox.LRUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_evicts_lru_when_full(self): + c = self.create_cache(3, {"a": 1, "b": 2, "c": 3}) + c.insert("d", 4) + assert "a" not in c + assert "d" in c + + c = self.create_cache(3, {"a": 1, "b": 2, "c": 3}) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.insert("d", 4) + assert "a" not in c + assert "d" in c + + def test_does_not_evict_recently_read_key(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + _ = c["a"] # promote "a" → "b" becomes LRU + c.insert("d", 4) + assert "b" not in c + assert "a" in c + + def 
test_reinserting_existing_key_promotes_it(self): + c = self.create_cache(3, [("a", 1), ("b", 2), ("c", 3)]) + c.insert("a", 99) # "a" was LRU, now MRU + c.insert("d", 4) # should evict "b", not "a" + assert "a" in c + assert "b" not in c + + def test_cache_never_exceeds_maxsize(self): + c = self.create_cache(5) + for i in range(20): + c.insert(i, i) + assert len(c) <= 5 + + def test_sequential_inserts_keep_only_latest(self): + c = self.create_cache(3) + for i in range(6): + c.insert(i, i) + + for k in range(3): + assert k not in c + + for k in range(3, 6): + assert k in c + + def test_update_evicts_lru_to_make_room(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.update({"d": 4}) + assert "a" not in c + + def test_update_existing_key_promotes_it(self): + c = self.create_cache(3, [("a", 1), ("b", 2), ("c", 3)]) + c.update({"a": 99}) # "a" was LRU, now MRU + c.update({"d": 4}) # should evict "b" + assert "a" in c + assert "b" not in c + + def test_lru_and_mru_key_methods(self): + c = self.create_cache(3) + c.insert("a", 1) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "a" + + c.insert("b", 2) + c.insert("c", 3) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" + + _ = c["a"] # promote "a" + + assert c.least_recently_used() == "b" + assert c.most_recently_used() == "a" + + assert "b" in c # promote "b" + + assert c.least_recently_used() == "c" + assert c.most_recently_used() == "b" + + def test_setdefault_on_existing_key_promotes_it(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + c.setdefault("a", 0) + assert c.most_recently_used() == "a" + + def test_lru_mru_empty_raises(self): + with pytest.raises(KeyError): + self.create_cache(5).least_recently_used() + + with pytest.raises(KeyError): + self.create_cache(5).most_recently_used() + + def test_removes_least_recently_used(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + key, 
val = c.popitem() + assert key == "a" + assert val == 1 + assert "a" not in c + + def test_order_after_read(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + _ = c["a"] # "a" now MRU → "b" is LRU + key, _ = c.popitem() + assert key == "b" + + def test_order_after_reinsert(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + c.insert("a", 99) # "a" now MRU → "b" is LRU + key, _ = c.popitem() + assert key == "b" + + def test_repeated_popitem_respects_lru_order(self): + c = self.create_cache(5) + for i in range(5): + c.insert(i, i * 10) + + for expected in range(5): + key, _ = c.popitem() + assert key == expected + + def test_empty_raises(self): + with pytest.raises(KeyError): + self.create_cache(5).popitem() + + def test_hot_key_never_evicted(self): + c = self.create_cache(3) + c.insert("hot", 0) + for i in range(20): + _ = c.get("hot") + c.insert(f"cold_{i}", i) + + assert "hot" in c + + def test_mixed_reads_and_writes_evict_correctly(self): + c = self.create_cache(4) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.insert("d", 4) + _ = c["a"] # order: b, c, d, a + _ = c["c"] # order: b, d, a, c + c.insert("e", 5) # evicts "b" + assert "b" not in c + c.insert("f", 6) # evicts "d" + assert "d" not in c + + def test_peek_existing_key(self): + cache = self.create_cache() + + cache.insert("k", 42) + assert cache.peek("k") == 42 + + def test_peek_missing_key_returns_none(self): + cache = self.create_cache() + + assert cache.peek("nope") is None + + def test_peek_missing_key_returns_custom_default(self): + cache = self.create_cache() + + assert cache.peek("nope", "fallback") == "fallback" + + def test_peek_no_promote_key(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" + + c.peek("a") + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" + + +class TestLFUCache( + 
mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LFUCache: + return cachebox.LFUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + @staticmethod + def _hit(cache: cachebox.LFUCache, key, times: int = 1) -> None: + """Access a key `times` times to accumulate frequency.""" + for _ in range(times): + cache[key] + + def test_evicts_least_frequent_on_insert(self): + c = self.create_cache(3) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 3) + # "c" has frequency 1 — should be evicted + c["d"] = 4 + assert "c" not in c + assert "a" in c + assert "b" in c + assert "d" in c + + def test_evicts_lowest_frequency_not_oldest(self): + """LFU must evict by count, not by insertion order.""" + c = self.create_cache(3) + c["old"] = 0 # inserted first + c["mid"] = 0 + c["new"] = 0 # inserted last + self._hit(c, "old", 10) + self._hit(c, "mid", 10) + # "new" has lowest frequency even though "old" is oldest + c["x"] = 99 + assert "new" not in c + assert "old" in c + assert "mid" in c + + def test_frequency_survives_value_update(self): + """Re-inserting a key should update value but preserve (and increment) frequency.""" + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 1 + self._hit(c, "a", 5) # a.freq = 6 (5 reads + 1 insert) + c["a"] = 99 # update — should NOT reset frequency to 1 + # b has freq=1, a has freq>=6; inserting "c" must evict "b" + c["c"] = 3 + assert "b" not in c + assert "a" in c + + def test_popitem_removes_lfu_item(self): + c = self.create_cache(3) + c["a"] 
= 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 2) + # c has lowest frequency + key, val = c.popitem() + assert key == "c" + assert val == 3 + assert "c" not in c + + def test_tie_broken_by_recency_oldest_evicted(self): + """When frequencies are equal, the oldest-inserted key is evicted.""" + c = self.create_cache(3) + c["first"] = 1 # inserted first → evicted on tie + c["second"] = 2 + c["third"] = 3 + # All have freq=1; "first" is oldest + c["fourth"] = 4 + assert "first" not in c + + def test_single_item_cache_evicts_on_second_insert(self): + c = self.create_cache(1) + c["only"] = 42 + self._hit(c, "only", 100) + c["new"] = 7 + assert "only" not in c + assert c["new"] == 7 + + def test_get_increments_frequency(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + self._hit(c, "a", 3) # a.freq = 4, b.freq = 1 + c["c"] = 3 # evicts b + assert "b" not in c + assert "a" in c + + def test_setdefault_increments_frequency_on_hit(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + # setdefault on existing key should count as an access + for _ in range(5): + c.setdefault("a", 999) + c["c"] = 3 # should evict "b", not "a" + assert "b" not in c + assert "a" in c + + def test_peek_does_not_increment_frequency(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + + # Peek "a" many times — frequency must NOT change + for _ in range(100): + c.peek("a") + + # hit b once so it has freq=2 vs a's freq=1 + self._hit(c, "b", 1) + c["c"] = 3 # must evict "a" (lower freq due to peek not counting) + assert "a" not in c + assert "b" in c + + def test_least_frequently_used_reflects_access_counts(self): + c = self.create_cache(4) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + c["d"] = 4 + self._hit(c, "a", 10) + self._hit(c, "b", 5) + self._hit(c, "c", 2) + # d has freq=1, c has freq=3, b has freq=6, a has freq=11 + assert c.least_frequently_used(0) == "d" + assert c.least_frequently_used(1) == "c" + assert c.least_frequently_used(2) == "b" + 
assert c.least_frequently_used(3) == "a" + + def test_frequency_not_reset_after_pop_and_reinsert(self): + """A key that is popped and re-added starts fresh at frequency 1.""" + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + self._hit(c, "a", 10) + c.pop("a") + c["a"] = 1 # fresh insert — freq resets to 1 + # now b also has freq=1; tie broken by insertion order — a is newer + c["c"] = 3 # should evict b (older with same freq=1) + assert "b" not in c + assert "a" in c + + def test_cache_never_exceeds_maxsize(self): + c = self.create_cache(5) + for i in range(20): + c[i] = i + assert len(c) <= 5 + + def test_update_triggers_eviction(self): + c = self.create_cache(3) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 3) + c.update({"d": 4, "e": 5}) + assert len(c) == 3 + + def test_drain_removes_lfu_items_in_order(self): + c = self.create_cache(4) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + c["d"] = 4 + self._hit(c, "d", 10) + self._hit(c, "c", 5) + self._hit(c, "b", 2) + # a has freq=1 → evicted first; b next; etc. 
+ removed = c.drain(2) + assert removed == 2 + assert "a" not in c + assert "b" not in c + assert "c" in c + assert "d" in c + + def test_single_entry_popitem(self): + c = self.create_cache(10) + c["solo"] = 99 + k, v = c.popitem() + assert k == "solo" and v == 99 + assert len(c) == 0 + + def test_popitem_empty_raises(self): + c = self.create_cache(5) + with pytest.raises(KeyError): + c.popitem() + + def test_least_frequently_used_empty_raises(self): + c = self.create_cache(5) + with pytest.raises(IndexError): + c.least_frequently_used() + + def test_least_frequently_used_out_of_range_raises(self): + c = self.create_cache(5) + c["a"] = 1 + with pytest.raises(IndexError): + c.least_frequently_used(5) + + def test_clear_resets_all_frequencies(self): + c = self.create_cache(3) + c["a"] = 1 + self._hit(c, "a", 50) + c.clear() + assert len(c) == 0 + # After clearing, re-inserted keys start at frequency 1 + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + # All freq=1; tie → oldest ("a") evicted + c["d"] = 4 + assert "a" not in c + + def test_generation_version_on_least_frequently_used(self): + c = self.create_cache(5, {i: i for i in range(5)}) + + self._hit(c, 1, 5) + self._hit(c, 2, 3) + self._hit(c, 4, 10) + + # calling __iter__ causes the lazyheap to be sorted, + # so least_frequently_used shouldn't interrupt iteration + for _ in c: + c.least_frequently_used() + + +class TestTTLCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.TTLCache: + return cachebox.TTLCache( + maxsize, + 100, + iterable, + capacity=capacity, + getsizeof=getsizeof, 
+ ) + + +class TestTTLCachePolicy(mixins.SweepIntervalMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + global_ttl: float | timedelta = 1, + sweep_interval: float | timedelta | None = None, + ) -> cachebox.TTLCache: + return cachebox.TTLCache( + maxsize, + global_ttl, + iterable, + capacity=capacity, + getsizeof=getsizeof, + sweep_interval=sweep_interval, + ) + + def test_global_ttl_property(self): + c = self.create_cache(10, global_ttl=5) + assert c.global_ttl == 5 + + c = self.create_cache(10, global_ttl=timedelta(seconds=5)) + assert c.global_ttl == 5 + + with pytest.raises(ValueError): + c = self.create_cache(10, global_ttl=0) + + with pytest.raises(ValueError): + c = self.create_cache(10, global_ttl=-1) + + def test_global_ttl_with_iterable(self): + c = self.create_cache(10, {"A": "B", "C": "D"}, global_ttl=1) + assert c.global_ttl == 1 + + assert "A" in c + assert "C" in c + + time.sleep(1) + + assert "A" not in c + assert "C" not in c + + # __len__ does not call expire + assert len(c) == 2 + + # current_size calls expire + assert c.current_size() == 0 + assert len(c) == 0 + + def test_oldest_item_evicted_on_overflow(self): + """When capacity is exceeded, the first inserted key must be evicted.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + cache[4] = "d" # triggers eviction of key 1 + assert 1 not in cache + assert 4 in cache + + def test_eviction_is_strictly_insertion_ordered(self): + """Keys evict in the exact order they were inserted, not access order.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + + cache[4] = "d" # evicts 1 + cache[5] = "e" # evicts 2 + cache[6] = "f" # evicts 3 + + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert {4, 5, 6} == set(cache.keys()) + + def test_accessing_key_does_not_reset_eviction_priority(self): + """ + Unlike LRU, a cache hit 
must NOT push the key to the back. + Key 1 is accessed repeatedly but must still be the first evicted. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + + _ = cache[1] + _ = cache[1] + _ = cache[1] + + cache[4] = "d" # must still evict key 1 + assert 1 not in cache + + def test_overwriting_existing_key_does_not_change_eviction_order(self): + """ + Updating the value of an existing key must NOT change its insertion + position in the eviction queue. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + + cache[1] = "updated" # update, not a new insertion + cache[4] = "d" # must still evict key 1 + + assert 1 not in cache + assert cache[4] == "d" + + def test_popitem_removes_oldest(self): + """popitem() must always remove and return the oldest inserted entry.""" + cache = self.create_cache(3, [(10, "x"), (20, "y"), (30, "z")], global_ttl=10) + key, value = cache.popitem() + assert key == 10 + assert value == "x" + + def test_popitem_successive_calls_follow_fifo(self): + """Successive popitem() calls must yield keys in insertion order.""" + insertion_order = [(1, "a"), (2, "b"), (3, "c"), (4, "d")] + cache = self.create_cache(4, insertion_order, global_ttl=10) + popped_keys = [cache.popitem()[0] for _ in range(4)] + assert popped_keys == [1, 2, 3, 4] + + def test_drain_removes_n_oldest(self): + """drain(n) must remove exactly n items, oldest-first.""" + cache = self.create_cache(5, [(i, str(i)) for i in range(1, 6)], global_ttl=10) + removed = cache.drain(3) + assert removed == 3 + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert 4 in cache + assert 5 in cache + + def test_first_returns_oldest_key(self): + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")], global_ttl=10) + assert cache.first() == 7 + + def test_last_returns_newest_key(self): + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")], global_ttl=10) + assert cache.last() == 9 + + def 
test_first_with_positive_n_browses_in_insertion_order(self): + """first(n) must walk forward through insertion order.""" + cache = self.create_cache( + 4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")], global_ttl=10 + ) + assert cache.first(0) == 10 + assert cache.first(1) == 20 + assert cache.first(2) == 30 + assert cache.first(3) == 40 + + def test_first_with_negative_n_browses_from_end(self): + """first(-1) is an alias for last(); first(-2) is the second newest.""" + cache = self.create_cache( + 4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")], global_ttl=10 + ) + assert cache.first(-1) == 40 + assert cache.first(-2) == 30 + + def test_first_after_eviction_reflects_new_head(self): + """After an eviction, first() must return the new oldest key.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + cache[4] = "d" # evicts key 1 + assert cache.first() == 2 + + def test_last_after_insertion_reflects_new_tail(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + cache[4] = "d" + assert cache.last() == 4 + + def test_first_on_single_element_cache(self): + cache = self.create_cache(1, [(42, "only")], global_ttl=10) + assert cache.first() == 42 + assert cache.last() == 42 + + def test_first_raise_indexerror_on_empty_cache(self): + cache = self.create_cache(0) + + with pytest.raises(IndexError): + cache.first() + + def test_rolling_window_maintains_correct_contents(self): + """ + Simulate a sliding-window workload: insert N items into a cache of + size K and verify that only the most-recently inserted K items survive. 
+ """ + maxsize = 4 + total = 20 + cache = self.create_cache(maxsize) + + for i in range(total): + cache[i] = i * 10 + + expected = set(range(total - maxsize, total)) + assert set(cache.keys()) == expected + + def test_no_phantom_keys_after_eviction(self): + """Evicted keys must not linger in contains() or iteration.""" + cache = self.create_cache(2, [(1, "a"), (2, "b")], global_ttl=10) + cache[3] = "c" # evicts 1 + + for key in cache: + assert key != 1 + + assert not cache.contains(1) + + def test_reinsert_evicted_key_rejoins_at_tail(self): + """ + Re-inserting a previously evicted key must treat it as a brand-new + entry positioned at the back of the queue. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) + cache[4] = "d" # evicts 1 + cache[1] = "re" # re-insert 1 — should now be at the tail + cache[5] = "e" # must evict 2 (now the oldest), not 1 + + assert 2 not in cache + assert 1 in cache + assert cache[1] == "re" + + def test_is_full_triggers_at_maxsize(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + assert cache.is_full() + cache[4] = "d" # eviction should keep it full, not overflow + assert cache.is_full() + assert len(cache) == 3 + + def test_len_never_exceeds_maxsize(self): + cache = self.create_cache(5) + for i in range(100): + cache[i] = i + + assert len(cache) <= 5 + + def test_clear_resets_fifo_order(self): + """After clear(), the insertion order restarts from scratch.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache.clear() + cache[10] = "x" + cache[20] = "y" + cache[30] = "z" + assert cache.first() == 10 + assert cache.last() == 30 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow_entries_scan(self): + """ + Verifies that FIFOCache correctly rebases its internal `front_offset` + counter when it approaches `u8::MAX` (255 in the use-small-offset test 
build). + """ + U8_MAX = 255 + CACHE_SIZE = 2 + + # Phase 2 + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 257 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 257 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow_table_scan(self): + U8_MAX = 255 + CACHE_SIZE = 20 + + # Phase 2 + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase 
boundary + total_insertions = U8_MAX + CACHE_SIZE # 275 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 275 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + def test_global_ttl_on_insert(self): + obj = self.create_cache(2, global_ttl=0.5) + assert obj.global_ttl == 0.5 + + obj.insert(0, 1) + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[0] + + obj = self.create_cache(2, global_ttl=20) + + obj.insert(0, 0) + obj.insert(1, 1) + obj.insert(2, 2) + + assert 0 not in obj + assert (1, 1) == obj.popitem() + + def test_global_ttl_on_update(self): + obj = self.create_cache(2, global_ttl=0.5) + + # 
maxsize=2 - (1, 1) should be evicted because three items are inserted + obj.update((i + 1, i + 1) for i in range(3)) + + with pytest.raises(KeyError): + obj[1] + + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[2] + + with pytest.raises(KeyError): + obj[3] + + def test_get_with_expire(self): + obj = self.create_cache(2, global_ttl=10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.get_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_pop_with_expire(self): + obj = self.create_cache(2, global_ttl=10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.pop_with_expire("no-exists", None) + assert value is None + assert 0 == dur + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_popitem_with_expire(self): + obj = self.create_cache(2, global_ttl=10) + + obj.insert(1, 1) + obj.insert(2, 2) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + with pytest.raises(KeyError): + obj.popitem_with_expire() + + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = self.create_cache(10, {1: 2, 3: 4}) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float) + + def test_sweep_interval(self): + obj = cachebox.TTLCache(10, 3, {1: 1, 2: 2, 3: 3}, sweep_interval=3) 
+ + # __len__ doesn't call expire itself + assert len(obj) == 3 + time.sleep(3.5) + assert len(obj) == 0 + + +class TestVTTLCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.VTTLCache: + return cachebox.VTTLCache( + maxsize, + iterable, + 100, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestVTTLCachePolicy(mixins.SweepIntervalMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + sweep_interval: float | timedelta | None = None, + ) -> cachebox.VTTLCache: + return cachebox.VTTLCache( + maxsize, + iterable, + 100, + capacity=capacity, + getsizeof=getsizeof, + sweep_interval=sweep_interval, + ) + + def test_item_accessible_before_ttl(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.5) + assert c["k"] == "v" + + def test_item_expires_after_ttl(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert "k" not in c + + def test_expired_item_not_returned_by_get(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert c.get("k") is None + assert c.get("k", "default") == "default" + + def test_expired_item_raises_on_getitem(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + with pytest.raises(KeyError): + _ = c["k"] + + def test_no_ttl_item_never_expires(self): + c = self.create_cache() + c.insert("k", "v") # no TTL + time.sleep(0.1) + assert c["k"] == "v" + + def test_expired_key_not_in_contains(self): + c = 
self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert not c.contains("k") + assert "k" not in c + + def test_ttl_as_float(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert "k" not in c + + def test_ttl_as_timedelta(self): + c = self.create_cache() + c.insert("k", "v", ttl=timedelta(milliseconds=100)) + time.sleep(0.15) + assert "k" not in c + + def test_ttl_as_datetime(self): + c = self.create_cache() + expiry = datetime.now() + timedelta(milliseconds=100) + c.insert("k", "v", ttl=expiry) + assert "k" in c + time.sleep(0.15) + assert "k" not in c + + def test_datetime_in_the_past_expires_immediately(self): + c = self.create_cache() + past = datetime.now() - timedelta(seconds=1) + c.insert("k", "v", ttl=past) + assert "k" not in c + + c.insert("k", "v", ttl=-0.1) + assert "k" not in c + + c.insert("k", "v", ttl=-1) + assert "k" not in c + + c.insert("k", "v", ttl=timedelta(days=-1)) + assert "k" not in c + + def test_items_have_independent_ttls(self): + c = self.create_cache() + c.insert("short", "s", ttl=0.1) + c.insert("long", "l", ttl=1.0) + time.sleep(0.15) + assert "short" not in c + assert "long" in c + + def test_mixed_ttl_and_no_ttl(self): + c = self.create_cache() + c.insert("expires", "e", ttl=0.1) + c.insert("permanent", "p") + time.sleep(0.15) + assert "expires" not in c + assert "permanent" in c + + def test_multiple_items_expire_independently(self): + c = self.create_cache() + c.insert("a", 1, ttl=0.2) + c.insert("b", 2, ttl=0.6) + c.insert("c", 3, ttl=1) + time.sleep(0.2) + assert "a" not in c + assert "b" in c + assert "c" in c + time.sleep(0.4) + assert "b" not in c + assert "c" in c + + def test_reinsertion_resets_ttl(self): + c = self.create_cache() + c.insert("k", "v1", ttl=0.2) + time.sleep(0.1) + c.insert("k", "v2", ttl=0.3) + time.sleep(0.15) + # original TTL would have expired; new one should not + assert "k" in c + assert c["k"] == "v2" + + def 
test_reinsertion_without_ttl_makes_permanent(self): + c = self.create_cache() + c.insert("k", "v1", ttl=0.1) + c.insert("k", "v2") # no TTL — should become permanent + time.sleep(0.15) + assert "k" in c + + def test_setitem_uses_no_ttl(self): + """__setitem__ inserts without TTL; previously TTL'd key should persist.""" + c = self.create_cache() + c.insert("k", "v1", ttl=0.1) + c["k"] = "v2" + time.sleep(0.15) + assert "k" in c + assert c["k"] == "v2" + + def test_update_applies_ttl_to_all_items(self): + c = self.create_cache() + c.update({"a": 1, "b": 2}, ttl=0.1) + time.sleep(0.15) + assert "a" not in c + assert "b" not in c + + def test_update_without_ttl_items_are_permanent(self): + c = self.create_cache() + c.update({"a": 1, "b": 2}) + time.sleep(0.1) + assert "a" in c + assert "b" in c + + def test_update_mixes_with_existing_items(self): + c = self.create_cache() + c.insert("perm", 0) + c.update({"temp": 1}, ttl=0.1) + time.sleep(0.15) + assert "temp" not in c + assert "perm" in c + + def test_setdefault_inserts_with_ttl_when_absent(self): + c = self.create_cache() + c.setdefault("k", "v", ttl=0.1) + assert c["k"] == "v" + time.sleep(0.15) + assert "k" not in c + + def test_setdefault_does_not_update_existing_key(self): + c = self.create_cache() + c.insert("k", "original", ttl=1.0) + c.setdefault("k", "new", ttl=0.1) + time.sleep(0.15) + # should still be there with original TTL + assert c["k"] == "original" + + def test_popitem_removes_soonest_expiring_item(self): + c = self.create_cache() + c.insert("soon", "s", ttl=0.1) + c.insert("later", "l", ttl=10.0) + key, _ = c.popitem() + assert key == "soon" + + def test_popitem_prefers_expiring_over_permanent(self): + c = self.create_cache() + c.insert("perm", "p") + c.insert("temp", "t", ttl=0.5) + key, _ = c.popitem() + assert key == "temp" + + def test_popitem_on_empty_raises(self): + c = self.create_cache() + with pytest.raises(KeyError): + c.popitem() + + def test_expire_removes_stale_items(self): + c = 
self.create_cache() + c.insert("stale", "s", ttl=0.1) + c.insert("fresh", "f", ttl=10.0) + time.sleep(0.15) + c.expire() + assert "stale" not in c + assert "fresh" in c + + def test_expire_does_not_remove_unexpired_items(self): + c = self.create_cache() + c.insert("a", 1, ttl=10.0) + c.insert("b", 2) + c.expire() + assert "a" in c + assert "b" in c + + def test_expire_reuse_retains_capacity(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + cap_before = c.capacity() + time.sleep(0.15) + c.expire(reuse=True) + assert c.capacity() >= cap_before + + def test_soonest_expiring_evicted_when_full(self): + c = self.create_cache(maxsize=2) + c.insert("a", 1, ttl=0.2) + c.insert("b", 2, ttl=10.0) + # inserting a third item must evict "a" (soonest expiry) + c.insert("c", 3, ttl=10.0) + assert "b" in c + assert "c" in c + assert "a" not in c + + def test_expired_items_cleared_on_insert_when_full(self): + c = self.create_cache(maxsize=2) + c.insert("a", 1, ttl=0.1) + c.insert("b", 2, ttl=0.1) + time.sleep(0.15) + # both expired; inserting should succeed + c.insert("c", 3) + assert "c" in c + + def test_keys_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "e", ttl=0.1) + c.insert("live", "l") + time.sleep(0.15) + assert "exp" not in list(c.keys()) + assert "live" in list(c.keys()) + + def test_values_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "expired_val", ttl=0.1) + c.insert("live", "live_val") + time.sleep(0.15) + assert "expired_val" not in list(c.values()) + assert "live_val" in list(c.values()) + + def test_items_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "e", ttl=0.1) + c.insert("live", "l") + time.sleep(0.15) + keys = [k for k, _ in c.items()] + assert "exp" not in keys + assert "live" in keys + + def test_get_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert isinstance(dur, float) 
and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + obj.insert(1, 1, None) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert dur is None, "dur is None failed [dur: {}]".format(dur) + + value, dur = obj.get_with_expire("no-exists") + assert value is None + assert dur is None + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert dur is None + + def test_pop_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert isinstance(dur, float) and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + obj.insert(1, 1, None) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert dur is None, "dur is None failed [dur: {}]".format(dur) + + value, dur = obj.pop_with_expire("no-exists", None) + assert value is None + assert dur is None + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert dur is None + + def test_popitem_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + obj.insert(2, 2, 20) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert isinstance(dur, float) and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert isinstance(dur, float) and 20 > dur > 19, ( + "20 > dur > 19 failed [dur: %f]" % dur + ) + + with pytest.raises(KeyError): + obj.popitem_with_expire() + + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = self.create_cache(10, {1: 2, 3: 4}) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float) + + def test_sweep_interval(self): + obj = cachebox.VTTLCache(10, {1: 1, 2: 2, 3: 
3}, 3, sweep_interval=3) + + # __len__ doesn't call expire itself + assert len(obj) == 3 + time.sleep(3.5) + assert len(obj) == 0 diff --git a/python/tests/test_utils.py b/tests/test_utils.py similarity index 56% rename from python/tests/test_utils.py rename to tests/test_utils.py index 6d6dc7c..5ef6ace 100644 --- a/python/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,22 +1,42 @@ -from cachebox import ( - Frozen, - LRUCache, - BaseCacheImpl, - cached, - make_typed_key, - make_key, - EVENT_HIT, - EVENT_MISS, - is_cached, -) import asyncio -import pytest import time +import typing + +import pytest + +import cachebox + + +@pytest.fixture( + scope="function", + params=[ + cachebox.Cache, + cachebox.FIFOCache, + cachebox.LFUCache, + cachebox.LRUCache, + cachebox.TTLCache, + cachebox.RRCache, + cachebox.VTTLCache, + ], +) +def random_cache_impl(request): + typ: typing.Type[cachebox.BaseCacheImpl] = request.param + + def inner(maxsize, iterable=None): + if typ is cachebox.TTLCache: + return typ(maxsize, global_ttl=10, iterable=iterable) + + if typ is cachebox.VTTLCache: + return typ(maxsize, ttl=10, iterable=iterable) + + return typ(maxsize, iterable=iterable) + return inner -def test_frozen(random_cache_impl: type[BaseCacheImpl]): + +def test_frozen(random_cache_impl: type[cachebox.BaseCacheImpl]): cache = random_cache_impl(10, {i: i for i in range(8)}) - f = Frozen(cache) + f = cachebox.Frozen(cache) assert f.maxsize == cache.maxsize @@ -35,39 +55,39 @@ def test_frozen(random_cache_impl: type[BaseCacheImpl]): assert len(f) == 9 assert len(f) == len(cache) - f = Frozen(cache, ignore=True) + f = cachebox.Frozen(cache, ignore=True) f.popitem() -def test_cached(random_cache_impl: type[BaseCacheImpl]): +def test_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(3) - @cached(obj) - def factorial(n): + @cachebox.cached(obj) + def factorial(n: int): fact = 1 for num in range(2, n + 1): fact *= num - time.sleep(0.1) # need for testing + 
time.sleep(0.1) return fact perf_1 = time.perf_counter() factorial(15) perf_1 = time.perf_counter() - perf_1 - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 + assert cachebox.get_cached_cache_info(factorial).length == 1 + assert cachebox.get_cached_cache_info(factorial).misses == 1 perf_2 = time.perf_counter() factorial(15) perf_2 = time.perf_counter() - perf_2 assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 + assert cachebox.get_cached_cache_info(factorial).hits == 1 - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 + cachebox.clear_cached_cache(factorial) + assert cachebox.get_cached_cache_info(factorial).hits == 0 + assert cachebox.get_cached_cache_info(factorial).misses == 0 perf_3 = time.perf_counter() factorial(15) @@ -75,39 +95,39 @@ def factorial(n): assert perf_3 > perf_2 # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - factorial(15, cachebox__ignore=True) - assert len(factorial.cache) == 0 + cachebox.clear_cached_cache(factorial) + assert len(cachebox.get_cached_cache(factorial)) == 0 + factorial(15, cachebox__ignore=True) # type: ignore + assert len(cachebox.get_cached_cache(factorial)) == 0 -def test_key_makers(random_cache_impl: type[BaseCacheImpl]): - @cached(random_cache_impl(125), key_maker=make_key) - def func(a, b, c): +def test_key_makers(random_cache_impl: type[cachebox.BaseCacheImpl]): + @cachebox.cached(random_cache_impl(125), key_maker=cachebox.make_key) + def func_1(a, b, c): return a, b, c - func(1, 2, 3) - func(1.0, 2, 3.0) - func(3, 2, 1) + func_1(1, 2, 3) + func_1(1.0, 2, 3.0) + func_1(3, 2, 1) - assert len(func.cache) == 2 + assert len(cachebox.get_cached_cache(func_1)) == 2 - @cached(random_cache_impl(125), key_maker=make_typed_key) - def func(a, b, c): + @cachebox.cached(random_cache_impl(125), key_maker=cachebox.make_typed_key) + def func_2(a, b, c): return a, b, c - func(1, 2, 
3) - func(1.0, 2, 3.0) - func(3, 2, 1) + func_2(1, 2, 3) + func_2(1.0, 2, 3.0) + func_2(3, 2, 1) - assert len(func.cache) == 3 + assert len(cachebox.get_cached_cache(func_2)) == 3 @pytest.mark.asyncio -async def test_async_cached(random_cache_impl: type[BaseCacheImpl]): +async def test_async_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(3) - @cached(obj) + @cachebox.cached(obj) async def factorial(n: int, _: str): fact = 1 for num in range(2, n + 1): @@ -120,19 +140,19 @@ async def factorial(n: int, _: str): await factorial(15, "cachebox") perf_1 = time.perf_counter() - perf_1 - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 + assert cachebox.get_cached_cache_info(factorial).length == 1 + assert cachebox.get_cached_cache_info(factorial).misses == 1 perf_2 = time.perf_counter() await factorial(15, "cachebox") perf_2 = time.perf_counter() - perf_2 assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 + assert cachebox.get_cached_cache_info(factorial).hits == 1 - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 + cachebox.clear_cached_cache(factorial) + assert cachebox.get_cached_cache_info(factorial).hits == 0 + assert cachebox.get_cached_cache_info(factorial).misses == 0 perf_3 = time.perf_counter() await factorial(15, "cachebox") @@ -140,10 +160,10 @@ async def factorial(n: int, _: str): assert perf_3 > perf_2 # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - await factorial(15, "me", cachebox__ignore=True) - assert len(factorial.cache) == 0 + cachebox.clear_cached_cache(factorial) + assert len(cachebox.get_cached_cache(factorial)) == 0 + await factorial(15, "me", cachebox__ignore=True) # type: ignore + assert len(cachebox.get_cached_cache(factorial)) == 0 def test_cachedmethod(): @@ -151,7 +171,7 @@ class TestCachedMethod: def __init__(self, num) -> None: self.num = num - 
@cached(None) + @cachebox.cached(None) def method(self, char: str): assert type(self) is TestCachedMethod return char * self.num @@ -163,29 +183,14 @@ def method(self, char: str): assert cls.method("a") == ("a" * 2) -@pytest.mark.asyncio -async def test_async_cachedmethod(random_cache_impl: type[BaseCacheImpl]): - class TestCachedMethod: - def __init__(self, num) -> None: - self.num = num - - @cached(random_cache_impl(0)) - async def method(self, char: str): - assert type(self) is TestCachedMethod - return char * self.num - - cls = TestCachedMethod(10) - assert (await cls.method("a")) == ("a" * 10) - - -def test_callback(random_cache_impl: type[BaseCacheImpl]): +def test_callback(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(3) called = list() - @cached( + @cachebox.cached( obj, - key_maker=lambda args, _: args[0], + key_maker=lambda n: n, callback=lambda event, key, value: called.append((event, key, value)), ) def factorial(n: int, /): @@ -197,20 +202,36 @@ def factorial(n: int, /): assert factorial(5) == 120 assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) + assert called[0] == (cachebox.EVENT_MISS, 5, 120) assert factorial(5) == 120 assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) + assert called[1] == (cachebox.EVENT_HIT, 5, 120) assert factorial(3) == 6 assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) + assert called[2] == (cachebox.EVENT_MISS, 3, 6) - assert is_cached(factorial) + assert cachebox.is_cached(factorial) -async def _test_async_callback(random_cache_impl: type[BaseCacheImpl]): +@pytest.mark.asyncio +async def test_async_cachedmethod(random_cache_impl: type[cachebox.BaseCacheImpl]): + class TestCachedMethod: + def __init__(self, num) -> None: + self.num = num + + @cachebox.cached(random_cache_impl(0)) + async def method(self, char: str): + assert type(self) is TestCachedMethod + return char * self.num + + cls = TestCachedMethod(10) + assert (await cls.method("a")) == 
("a" * 10) + + +@pytest.mark.asyncio +async def test_async_callback(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(3) called = list() @@ -218,7 +239,7 @@ async def _test_async_callback(random_cache_impl: type[BaseCacheImpl]): async def _callback(event, key, value): called.append((event, key, value)) - @cached(obj, key_maker=lambda args, _: args[0], callback=_callback) + @cachebox.cached(obj, key_maker=lambda n: n, callback=_callback) async def factorial(n: int, /): fact = 1 for num in range(2, n + 1): @@ -228,55 +249,18 @@ async def factorial(n: int, /): assert await factorial(5) == 120 assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) + assert called[0] == (cachebox.EVENT_MISS, 5, 120) assert await factorial(5) == 120 assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) + assert called[1] == (cachebox.EVENT_HIT, 5, 120) assert await factorial(3) == 6 assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) + assert called[2] == (cachebox.EVENT_MISS, 3, 6) - assert is_cached(factorial) - assert not is_cached(_callback) - - -def test_async_callback(random_cache_impl: type[BaseCacheImpl]): - try: - loop = asyncio.get_running_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - - loop.run_until_complete(_test_async_callback(random_cache_impl)) - - -def test_copy_level(random_cache_impl: type[BaseCacheImpl]): - class A: - def __init__(self, c: int) -> None: - self.c = c - - @cached(random_cache_impl(0)) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 2 # !!! 
- - @cached(random_cache_impl(0), copy_level=2) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 1 # :) + assert cachebox.is_cached(factorial) + assert not cachebox.is_cached(_callback) def test_classmethod(): @@ -285,7 +269,7 @@ def __init__(self, num: int) -> None: self.num = num @classmethod - @cached(None, copy_level=2) + @cachebox.cached(None, postprocess=cachebox.postprocess_copy) def new(cls, num: int): return cls(num) @@ -299,7 +283,7 @@ def __init__(self, num: int) -> None: self.num = num @staticmethod - @cached(None, copy_level=2) + @cachebox.cached(None, postprocess=cachebox.postprocess_copy) def new(num: int): return num @@ -307,13 +291,13 @@ def new(num: int): assert isinstance(a, int) and a == 1 -def test_new_cached_method(random_cache_impl: type[BaseCacheImpl]): +def test_cached_method(random_cache_impl: type[cachebox.BaseCacheImpl]): class Test: def __init__(self, num) -> None: self.num = num self._cache = random_cache_impl(20) - @cached(lambda self: self._cache) + @cachebox.cached(lambda self: self._cache) def method(self, char: str): assert type(self) is Test return char * self.num @@ -323,14 +307,18 @@ def method(self, char: str): assert cls.method("a") == ("a" * i) -def test_nested_cached_shared_cache(random_cache_impl: type[BaseCacheImpl]): +def test_nested_cached_shared_cache(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(10) - @cached(obj, key_maker=make_typed_key) + @cachebox.cached(obj, key_maker=cachebox.make_typed_key) def func_inner(a: int, b: int): return a + b - @cached(obj, key_maker=make_key) + @cachebox.cached( + obj, + # `key_maker`s should be different + key_maker=cachebox.make_key, + ) def func_outer(a: int, b: int): return f"{a} + {b} = {func_inner(a, b)}" @@ -342,13 +330,13 @@ def func_outer(a: int, b: int): assert func_outer(a=2, b=3) == "2 + 3 = 5" -def test_recursive_cached(random_cache_impl: 
type[BaseCacheImpl]): +def test_recursive_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): obj = random_cache_impl(10) - @cached(obj) + @cachebox.cached(obj) def factorial(n): if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") + raise ValueError if n == 0 or n == 1: return 1 else: @@ -365,12 +353,12 @@ def factorial(n): def test_recursive_threading_cached(): import threading - obj = LRUCache(10) + obj = cachebox.LRUCache(10) - @cached(obj) + @cachebox.cached(obj) def factorial(n): if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") + raise ValueError if n == 0 or n == 1: return 1 else: @@ -379,7 +367,10 @@ def factorial(n): threads = list( map( lambda x: x.start() or x, - (threading.Thread(target=factorial, args=(10,), name=str(i)) for i in range(10)), + ( + threading.Thread(target=factorial, args=(10,), name=str(i)) + for i in range(10) + ), ) ) for t in threads: @@ -388,12 +379,12 @@ def factorial(n): @pytest.mark.asyncio async def test_recursive_asyncio_cached(): - obj = LRUCache(10) + obj = cachebox.LRUCache(10) - @cached(obj) + @cachebox.cached(obj) async def factorial(n) -> int: if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") + raise ValueError if n == 0 or n == 1: return 1 else: